In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.optimize import curve_fit, OptimizeWarning
from tqdm import tqdm
import warnings
from scipy.stats import zscore
from statsmodels.tsa.stattools import acf, pacf
from scipy.optimize import minimize
from vqr import VectorQuantileRegressor
from vqr.solvers.regularized_lse import RegularizedDualVQRSolver
import statsmodels.api as sm


sns.set_theme()
sns.set_context("notebook")
%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dtype_dict = {
    'FarmName_Pseudo': 'str',
    'SE_Number': 'str',
    'AnimalNumber': 'Int64',          
    'StartDate': 'str',
    'StartTime': 'str',
    'DateTime': 'str',
    'LactationNumber': 'Int64',       
    'DaysInMilk': 'Int64', 
    'YearSeason': 'str',           
    'TotalYield': 'float',
    'DateTime': 'str',
    'BreedName': 'str',
    'Age': 'Int64',
    'Mother': 'str',
    'Father': 'str',
    'CullDecisionDate': 'str',
    'Temperature': 'float',
    'RelativeHumidity': 'float',      
    'THI_adj': 'float',
    'HW': 'Int64',                    
    'cum_HW': 'Int64',                
    'Temp15Threshold': 'Int64'        
}


# Load the CSV with specified dtypes
data = pd.read_csv('../Data/MergedData/CleanedYieldData.csv', dtype=dtype_dict)

# Convert date and time columns back to datetime and time objects
data['DateTime'] = pd.to_datetime(data['DateTime'], errors='coerce')
data['StartTime'] = pd.to_datetime(data['StartTime'], format='%H:%M:%S', errors='coerce').dt.time
data['StartDate'] = pd.to_datetime(data['StartDate'], errors='coerce')
data['CullDecisionDate'] = pd.to_datetime(data['CullDecisionDate'], errors='coerce')
data['DateTime'] = pd.to_datetime(data['DateTime'], errors='coerce')
data.head()

Unnamed: 0,FarmName_Pseudo,SE_Number,AnimalNumber,StartDate,StartTime,LactationNumber,DaysInMilk,TotalYield,DateTime,YearSeason,...,Mother,Father,CullDecisionDate,Temperature,RelativeHumidity,THI_adj,HW,cum_HW,Temp15Threshold,Age
0,a624fb9a,SE-064c0cec-1189,5189,2022-01-01,06:25:00,7,191,13.9,2022-01-01 06:25:00,2022-1,...,,,2022-12-20,-3.025,0.930917,28.012944,0,0,0,3095
1,a624fb9a,SE-064c0cec-1189,5189,2022-01-01,16:41:00,7,191,16.87,2022-01-01 16:41:00,2022-1,...,,,2022-12-20,-3.025,0.930917,28.012944,0,0,0,3095
2,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,15:29:00,7,192,20.41,2022-01-02 15:29:00,2022-1,...,,,2022-12-20,-0.279167,0.990542,32.898193,0,0,0,3096
3,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,22:44:00,7,192,11.53,2022-01-02 22:44:00,2022-1,...,,,2022-12-20,-0.279167,0.990542,32.898193,0,0,0,3096
4,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,03:31:00,7,192,16.28,2022-01-02 03:31:00,2022-1,...,,,2022-12-20,-0.279167,0.990542,32.898193,0,0,0,3096


In [3]:
# Define the THI threshold
THI_THRESHOLD = 61

# Calculate the daily heat load based on the THI threshold
data['HeatLoad'] = data['THI_adj'].apply(lambda x: x - THI_THRESHOLD if x > THI_THRESHOLD else -(THI_THRESHOLD - x))

# Initialize the cumulative heat load column with float type
data['CumulativeHeatLoad'] = 0.0  # Explicitly set as float

# Iterate through the data to calculate cumulative heat load correctly
for i in range(1, len(data)):
    previous_cumulative = data.at[i-1, 'CumulativeHeatLoad']
    current_heat_load = data.at[i, 'HeatLoad']
    if previous_cumulative + current_heat_load > 0:
        data.at[i, 'CumulativeHeatLoad'] = previous_cumulative + current_heat_load
    else:
        data.at[i, 'CumulativeHeatLoad'] = 0.0  # Ensure float is maintained

data.head(-5)

Unnamed: 0,FarmName_Pseudo,SE_Number,AnimalNumber,StartDate,StartTime,LactationNumber,DaysInMilk,TotalYield,DateTime,YearSeason,...,CullDecisionDate,Temperature,RelativeHumidity,THI_adj,HW,cum_HW,Temp15Threshold,Age,HeatLoad,CumulativeHeatLoad
0,a624fb9a,SE-064c0cec-1189,5189,2022-01-01,06:25:00,7,191,13.90,2022-01-01 06:25:00,2022-1,...,2022-12-20,-3.025000,0.930917,28.012944,0,0,0,3095,-32.987056,0.000000
1,a624fb9a,SE-064c0cec-1189,5189,2022-01-01,16:41:00,7,191,16.87,2022-01-01 16:41:00,2022-1,...,2022-12-20,-3.025000,0.930917,28.012944,0,0,0,3095,-32.987056,0.000000
2,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,15:29:00,7,192,20.41,2022-01-02 15:29:00,2022-1,...,2022-12-20,-0.279167,0.990542,32.898193,0,0,0,3096,-28.101807,0.000000
3,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,22:44:00,7,192,11.53,2022-01-02 22:44:00,2022-1,...,2022-12-20,-0.279167,0.990542,32.898193,0,0,0,3096,-28.101807,0.000000
4,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,03:31:00,7,192,16.28,2022-01-02 03:31:00,2022-1,...,2022-12-20,-0.279167,0.990542,32.898193,0,0,0,3096,-28.101807,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1859047,f454e660,SE-fcdf259d-0044-0,1044,2023-06-07,00:51:00,10,351,5.63,2023-06-07 00:51:00,2023-3,...,2024-07-25,15.645833,0.731917,61.559237,0,0,1,4154,0.559237,0.559237
1859048,f454e660,SE-fcdf259d-0044-0,1044,2023-06-07,11:17:00,10,351,3.34,2023-06-07 11:17:00,2023-3,...,2024-07-25,15.645833,0.731917,61.559237,0,0,1,4154,0.559237,1.118475
1859049,f454e660,SE-fcdf259d-0044-0,1044,2023-06-08,17:01:00,10,352,6.96,2023-06-08 17:01:00,2023-3,...,2024-07-25,15.570833,0.601708,59.383267,0,0,1,4155,-1.616733,0.000000
1859050,f454e660,SE-fcdf259d-0044-0,1044,2023-06-08,02:23:00,10,352,8.18,2023-06-08 02:23:00,2023-3,...,2024-07-25,15.570833,0.601708,59.383267,0,0,1,4155,-1.616733,0.000000


In [4]:
# When CumulativeHeatLoad is greater than 5, it indicates that the cow is under heat stress
data['HeatStress'] = (data['CumulativeHeatLoad'] > 5).astype(int)
data.head(-5)

Unnamed: 0,FarmName_Pseudo,SE_Number,AnimalNumber,StartDate,StartTime,LactationNumber,DaysInMilk,TotalYield,DateTime,YearSeason,...,Temperature,RelativeHumidity,THI_adj,HW,cum_HW,Temp15Threshold,Age,HeatLoad,CumulativeHeatLoad,HeatStress
0,a624fb9a,SE-064c0cec-1189,5189,2022-01-01,06:25:00,7,191,13.90,2022-01-01 06:25:00,2022-1,...,-3.025000,0.930917,28.012944,0,0,0,3095,-32.987056,0.000000,0
1,a624fb9a,SE-064c0cec-1189,5189,2022-01-01,16:41:00,7,191,16.87,2022-01-01 16:41:00,2022-1,...,-3.025000,0.930917,28.012944,0,0,0,3095,-32.987056,0.000000,0
2,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,15:29:00,7,192,20.41,2022-01-02 15:29:00,2022-1,...,-0.279167,0.990542,32.898193,0,0,0,3096,-28.101807,0.000000,0
3,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,22:44:00,7,192,11.53,2022-01-02 22:44:00,2022-1,...,-0.279167,0.990542,32.898193,0,0,0,3096,-28.101807,0.000000,0
4,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,03:31:00,7,192,16.28,2022-01-02 03:31:00,2022-1,...,-0.279167,0.990542,32.898193,0,0,0,3096,-28.101807,0.000000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1859047,f454e660,SE-fcdf259d-0044-0,1044,2023-06-07,00:51:00,10,351,5.63,2023-06-07 00:51:00,2023-3,...,15.645833,0.731917,61.559237,0,0,1,4154,0.559237,0.559237,0
1859048,f454e660,SE-fcdf259d-0044-0,1044,2023-06-07,11:17:00,10,351,3.34,2023-06-07 11:17:00,2023-3,...,15.645833,0.731917,61.559237,0,0,1,4154,0.559237,1.118475,0
1859049,f454e660,SE-fcdf259d-0044-0,1044,2023-06-08,17:01:00,10,352,6.96,2023-06-08 17:01:00,2023-3,...,15.570833,0.601708,59.383267,0,0,1,4155,-1.616733,0.000000,0
1859050,f454e660,SE-fcdf259d-0044-0,1044,2023-06-08,02:23:00,10,352,8.18,2023-06-08 02:23:00,2023-3,...,15.570833,0.601708,59.383267,0,0,1,4155,-1.616733,0.000000,0


In [5]:
# Calculate the DailyYield for each cow each day
data['DailyYield'] = data.groupby(['SE_Number', 'StartDate'])['TotalYield'].transform('sum')

# Sort the data by AnimalNumber and StartDate
data.sort_values(['AnimalNumber', 'StartDate'], inplace=True)

# Calculate the previous day's total yield for each cow
data['PreviousDailyYield'] = data.groupby('AnimalNumber')['DailyYield'].shift(1)

# Calculate the daily yield change for each cow
data['DailyYieldChange'] = data['DailyYield'] - data['PreviousDailyYield']

# Group and aggregate data
data = data.groupby(['SE_Number', 'FarmName_Pseudo', 'StartDate']).agg({
    'DailyYield': 'first',
    'PreviousDailyYield': 'first',
    'DailyYieldChange': 'first',
    'HW': 'max',
    'Temperature': 'mean',
    'THI_adj': 'mean',
    'DaysInMilk': 'first',
    'YearSeason': 'first',
    'cum_HW': 'max',
    'Temp15Threshold': 'max',
    'Age': 'first',
    'BreedName': 'first',
    'LactationNumber': 'first',
    'HeatLoad': 'mean',
    'CumulativeHeatLoad': 'mean',
    'HeatStress': 'max'
}).reset_index()

# Renaming and formatting
data.rename(columns={
    'Temperature': 'MeanTemperature',
    'THI_adj': 'MeanTHI_adj',
    'StartDate': 'Date'
}, inplace=True)
data['Date'] = pd.to_datetime(data['Date'])

# Display the first few rows of the transformed data
data.head()

Unnamed: 0,SE_Number,FarmName_Pseudo,Date,DailyYield,PreviousDailyYield,DailyYieldChange,HW,MeanTemperature,MeanTHI_adj,DaysInMilk,YearSeason,cum_HW,Temp15Threshold,Age,BreedName,LactationNumber,HeatLoad,CumulativeHeatLoad,HeatStress
0,SE-064c0cec-1189,a624fb9a,2022-01-01,30.77,30.77,0.0,0,-3.025,28.012944,191,2022-1,0,0,3095,02 SLB,7,-32.987056,0.0,0
1,SE-064c0cec-1189,a624fb9a,2022-01-02,48.22,30.77,17.45,0,-0.279167,32.898193,192,2022-1,0,0,3096,02 SLB,7,-28.101807,0.0,0
2,SE-064c0cec-1189,a624fb9a,2022-01-03,30.53,48.22,-17.69,0,2.033333,36.760487,193,2022-1,0,0,3097,02 SLB,7,-24.239513,0.0,0
3,SE-064c0cec-1189,a624fb9a,2022-01-04,42.26,30.53,11.73,0,0.066667,31.939524,194,2022-1,0,0,3098,02 SLB,7,-29.060476,0.0,0
4,SE-064c0cec-1189,a624fb9a,2022-01-05,38.49,42.26,-3.77,0,-3.7,26.498206,195,2022-1,0,0,3099,02 SLB,7,-34.501794,0.0,0


In [6]:
# Check if DailyYield is centered around approx the same for each farm
print("Mean of DailyYield:", data.groupby('FarmName_Pseudo')['DailyYield'].mean())
print("Standard Deviation of DailyYield:", data.groupby('FarmName_Pseudo')['DailyYield'].std())

Mean of DailyYield: FarmName_Pseudo
5c06d92d    37.322718
752efd72    31.412607
a624fb9a    34.164215
f454e660    30.811276
Name: DailyYield, dtype: float64
Standard Deviation of DailyYield: FarmName_Pseudo
5c06d92d     9.854998
752efd72     7.760655
a624fb9a    11.417583
f454e660    11.923900
Name: DailyYield, dtype: float64


In [7]:
# Define the Wilmink Lactation Curve function
def wilmink_lactation_curve(dim, a, b, c, d):
    return a + b * dim + c * np.exp(-d * dim)

# Function to remove outliers
def remove_outliers(group, threshold=3.5):
    mean = np.mean(group['DailyYield'])
    std_dev = np.std(group['DailyYield'])
    return group[(group['DailyYield'] > mean - threshold * std_dev) & (group['DailyYield'] < mean + threshold * std_dev)]

# Function to smooth the data using .loc to avoid SettingWithCopyWarning
def smooth_data(group, window=5):
    group.loc[:, 'DailyYield'] = group['DailyYield'].rolling(window, min_periods=1).mean()
    return group

# Function to fit curve_fit before applying Quantile Regression
def fit_with_curve_fit_before_quantreg(dataset, quantile=0.7, max_iter=100000):
    params_dict = {}
    valid_indices = []

    for (animal_number, lactation_number), group in tqdm(dataset.groupby(['SE_Number', 'LactationNumber']), unit=" Segments"):
        try:
            group = remove_outliers(group)
            group = smooth_data(group)
            x_data = group['DaysInMilk'].values.astype(float)
            y_data = group['DailyYield'].values.astype(float)

            # Ensure there are enough data points to fit the curve
            if (len(x_data) < 150) or (len(y_data) < 150):
                print(f"Insufficient data points for cow {animal_number}, lactation {lactation_number}, skipping.")
                continue

            valid_indices.extend(group.index)

            # Fit the model using curve_fit
            try:
                # Initial parameter guesses
                initial_guesses = [np.mean(y_data), 0, np.mean(y_data) / 2, 0.1]
                # Bounds on the parameters to prevent overflow
                bounds = ([-np.inf, -np.inf, -np.inf, 0], [np.inf, np.inf, np.inf, np.inf])

                with warnings.catch_warnings():
                    warnings.filterwarnings('error', category=OptimizeWarning)
                    popt, _ = curve_fit(
                        wilmink_lactation_curve, x_data, y_data,
                        p0=initial_guesses, bounds=bounds, maxfev=30000
                    )

                # Store the parameters in the dictionary
                params_dict[(animal_number, lactation_number)] = {'a': popt[0], 'b': popt[1], 'c': popt[2], 'd': popt[3]}

            except Exception as e:
                print(f"Curve fitting failed for cow {animal_number}, lactation {lactation_number}: {e}")
                continue

            # Now use the parameters from curve_fit for quantile regression
            X = np.column_stack([np.ones_like(x_data), x_data, np.exp(-x_data), -x_data * np.exp(-x_data)])
            quantreg_model = sm.QuantReg(y_data, X)
            quantreg_fit = quantreg_model.fit(q=quantile, max_iter=max_iter, start_params=popt)

            # Update parameters after quantile regression
            a, b, c, d = quantreg_fit.params
            dataset.loc[group.index, 'ExpectedYield'] = wilmink_lactation_curve(group['DaysInMilk'], a, b, c, d)
            params_dict[(animal_number, lactation_number)] = {'a': a, 'b': b, 'c': c, 'd': d}

        except Exception as e:
            print(f"Error processing cow {animal_number}, lactation {lactation_number}: {e}")

    return dataset, params_dict

# Apply the curve fitting before quantile regression
data, params_dict = fit_with_curve_fit_before_quantreg(data, quantile=0.7, max_iter=100000)

# Remove rows where ExpectedYield is NaN
data = data.dropna(subset=['ExpectedYield'])

# Calculate NormalizedDailyYield, PreviousDailyYield, DailyYieldChange, and NormalizedDailyYieldChange
data.loc[:, 'NormalizedDailyYield'] = data['DailyYield'] / data['ExpectedYield']
data.loc[:, 'PreviousDailyYield'] = data.groupby('SE_Number')['DailyYield'].shift(1)
data.loc[:, 'DailyYieldChange'] = data['DailyYield'] - data['PreviousDailyYield']
data.loc[:, 'NormalizedDailyYieldChange'] = data['DailyYieldChange'] / data['ExpectedYield']
data

  4%|▍         | 108/2746 [00:12<02:37, 16.76 Segments/s]

Insufficient data points for cow SE-5c06d92d-2621, lactation 3, skipping.


  4%|▍         | 122/2746 [00:14<04:16, 10.23 Segments/s]

Insufficient data points for cow SE-5c06d92d-2639, lactation 3, skipping.


  8%|▊         | 212/2746 [00:21<04:08, 10.21 Segments/s]

Insufficient data points for cow SE-5c06d92d-2776, lactation 5, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 10%|▉         | 267/2746 [00:25<02:19, 17.79 Segments/s]

Insufficient data points for cow SE-5c06d92d-2815, lactation 2, skipping.


 10%|▉         | 271/2746 [00:26<03:00, 13.72 Segments/s]

Insufficient data points for cow SE-5c06d92d-2824, lactation 3, skipping.


 11%|█         | 307/2746 [00:28<02:15, 17.97 Segments/s]

Insufficient data points for cow SE-5c06d92d-2845, lactation 2, skipping.


 12%|█▏        | 328/2746 [00:29<01:49, 22.13 Segments/s]

Insufficient data points for cow SE-5c06d92d-2870, lactation 2, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 14%|█▍        | 382/2746 [00:33<01:35, 24.68 Segments/s]

Insufficient data points for cow SE-5c06d92d-2911, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2914, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2919, lactation 2, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 19%|█▊        | 511/2746 [00:52<03:53,  9.56 Segments/s]

Insufficient data points for cow SE-5c06d92d-3045, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3047, lactation 1, skipping.


 19%|█▉        | 528/2746 [00:53<02:08, 17.30 Segments/s]

Insufficient data points for cow SE-5c06d92d-3048, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-3049, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3063, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3063, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3065, lactation 1, skipping.


 20%|█▉        | 543/2746 [00:53<01:53, 19.37 Segments/s]

Insufficient data points for cow SE-5c06d92d-3068, lactation 1, skipping.


 23%|██▎       | 622/2746 [00:58<01:44, 20.40 Segments/s]

Insufficient data points for cow SE-5c06d92d-3116, lactation 3, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 32%|███▏      | 877/2746 [01:09<00:28, 65.82 Segments/s]

Insufficient data points for cow SE-5c06d92d-3273, lactation 3, skipping.


 33%|███▎      | 902/2746 [01:10<00:32, 56.19 Segments/s]

Insufficient data points for cow SE-5c06d92d-3288, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3310, lactation 3, skipping.


 34%|███▍      | 937/2746 [01:14<02:15, 13.31 Segments/s]

Insufficient data points for cow SE-5c06d92d-3327, lactation 3, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
  result = getattr(ufunc, method)(*inputs2, **kwargs)
 42%|████▏     | 1152/2746 [01:26<01:51, 14.29 Segments/s]

Insufficient data points for cow SE-5c06d92d-3655, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3662, lactation 1, skipping.


 44%|████▍     | 1208/2746 [01:29<01:08, 22.31 Segments/s]

Insufficient data points for cow SE-752efd72-0051, lactation 3, skipping.


 46%|████▋     | 1275/2746 [01:34<01:03, 23.28 Segments/s]

Insufficient data points for cow SE-752efd72-0117, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0129, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0136, lactation 2, skipping.


 48%|████▊     | 1312/2746 [01:34<00:30, 46.76 Segments/s]

Insufficient data points for cow SE-752efd72-0143, lactation 2, skipping.


 48%|████▊     | 1328/2746 [01:35<00:57, 24.56 Segments/s]

Insufficient data points for cow SE-752efd72-0166, lactation 1, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 51%|█████     | 1389/2746 [01:45<01:44, 12.97 Segments/s]

Insufficient data points for cow SE-752efd72-0196, lactation 5, skipping.


 53%|█████▎    | 1463/2746 [01:48<00:29, 42.92 Segments/s]

Insufficient data points for cow SE-752efd72-0232, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0234, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0239, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0243, lactation 1, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 58%|█████▊    | 1584/2746 [01:58<00:55, 21.10 Segments/s]

Insufficient data points for cow SE-752efd72-0289, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0298, lactation 1, skipping.


 58%|█████▊    | 1595/2746 [01:58<00:40, 28.77 Segments/s]

Insufficient data points for cow SE-752efd72-0312, lactation 4, skipping.


 59%|█████▉    | 1617/2746 [01:59<00:42, 26.53 Segments/s]

Insufficient data points for cow SE-752efd72-0317, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0322, lactation 4, skipping.


 60%|██████    | 1653/2746 [02:00<00:19, 57.39 Segments/s]

Insufficient data points for cow SE-752efd72-0329, lactation 1, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 63%|██████▎   | 1735/2746 [02:01<00:11, 88.17 Segments/s]

Insufficient data points for cow SE-752efd72-0369, lactation 1, skipping.


 65%|██████▌   | 1786/2746 [02:01<00:13, 69.06 Segments/s]

Insufficient data points for cow SE-752efd72-0394, lactation 3, skipping.


 66%|██████▌   | 1800/2746 [02:02<00:11, 81.46 Segments/s]

Insufficient data points for cow SE-752efd72-0409, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0411, lactation 3, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 69%|██████▉   | 1899/2746 [02:06<00:28, 30.22 Segments/s]

Insufficient data points for cow SE-752efd72-0468, lactation 2, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 71%|███████▏  | 1959/2746 [02:09<00:32, 24.20 Segments/s]

Insufficient data points for cow SE-752efd72-0502, lactation 2, skipping.


 72%|███████▏  | 1989/2746 [02:09<00:18, 41.74 Segments/s]

Insufficient data points for cow SE-752efd72-0521, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0526, lactation 2, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
  result = getattr(ufunc, method)(*inputs2, **kwargs)
 75%|███████▍  | 2048/2746 [02:11<00:15, 45.24 Segments/s]

Insufficient data points for cow SE-752efd72-0590, lactation 1, skipping.


 75%|███████▍  | 2056/2746 [02:12<00:21, 31.80 Segments/s]

Insufficient data points for cow SE-752efd72-0612, lactation 1, skipping.


 75%|███████▌  | 2064/2746 [02:12<00:24, 27.72 Segments/s]

Insufficient data points for cow SE-752efd72-0621, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0622, lactation 1, skipping.


 76%|███████▌  | 2093/2746 [02:15<01:03, 10.34 Segments/s]

Insufficient data points for cow SE-752efd72-2751, lactation 5, skipping.


 78%|███████▊  | 2130/2746 [02:20<00:53, 11.47 Segments/s]

Insufficient data points for cow SE-752efd72-2797, lactation 3, skipping.
Insufficient data points for cow SE-7fd04cd3-679, lactation 4, skipping.
Insufficient data points for cow SE-a624fb9a-1162, lactation 7, skipping.


 78%|███████▊  | 2143/2746 [02:21<00:39, 15.11 Segments/s]

Insufficient data points for cow SE-a624fb9a-1200, lactation 4, skipping.


 79%|███████▉  | 2176/2746 [02:25<00:54, 10.41 Segments/s]

Insufficient data points for cow SE-a624fb9a-1251, lactation 3, skipping.


 80%|████████  | 2198/2746 [02:26<00:29, 18.75 Segments/s]

Insufficient data points for cow SE-a624fb9a-1267, lactation 3, skipping.


 81%|████████▏ | 2233/2746 [02:29<00:31, 16.23 Segments/s]

Insufficient data points for cow SE-a624fb9a-1312, lactation 2, skipping.


 82%|████████▏ | 2238/2746 [02:30<00:43, 11.64 Segments/s]

Insufficient data points for cow SE-a624fb9a-1330, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1333, lactation 1, skipping.


 84%|████████▎ | 2295/2746 [02:32<00:13, 33.58 Segments/s]

Insufficient data points for cow SE-a624fb9a-1373, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1374, lactation 1, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
  result = getattr(ufunc, method)(*inputs2, **kwargs)
 89%|████████▉ | 2441/2746 [02:43<00:04, 72.26 Segments/s]

Insufficient data points for cow SE-f454e660-0448, lactation 5, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 95%|█████████▍| 2602/2746 [02:59<00:06, 21.24 Segments/s]

Insufficient data points for cow SE-f454e660-509, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-510, lactation 2, skipping.


 96%|█████████▌| 2634/2746 [03:02<00:08, 13.65 Segments/s]

Insufficient data points for cow SE-f454e660-551, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-559, lactation 1, skipping.


 96%|█████████▌| 2639/2746 [03:03<00:08, 12.80 Segments/s]

Insufficient data points for cow SE-f454e660-567, lactation 1, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
  result = getattr(ufunc, method)(*inputs2, **kwargs)
 97%|█████████▋| 2663/2746 [03:04<00:05, 16.00 Segments/s]

Insufficient data points for cow SE-f454e660-585, lactation 1, skipping.


 99%|█████████▉| 2717/2746 [03:08<00:01, 20.17 Segments/s]

Insufficient data points for cow SE-f454e660-729, lactation 1, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
100%|██████████| 2746/2746 [03:14<00:00, 14.11 Segments/s]


Unnamed: 0,SE_Number,FarmName_Pseudo,Date,DailyYield,PreviousDailyYield,DailyYieldChange,HW,MeanTemperature,MeanTHI_adj,DaysInMilk,...,Temp15Threshold,Age,BreedName,LactationNumber,HeatLoad,CumulativeHeatLoad,HeatStress,ExpectedYield,NormalizedDailyYield,NormalizedDailyYieldChange
0,SE-064c0cec-1189,a624fb9a,2022-01-01,30.77,,,0,-3.025000,28.012944,191,...,0,3095,02 SLB,7,-32.987056,0.000000,0,36.70163,0.838382,
1,SE-064c0cec-1189,a624fb9a,2022-01-02,48.22,30.77,17.45,0,-0.279167,32.898193,192,...,0,3096,02 SLB,7,-28.101807,0.000000,0,36.59126,1.317801,0.47689
2,SE-064c0cec-1189,a624fb9a,2022-01-03,30.53,48.22,-17.69,0,2.033333,36.760487,193,...,0,3097,02 SLB,7,-24.239513,0.000000,0,36.48089,0.836877,-0.484911
3,SE-064c0cec-1189,a624fb9a,2022-01-04,42.26,30.53,11.73,0,0.066667,31.939524,194,...,0,3098,02 SLB,7,-29.060476,0.000000,0,36.370519,1.16193,0.322514
4,SE-064c0cec-1189,a624fb9a,2022-01-05,38.49,42.26,-3.77,0,-3.700000,26.498206,195,...,0,3099,02 SLB,7,-34.501794,0.000000,0,36.260149,1.061496,-0.103971
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
659907,SE-fcdf259d-0044-0,f454e660,2023-06-08,15.14,8.97,6.17,0,15.570833,59.383267,352,...,1,4155,41 Fjällko,10,-1.616733,0.000000,0,14.629583,1.034889,0.421748
659908,SE-fcdf259d-0044-0,f454e660,2023-06-09,7.47,15.14,-7.67,0,13.254167,54.534255,353,...,1,4156,41 Fjällko,10,-6.465745,0.000000,0,14.54211,0.513681,-0.527434
659909,SE-fcdf259d-0044-0,f454e660,2023-06-10,14.73,7.47,7.26,0,13.258333,54.082367,354,...,1,4157,41 Fjällko,10,-6.917633,0.000000,0,14.454637,1.01905,0.502261
659910,SE-fcdf259d-0044-0,f454e660,2023-06-12,12.27,14.73,-2.46,0,15.820833,62.015093,356,...,1,4159,41 Fjällko,10,1.015093,1.015093,0,14.279692,0.859262,-0.172273


In [8]:
# Check if NormalizedDailyYield is centered around 1 for each unique farm
print("Mean of NormalizedDailyYield:", data.groupby('FarmName_Pseudo')['NormalizedDailyYield'].mean())
print("Standard Deviation of NormalizedDailyYield:", data.groupby('FarmName_Pseudo')['NormalizedDailyYield'].std())

Mean of NormalizedDailyYield: FarmName_Pseudo
5c06d92d    0.946694
752efd72    0.950787
a624fb9a    0.910134
f454e660    0.912572
Name: NormalizedDailyYield, dtype: Float64
Standard Deviation of NormalizedDailyYield: FarmName_Pseudo
5c06d92d    0.137908
752efd72     0.12354
a624fb9a    0.213946
f454e660    0.246999
Name: NormalizedDailyYield, dtype: Float64
