In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.optimize import curve_fit, OptimizeWarning
from tqdm import tqdm
import warnings
from scipy.stats import zscore
from statsmodels.tsa.stattools import acf, pacf
from scipy.optimize import minimize
from vqr import VectorQuantileRegressor
from vqr.solvers.regularized_lse import RegularizedDualVQRSolver
import statsmodels.api as sm


sns.set_theme()
sns.set_context("notebook")
%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dtype_dict = {
    'FarmName_Pseudo': 'str',
    'SE_Number': 'str',
    'AnimalNumber': 'Int64',          
    'StartDate': 'str',
    'StartTime': 'str',
    'DateTime': 'str',
    'LactationNumber': 'Int64',       
    'DaysInMilk': 'Int64', 
    'YearSeason': 'str',           
    'TotalYield': 'float',
    'DateTime': 'str',
    'BreedName': 'str',
    'Age': 'Int64',
    'Mother': 'str',
    'Father': 'str',
    'CullDecisionDate': 'str',
    'Temperature': 'float',
    'RelativeHumidity': 'float',      
    'THI_adj': 'float',
    'HW': 'Int64',                    
    'cum_HW': 'Int64',                
    'Temp15Threshold': 'Int64'        
}


# Load the CSV with specified dtypes
data = pd.read_csv('../Data/MergedData/CleanedYieldData.csv', dtype=dtype_dict)

# Convert date and time columns back to datetime and time objects
data['DateTime'] = pd.to_datetime(data['DateTime'], errors='coerce')
data['StartTime'] = pd.to_datetime(data['StartTime'], format='%H:%M:%S', errors='coerce').dt.time
data['StartDate'] = pd.to_datetime(data['StartDate'], errors='coerce')
data['CullDecisionDate'] = pd.to_datetime(data['CullDecisionDate'], errors='coerce')
data['DateTime'] = pd.to_datetime(data['DateTime'], errors='coerce')
data.head()

Unnamed: 0,FarmName_Pseudo,SE_Number,AnimalNumber,StartDate,StartTime,LactationNumber,DaysInMilk,TotalYield,DateTime,YearSeason,...,Mother,Father,CullDecisionDate,Temperature,RelativeHumidity,THI_adj,HW,cum_HW,Temp15Threshold,Age
0,a624fb9a,SE-064c0cec-1189,5189,2022-01-01,06:25:00,7,191,13.9,2022-01-01 06:25:00,2022-1,...,,,2022-12-20,-3.025,0.930917,28.012944,0,0,0,3095
1,a624fb9a,SE-064c0cec-1189,5189,2022-01-01,16:41:00,7,191,16.87,2022-01-01 16:41:00,2022-1,...,,,2022-12-20,-3.025,0.930917,28.012944,0,0,0,3095
2,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,15:29:00,7,192,20.41,2022-01-02 15:29:00,2022-1,...,,,2022-12-20,-0.279167,0.990542,32.898193,0,0,0,3096
3,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,22:44:00,7,192,11.53,2022-01-02 22:44:00,2022-1,...,,,2022-12-20,-0.279167,0.990542,32.898193,0,0,0,3096
4,a624fb9a,SE-064c0cec-1189,5189,2022-01-02,03:31:00,7,192,16.28,2022-01-02 03:31:00,2022-1,...,,,2022-12-20,-0.279167,0.990542,32.898193,0,0,0,3096


In [3]:
# Calculate the DailyYield for each cow each day
data['DailyYield'] = data.groupby(['SE_Number', 'StartDate'])['TotalYield'].transform('sum')

# Sort the data by AnimalNumber and StartDate
data.sort_values(['AnimalNumber', 'StartDate'], inplace=True)

# Calculate the previous day's total yield for each cow
data['PreviousDailyYield'] = data.groupby('AnimalNumber')['DailyYield'].shift(1)

# Calculate the daily yield change for each cow
data['DailyYieldChange'] = data['DailyYield'] - data['PreviousDailyYield']

# Group and aggregate data
data = data.groupby(['SE_Number', 'FarmName_Pseudo', 'StartDate']).agg({
    'DailyYield': 'first',
    'PreviousDailyYield': 'first',
    'DailyYieldChange': 'first',
    'HW': 'max',
    'Temperature': 'mean',
    'THI_adj': 'mean',
    'DaysInMilk': 'first',
    'YearSeason': 'first',
    'cum_HW': 'max',
    'Temp15Threshold': 'max',
    'Age': 'first',
    'BreedName': 'first',
    'LactationNumber': 'first'
}).reset_index()

# Renaming and formatting
data.rename(columns={
    'Temperature': 'MeanTemperature',
    'THI_adj': 'MeanTHI_adj',
    'StartDate': 'Date'
}, inplace=True)
data['Date'] = pd.to_datetime(data['Date'])

# Display the first few rows of the transformed data
data.head()

Unnamed: 0,SE_Number,FarmName_Pseudo,Date,DailyYield,PreviousDailyYield,DailyYieldChange,HW,MeanTemperature,MeanTHI_adj,DaysInMilk,YearSeason,cum_HW,Temp15Threshold,Age,BreedName,LactationNumber
0,SE-064c0cec-1189,a624fb9a,2022-01-01,30.77,30.77,0.0,0,-3.025,28.012944,191,2022-1,0,0,3095,02 SLB,7
1,SE-064c0cec-1189,a624fb9a,2022-01-02,48.22,30.77,17.45,0,-0.279167,32.898193,192,2022-1,0,0,3096,02 SLB,7
2,SE-064c0cec-1189,a624fb9a,2022-01-03,30.53,48.22,-17.69,0,2.033333,36.760487,193,2022-1,0,0,3097,02 SLB,7
3,SE-064c0cec-1189,a624fb9a,2022-01-04,42.26,30.53,11.73,0,0.066667,31.939524,194,2022-1,0,0,3098,02 SLB,7
4,SE-064c0cec-1189,a624fb9a,2022-01-05,38.49,42.26,-3.77,0,-3.7,26.498206,195,2022-1,0,0,3099,02 SLB,7


In [4]:
# Check if DailyYield is centered around approx the same for each farm
print("Mean of DailyYield:", data.groupby('FarmName_Pseudo')['DailyYield'].mean())
print("Standard Deviation of DailyYield:", data.groupby('FarmName_Pseudo')['DailyYield'].std())

Mean of DailyYield: FarmName_Pseudo
5c06d92d    37.322718
752efd72    31.412607
a624fb9a    34.164215
f454e660    30.811276
Name: DailyYield, dtype: float64
Standard Deviation of DailyYield: FarmName_Pseudo
5c06d92d     9.854998
752efd72     7.760655
a624fb9a    11.417583
f454e660    11.923900
Name: DailyYield, dtype: float64


In [5]:
# Define the Wilmink Lactation Curve function
def wilmink_lactation_curve(dim, a, b, c, d):
    return a + b * dim + c * np.exp(-d * dim)

# Function to remove outliers
def remove_outliers(group, threshold=3.5):
    mean = np.mean(group['DailyYield'])
    std_dev = np.std(group['DailyYield'])
    return group[(group['DailyYield'] > mean - threshold * std_dev) & (group['DailyYield'] < mean + threshold * std_dev)]

# Function to smooth the data using .loc to avoid SettingWithCopyWarning
def smooth_data(group, window=5):
    group.loc[:, 'DailyYield'] = group['DailyYield'].rolling(window, min_periods=1).mean()
    return group

# Function to fit curve_fit before applying Quantile Regression
def fit_with_curve_fit_before_quantreg(dataset, quantile=0.7, max_iter=100000):
    params_dict = {}
    valid_indices = []

    for (animal_number, lactation_number), group in tqdm(dataset.groupby(['SE_Number', 'LactationNumber']), unit=" Segments"):
        try:
            group = remove_outliers(group)
            group = smooth_data(group)
            x_data = group['DaysInMilk'].values.astype(float)
            y_data = group['DailyYield'].values.astype(float)

            # Ensure there are enough data points to fit the curve
            if (len(x_data) < 150) or (len(y_data) < 150):
                print(f"Insufficient data points for cow {animal_number}, lactation {lactation_number}, skipping.")
                continue

            valid_indices.extend(group.index)

            # Fit the model using curve_fit
            try:
                # Initial parameter guesses
                initial_guesses = [np.mean(y_data), 0, np.mean(y_data) / 2, 0.1]
                # Bounds on the parameters to prevent overflow
                bounds = ([-np.inf, -np.inf, -np.inf, 0], [np.inf, np.inf, np.inf, np.inf])

                with warnings.catch_warnings():
                    warnings.filterwarnings('error', category=OptimizeWarning)
                    popt, _ = curve_fit(
                        wilmink_lactation_curve, x_data, y_data,
                        p0=initial_guesses, bounds=bounds, maxfev=30000
                    )

                # Store the parameters in the dictionary
                params_dict[(animal_number, lactation_number)] = {'a': popt[0], 'b': popt[1], 'c': popt[2], 'd': popt[3]}

            except Exception as e:
                print(f"Curve fitting failed for cow {animal_number}, lactation {lactation_number}: {e}")
                continue

            # Now use the parameters from curve_fit for quantile regression
            X = np.column_stack([np.ones_like(x_data), x_data, np.exp(-x_data), -x_data * np.exp(-x_data)])
            quantreg_model = sm.QuantReg(y_data, X)
            quantreg_fit = quantreg_model.fit(q=quantile, max_iter=max_iter, start_params=popt)

            # Update parameters after quantile regression
            a, b, c, d = quantreg_fit.params
            dataset.loc[group.index, 'ExpectedYield'] = wilmink_lactation_curve(group['DaysInMilk'], a, b, c, d)
            params_dict[(animal_number, lactation_number)] = {'a': a, 'b': b, 'c': c, 'd': d}

        except Exception as e:
            print(f"Error processing cow {animal_number}, lactation {lactation_number}: {e}")

    return dataset, params_dict

# Apply the curve fitting before quantile regression
data, params_dict = fit_with_curve_fit_before_quantreg(data, quantile=0.7, max_iter=100000)

# Remove rows where ExpectedYield is NaN
data = data.dropna(subset=['ExpectedYield'])

# Calculate NormalizedDailyYield, PreviousDailyYield, DailyYieldChange, and NormalizedDailyYieldChange
data.loc[:, 'NormalizedDailyYield'] = data['DailyYield'] / data['ExpectedYield']
data.loc[:, 'PreviousDailyYield'] = data.groupby('SE_Number')['DailyYield'].shift(1)
data.loc[:, 'DailyYieldChange'] = data['DailyYield'] - data['PreviousDailyYield']
data.loc[:, 'NormalizedDailyYieldChange'] = data['DailyYieldChange'] / data['ExpectedYield']
data

  0%|          | 2/2746 [00:00<02:27, 18.59 Segments/s]

Insufficient data points for cow SE-064c0cec-1189, lactation 7, skipping.
Insufficient data points for cow SE-30dc5787-1389, lactation 5, skipping.


  0%|          | 10/2746 [00:00<04:07, 11.07 Segments/s]

Insufficient data points for cow SE-5c06d92d-2055, lactation 8, skipping.


  1%|          | 33/2746 [00:01<01:30, 29.98 Segments/s]

Insufficient data points for cow SE-5c06d92d-2268, lactation 6, skipping.
Insufficient data points for cow SE-5c06d92d-2268, lactation 8, skipping.
Insufficient data points for cow SE-5c06d92d-2325, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2325, lactation 7, skipping.
Insufficient data points for cow SE-5c06d92d-2327, lactation 6, skipping.
Insufficient data points for cow SE-5c06d92d-2328, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2384, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2405, lactation 5, skipping.


  2%|▏         | 62/2746 [00:02<01:08, 39.04 Segments/s]

Insufficient data points for cow SE-5c06d92d-2408, lactation 7, skipping.
Insufficient data points for cow SE-5c06d92d-2410, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2457, lactation 7, skipping.
Insufficient data points for cow SE-5c06d92d-2461, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2470, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2470, lactation 7, skipping.
Insufficient data points for cow SE-5c06d92d-2478, lactation 7, skipping.
Insufficient data points for cow SE-5c06d92d-2483, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2502, lactation 4, skipping.


  3%|▎         | 73/2746 [00:06<06:00,  7.42 Segments/s]

Insufficient data points for cow SE-5c06d92d-2514, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2514, lactation 6, skipping.
Insufficient data points for cow SE-5c06d92d-2515, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2529, lactation 4, skipping.


  3%|▎         | 81/2746 [00:07<05:31,  8.03 Segments/s]

Insufficient data points for cow SE-5c06d92d-2545, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2550, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2564, lactation 4, skipping.


  4%|▎         | 97/2746 [00:08<03:41, 11.97 Segments/s]

Insufficient data points for cow SE-5c06d92d-2572, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2577, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2582, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2601, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2606, lactation 4, skipping.


  4%|▍         | 108/2746 [00:08<03:01, 14.53 Segments/s]

Insufficient data points for cow SE-5c06d92d-2621, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2628, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2628, lactation 5, skipping.


  4%|▍         | 115/2746 [00:09<02:57, 14.84 Segments/s]

Insufficient data points for cow SE-5c06d92d-2631, lactation 5, skipping.


  5%|▍         | 136/2746 [00:10<02:20, 18.58 Segments/s]

Insufficient data points for cow SE-5c06d92d-2639, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2643, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2647, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2649, lactation 3, skipping.


  5%|▌         | 143/2746 [00:10<02:24, 18.07 Segments/s]

Insufficient data points for cow SE-5c06d92d-2662, lactation 6, skipping.
Insufficient data points for cow SE-5c06d92d-2669, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2673, lactation 3, skipping.


  6%|▌         | 156/2746 [00:12<04:41,  9.21 Segments/s]

Insufficient data points for cow SE-5c06d92d-2697, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2713, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2724, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2731, lactation 3, skipping.


  6%|▌         | 171/2746 [00:16<07:27,  5.76 Segments/s]

Insufficient data points for cow SE-5c06d92d-2741, lactation 3, skipping.


  7%|▋         | 187/2746 [00:17<05:11,  8.20 Segments/s]

Insufficient data points for cow SE-5c06d92d-2744, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2744, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2745, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2747, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2748, lactation 6, skipping.
Insufficient data points for cow SE-5c06d92d-2754, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2757, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2757, lactation 6, skipping.
Insufficient data points for cow SE-5c06d92d-2762, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2762, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2764, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2768, lactation 3, skipping.


  8%|▊         | 218/2746 [00:19<03:01, 13.90 Segments/s]

Insufficient data points for cow SE-5c06d92d-2776, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2778, lactation 6, skipping.
Insufficient data points for cow SE-5c06d92d-2780, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2780, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2782, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2782, lactation 5, skipping.


  9%|▉         | 250/2746 [00:21<03:00, 13.80 Segments/s]

Insufficient data points for cow SE-5c06d92d-2804, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2808, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2808, lactation 6, skipping.
Insufficient data points for cow SE-5c06d92d-2815, lactation 2, skipping.


 10%|▉         | 268/2746 [00:23<03:57, 10.45 Segments/s]

Insufficient data points for cow SE-5c06d92d-2821, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2824, lactation 3, skipping.


 10%|█         | 275/2746 [00:24<03:47, 10.87 Segments/s]

Insufficient data points for cow SE-5c06d92d-2830, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2831, lactation 2, skipping.


 11%|█         | 301/2746 [00:25<02:44, 14.86 Segments/s]

Insufficient data points for cow SE-5c06d92d-2837, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2837, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2840, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2845, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2845, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2855, lactation 2, skipping.


 12%|█▏        | 332/2746 [00:26<01:28, 27.31 Segments/s]

Insufficient data points for cow SE-5c06d92d-2859, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2859, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2864, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2866, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2870, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2870, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2877, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2885, lactation 3, skipping.


 13%|█▎        | 353/2746 [00:30<03:24, 11.69 Segments/s]

Insufficient data points for cow SE-5c06d92d-2895, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2895, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2896, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2896, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2897, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2904, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2904, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2911, lactation 2, skipping.


 14%|█▎        | 375/2746 [00:30<01:55, 20.45 Segments/s]

Insufficient data points for cow SE-5c06d92d-2914, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2914, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2915, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2919, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2920, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2922, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2922, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2926, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2929, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2932, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2932, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2937, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2937, lactation 5, skipping.


 14%|█▍        | 398/2746 [00:31<01:40, 23.43 Segments/s]

Insufficient data points for cow SE-5c06d92d-2941, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-2948, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2949, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2950, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2950, lactation 5, skipping.


 15%|█▍        | 406/2746 [00:31<01:43, 22.58 Segments/s]

Insufficient data points for cow SE-5c06d92d-2955, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-2963, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2964, lactation 2, skipping.


 16%|█▌        | 431/2746 [00:32<01:22, 28.13 Segments/s]

Insufficient data points for cow SE-5c06d92d-2982, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2984, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-2987, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2993, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-2993, lactation 5, skipping.


 16%|█▌        | 443/2746 [00:36<05:10,  7.41 Segments/s]

Insufficient data points for cow SE-5c06d92d-3012, lactation 4, skipping.


 16%|█▋        | 447/2746 [00:38<06:12,  6.16 Segments/s]

Insufficient data points for cow SE-5c06d92d-3017, lactation 4, skipping.


 17%|█▋        | 465/2746 [00:38<03:38, 10.46 Segments/s]

Insufficient data points for cow SE-5c06d92d-3023, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-3024, lactation 4, skipping.


 17%|█▋        | 479/2746 [00:40<04:00,  9.41 Segments/s]

Insufficient data points for cow SE-5c06d92d-3033, lactation 5, skipping.


 18%|█▊        | 495/2746 [00:44<06:37,  5.67 Segments/s]

Insufficient data points for cow SE-5c06d92d-3040, lactation 5, skipping.


 19%|█▉        | 517/2746 [00:45<02:47, 13.32 Segments/s]

Insufficient data points for cow SE-5c06d92d-3041, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3043, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3045, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3046, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3046, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3047, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3047, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3048, lactation 5, skipping.
Insufficient data points for cow SE-5c06d92d-3049, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3049, lactation 4, skipping.


 19%|█▉        | 527/2746 [00:45<02:25, 15.23 Segments/s]

Insufficient data points for cow SE-5c06d92d-3054, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3056, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3060, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3063, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3063, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3065, lactation 1, skipping.


 20%|██        | 555/2746 [00:46<01:27, 24.94 Segments/s]

Insufficient data points for cow SE-5c06d92d-3068, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3068, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3070, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3071, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3072, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3073, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3073, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3075, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3076, lactation 1, skipping.


 21%|██        | 581/2746 [00:46<01:01, 35.02 Segments/s]

Insufficient data points for cow SE-5c06d92d-3076, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3078, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3078, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3080, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3083, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3085, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3095, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3095, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3098, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3099, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3099, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3104, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3104, lactation 4, skipping.
Insufficient data points for cow SE-5c

 22%|██▏       | 617/2746 [00:47<00:34, 62.25 Segments/s]

Insufficient data points for cow SE-5c06d92d-3111, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3113, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3114, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3114, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3116, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3116, lactation 3, skipping.


 23%|██▎       | 629/2746 [00:47<00:39, 53.53 Segments/s]

Insufficient data points for cow SE-5c06d92d-3118, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3118, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3120, lactation 1, skipping.


 23%|██▎       | 639/2746 [00:48<01:12, 29.02 Segments/s]

Insufficient data points for cow SE-5c06d92d-3124, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3124, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3126, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3129, lactation 1, skipping.


 24%|██▎       | 650/2746 [00:49<01:48, 19.35 Segments/s]

Insufficient data points for cow SE-5c06d92d-3134, lactation 1, skipping.


 24%|██▍       | 656/2746 [00:52<04:51,  7.16 Segments/s]

Insufficient data points for cow SE-5c06d92d-3134, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3136, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3139, lactation 4, skipping.


 25%|██▍       | 673/2746 [00:53<03:24, 10.16 Segments/s]

Insufficient data points for cow SE-5c06d92d-3144, lactation 4, skipping.


 25%|██▌       | 691/2746 [00:54<02:53, 11.84 Segments/s]

Insufficient data points for cow SE-5c06d92d-3153, lactation 4, skipping.
Insufficient data points for cow SE-5c06d92d-3154, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3154, lactation 4, skipping.


 26%|██▌       | 702/2746 [00:55<03:01, 11.24 Segments/s]

Insufficient data points for cow SE-5c06d92d-3165, lactation 3, skipping.


 26%|██▌       | 713/2746 [00:56<02:29, 13.61 Segments/s]

Insufficient data points for cow SE-5c06d92d-3173, lactation 3, skipping.


 27%|██▋       | 730/2746 [00:57<01:36, 20.80 Segments/s]

Insufficient data points for cow SE-5c06d92d-3176, lactation 3, skipping.


 28%|██▊       | 759/2746 [00:57<00:52, 37.68 Segments/s]

Insufficient data points for cow SE-5c06d92d-3195, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3197, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3200, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3207, lactation 3, skipping.


 29%|██▉       | 808/2746 [00:58<00:32, 59.11 Segments/s]

Insufficient data points for cow SE-5c06d92d-3224, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3225, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3226, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3238, lactation 3, skipping.


 30%|███       | 837/2746 [00:58<00:22, 86.35 Segments/s]

Insufficient data points for cow SE-5c06d92d-3242, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3243, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3245, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3251, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3254, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3258, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3264, lactation 3, skipping.


 31%|███       | 849/2746 [00:59<00:22, 83.92 Segments/s]

Insufficient data points for cow SE-5c06d92d-3265, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3266, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3270, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3272, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3273, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3274, lactation 3, skipping.


 32%|███▏      | 877/2746 [00:59<00:20, 90.07 Segments/s]

Insufficient data points for cow SE-5c06d92d-3277, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3280, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3282, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3283, lactation 3, skipping.


 32%|███▏      | 888/2746 [00:59<00:20, 92.30 Segments/s]

Insufficient data points for cow SE-5c06d92d-3285, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3288, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3292, lactation 1, skipping.


 33%|███▎      | 913/2746 [01:00<00:32, 57.25 Segments/s]

Insufficient data points for cow SE-5c06d92d-3294, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3310, lactation 3, skipping.


 34%|███▍      | 940/2746 [01:00<00:22, 79.47 Segments/s]

Insufficient data points for cow SE-5c06d92d-3323, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3327, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3329, lactation 3, skipping.
Insufficient data points for cow SE-5c06d92d-3330, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3354, lactation 2, skipping.


 36%|███▌      | 986/2746 [01:00<00:16, 104.79 Segments/s]

Insufficient data points for cow SE-5c06d92d-3377, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3384, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3390, lactation 2, skipping.


 37%|███▋      | 1009/2746 [01:01<00:32, 53.60 Segments/s]

Insufficient data points for cow SE-5c06d92d-3397, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3401, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3403, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3404, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3406, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3407, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3413, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3416, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3418, lactation 2, skipping.


 37%|███▋      | 1022/2746 [01:01<00:31, 55.06 Segments/s]

Insufficient data points for cow SE-5c06d92d-3424, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3427, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3432, lactation 2, skipping.


 38%|███▊      | 1054/2746 [01:02<00:20, 80.88 Segments/s]

Insufficient data points for cow SE-5c06d92d-3438, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3441, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3443, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3448, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3450, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3452, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3455, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3461, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3463, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3468, lactation 2, skipping.


 39%|███▉      | 1072/2746 [01:02<00:16, 99.45 Segments/s]

Insufficient data points for cow SE-5c06d92d-3474, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3475, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3485, lactation 2, skipping.
Insufficient data points for cow SE-5c06d92d-3493, lactation 2, skipping.


 42%|████▏     | 1159/2746 [01:02<00:07, 209.31 Segments/s]

Insufficient data points for cow SE-5c06d92d-3543, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3582, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3589, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3593, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3594, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3595, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3597, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3598, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3599, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3601, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3602, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3603, lactation 1, skipping.
Insufficient data points for cow SE-5c06d92d-3604, lactation 1, skipping.
Insufficient data points for cow SE-5c

 43%|████▎     | 1183/2746 [01:03<00:21, 71.62 Segments/s] 

Insufficient data points for cow SE-752efd72-0038, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0042, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0048, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0051, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0065, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0066, lactation 3, skipping.


 44%|████▎     | 1201/2746 [01:04<00:33, 46.15 Segments/s]

Insufficient data points for cow SE-752efd72-0069, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0072, lactation 3, skipping.


 45%|████▍     | 1224/2746 [01:06<01:07, 22.71 Segments/s]

Insufficient data points for cow SE-752efd72-0081, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0088, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0090, lactation 4, skipping.


 45%|████▍     | 1231/2746 [01:07<01:19, 19.14 Segments/s]

Insufficient data points for cow SE-752efd72-0097, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0098, lactation 4, skipping.


 45%|████▌     | 1239/2746 [01:07<01:23, 17.96 Segments/s]

Insufficient data points for cow SE-752efd72-0100, lactation 5, skipping.
Insufficient data points for cow SE-752efd72-0101, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0103, lactation 5, skipping.


 46%|████▌     | 1257/2746 [01:08<01:32, 16.15 Segments/s]

Insufficient data points for cow SE-752efd72-0116, lactation 5, skipping.
Insufficient data points for cow SE-752efd72-0117, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0118, lactation 5, skipping.


 46%|████▋     | 1276/2746 [01:09<01:13, 20.07 Segments/s]

Insufficient data points for cow SE-752efd72-0129, lactation 2, skipping.


 47%|████▋     | 1282/2746 [01:10<01:20, 18.12 Segments/s]

Insufficient data points for cow SE-752efd72-0134, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0136, lactation 2, skipping.


 47%|████▋     | 1304/2746 [01:10<00:49, 29.20 Segments/s]

Insufficient data points for cow SE-752efd72-0137, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0143, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0143, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0147, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0154, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0155, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0155, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0157, lactation 2, skipping.


 48%|████▊     | 1312/2746 [01:10<00:55, 26.03 Segments/s]

Insufficient data points for cow SE-752efd72-0162, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0163, lactation 3, skipping.


 48%|████▊     | 1320/2746 [01:11<01:24, 16.93 Segments/s]

Insufficient data points for cow SE-752efd72-0164, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0166, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0168, lactation 2, skipping.


 48%|████▊     | 1330/2746 [01:12<01:35, 14.90 Segments/s]

Insufficient data points for cow SE-752efd72-0170, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0170, lactation 5, skipping.


 49%|████▊     | 1337/2746 [01:13<01:43, 13.66 Segments/s]

Insufficient data points for cow SE-752efd72-0172, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0173, lactation 2, skipping.


 49%|████▉     | 1346/2746 [01:14<01:59, 11.70 Segments/s]

Insufficient data points for cow SE-752efd72-0176, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0177, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0179, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0181, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0182, lactation 2, skipping.


 50%|████▉     | 1364/2746 [01:15<02:00, 11.44 Segments/s]

Insufficient data points for cow SE-752efd72-0190, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0190, lactation 3, skipping.


 50%|█████     | 1376/2746 [01:17<02:21,  9.68 Segments/s]

Insufficient data points for cow SE-752efd72-0193, lactation 5, skipping.
Insufficient data points for cow SE-752efd72-0196, lactation 5, skipping.
Insufficient data points for cow SE-752efd72-0198, lactation 2, skipping.


 51%|█████     | 1392/2746 [01:18<02:13, 10.12 Segments/s]

Insufficient data points for cow SE-752efd72-0202, lactation 5, skipping.


 51%|█████▏    | 1408/2746 [01:20<03:09,  7.04 Segments/s]

Insufficient data points for cow SE-752efd72-0210, lactation 5, skipping.


 53%|█████▎    | 1455/2746 [01:22<00:35, 35.94 Segments/s]

Insufficient data points for cow SE-752efd72-0230, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0232, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0232, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0234, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0234, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0235, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0236, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0236, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0239, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0243, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0243, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0246, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0246, lactation 4, skipping.
Insufficient data points for cow SE-75

 56%|█████▌    | 1534/2746 [01:22<00:10, 117.47 Segments/s]

Insufficient data points for cow SE-752efd72-0256, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0257, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0258, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0259, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0259, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0260, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0262, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0262, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0263, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0264, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0265, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0265, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0266, lactation 1, skipping.
Insufficient data points for cow SE-75

 57%|█████▋    | 1556/2746 [01:22<00:08, 135.16 Segments/s]

Insufficient data points for cow SE-752efd72-0279, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0280, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0280, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0281, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0284, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0284, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0285, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0286, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0287, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0289, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0292, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0293, lactation 1, skipping.


 58%|█████▊    | 1592/2746 [01:24<00:35, 32.12 Segments/s] 

Insufficient data points for cow SE-752efd72-0293, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0298, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0303, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0312, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-0314, lactation 1, skipping.


 59%|█████▉    | 1620/2746 [01:26<00:38, 29.33 Segments/s]

Insufficient data points for cow SE-752efd72-0315, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0316, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0317, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0320, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0322, lactation 4, skipping.


 60%|██████    | 1649/2746 [01:26<00:30, 35.76 Segments/s]

Insufficient data points for cow SE-752efd72-0329, lactation 1, skipping.


 61%|██████    | 1676/2746 [01:27<00:25, 41.94 Segments/s]

Insufficient data points for cow SE-752efd72-0339, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0340, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0347, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0351, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0354, lactation 1, skipping.


 62%|██████▏   | 1710/2746 [01:27<00:14, 70.77 Segments/s]

Insufficient data points for cow SE-752efd72-0357, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0363, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0364, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0367, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0369, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0372, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0373, lactation 3, skipping.


 64%|██████▎   | 1748/2746 [01:27<00:09, 105.86 Segments/s]

Insufficient data points for cow SE-752efd72-0377, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0378, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0379, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0380, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0387, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0389, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0390, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0391, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0392, lactation 3, skipping.


 65%|██████▍   | 1783/2746 [01:28<00:07, 129.15 Segments/s]

Insufficient data points for cow SE-752efd72-0393, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0394, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0395, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0396, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0398, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0399, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0401, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0403, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0408, lactation 3, skipping.


 66%|██████▌   | 1803/2746 [01:28<00:06, 142.84 Segments/s]

Insufficient data points for cow SE-752efd72-0409, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-0410, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0411, lactation 3, skipping.


 68%|██████▊   | 1863/2746 [01:30<00:16, 54.60 Segments/s] 

Insufficient data points for cow SE-752efd72-0429, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0430, lactation 2, skipping.


 69%|██████▉   | 1898/2746 [01:31<00:16, 52.02 Segments/s]

Insufficient data points for cow SE-752efd72-0447, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0452, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0453, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0454, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0457, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0461, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0466, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0467, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0468, lactation 2, skipping.


 70%|██████▉   | 1912/2746 [01:31<00:22, 36.49 Segments/s]

Insufficient data points for cow SE-752efd72-0474, lactation 2, skipping.


 70%|██████▉   | 1922/2746 [01:32<00:27, 29.68 Segments/s]

Insufficient data points for cow SE-752efd72-0476, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0477, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0478, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0479, lactation 2, skipping.


 71%|███████   | 1954/2746 [01:33<00:19, 40.37 Segments/s]

Insufficient data points for cow SE-752efd72-0481, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0482, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0486, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0487, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0488, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0489, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0492, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0493, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0496, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0498, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0499, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0501, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0502, lactation 2, skipping.
Insufficient data points for cow SE-75

 73%|███████▎  | 1992/2746 [01:33<00:11, 67.31 Segments/s]

Insufficient data points for cow SE-752efd72-0519, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0521, lactation 2, skipping.
Insufficient data points for cow SE-752efd72-0526, lactation 2, skipping.


 73%|███████▎  | 2006/2746 [01:33<00:10, 70.78 Segments/s]

Insufficient data points for cow SE-752efd72-0544, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0550, lactation 1, skipping.


 73%|███████▎  | 2018/2746 [01:34<00:16, 43.03 Segments/s]

Insufficient data points for cow SE-752efd72-0559, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0562, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0564, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0568, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0569, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0571, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0572, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0573, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0574, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0576, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0577, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0578, lactation 1, skipping.
Insufficient data points for cow SE-752efd72-0581, lactation 1, skipping.
Insufficient data points for cow SE-75

 76%|███████▌  | 2087/2746 [01:35<00:10, 64.62 Segments/s]

Insufficient data points for cow SE-752efd72-2614, lactation 5, skipping.
Insufficient data points for cow SE-752efd72-2682, lactation 5, skipping.
Insufficient data points for cow SE-752efd72-2705, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-2729, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-2751, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-2751, lactation 5, skipping.


 76%|███████▋  | 2098/2746 [01:36<00:20, 31.09 Segments/s]

Insufficient data points for cow SE-752efd72-2772, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-2772, lactation 5, skipping.


 77%|███████▋  | 2106/2746 [01:37<00:29, 21.65 Segments/s]

Insufficient data points for cow SE-752efd72-2777, lactation 6, skipping.
Insufficient data points for cow SE-752efd72-2780, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-2787, lactation 4, skipping.


 77%|███████▋  | 2124/2746 [01:40<01:07,  9.24 Segments/s]

Insufficient data points for cow SE-752efd72-2797, lactation 3, skipping.
Insufficient data points for cow SE-752efd72-2797, lactation 4, skipping.
Insufficient data points for cow SE-752efd72-494, lactation 2, skipping.
Insufficient data points for cow SE-7fd04cd3-679, lactation 4, skipping.


 78%|███████▊  | 2129/2746 [01:40<01:00, 10.26 Segments/s]

Insufficient data points for cow SE-a624fb9a-1162, lactation 5, skipping.
Insufficient data points for cow SE-a624fb9a-1162, lactation 7, skipping.
Insufficient data points for cow SE-a624fb9a-1187, lactation 5, skipping.
Insufficient data points for cow SE-a624fb9a-1187, lactation 6, skipping.
Insufficient data points for cow SE-a624fb9a-1193, lactation 5, skipping.
Insufficient data points for cow SE-a624fb9a-1193, lactation 7, skipping.
Insufficient data points for cow SE-a624fb9a-1195, lactation 5, skipping.
Insufficient data points for cow SE-a624fb9a-1197, lactation 5, skipping.
Insufficient data points for cow SE-a624fb9a-1199, lactation 7, skipping.
Insufficient data points for cow SE-a624fb9a-1200, lactation 4, skipping.
Insufficient data points for cow SE-a624fb9a-1201, lactation 5, skipping.
Insufficient data points for cow SE-a624fb9a-1210, lactation 4, skipping.


 79%|███████▊  | 2157/2746 [01:41<00:40, 14.48 Segments/s]

Insufficient data points for cow SE-a624fb9a-1215, lactation 6, skipping.


 79%|███████▊  | 2159/2746 [01:42<00:53, 10.89 Segments/s]

Insufficient data points for cow SE-a624fb9a-1223, lactation 4, skipping.
Insufficient data points for cow SE-a624fb9a-1225, lactation 5, skipping.


 79%|███████▉  | 2165/2746 [01:43<01:21,  7.16 Segments/s]

Insufficient data points for cow SE-a624fb9a-1232, lactation 4, skipping.
Insufficient data points for cow SE-a624fb9a-1249, lactation 4, skipping.
Insufficient data points for cow SE-a624fb9a-1251, lactation 3, skipping.
Insufficient data points for cow SE-a624fb9a-1251, lactation 5, skipping.
Insufficient data points for cow SE-a624fb9a-1252, lactation 4, skipping.
Insufficient data points for cow SE-a624fb9a-1252, lactation 6, skipping.


 80%|████████  | 2206/2746 [01:44<00:20, 26.30 Segments/s]

Insufficient data points for cow SE-a624fb9a-1264, lactation 3, skipping.
Insufficient data points for cow SE-a624fb9a-1266, lactation 3, skipping.
Insufficient data points for cow SE-a624fb9a-1267, lactation 3, skipping.
Insufficient data points for cow SE-a624fb9a-1274, lactation 3, skipping.
Insufficient data points for cow SE-a624fb9a-1276, lactation 3, skipping.
Insufficient data points for cow SE-a624fb9a-1285, lactation 3, skipping.
Insufficient data points for cow SE-a624fb9a-1287, lactation 5, skipping.
Insufficient data points for cow SE-a624fb9a-1295, lactation 3, skipping.
Insufficient data points for cow SE-a624fb9a-1296, lactation 3, skipping.


 82%|████████▏ | 2242/2746 [01:47<00:26, 18.76 Segments/s]

Insufficient data points for cow SE-a624fb9a-1307, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1308, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1308, lactation 4, skipping.
Insufficient data points for cow SE-a624fb9a-1312, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1320, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1321, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1322, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1322, lactation 4, skipping.
Insufficient data points for cow SE-a624fb9a-1324, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1327, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1330, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1333, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1339, lactation 2, skipping.


 83%|████████▎ | 2266/2746 [01:48<00:19, 24.41 Segments/s]

Insufficient data points for cow SE-a624fb9a-1342, lactation 4, skipping.
Insufficient data points for cow SE-a624fb9a-1347, lactation 4, skipping.
Insufficient data points for cow SE-a624fb9a-1348, lactation 3, skipping.


 83%|████████▎ | 2292/2746 [01:49<00:15, 28.77 Segments/s]

Insufficient data points for cow SE-a624fb9a-1367, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1368, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1373, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1374, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1376, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1378, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1380, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1381, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1383, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1385, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1388, lactation 1, skipping.


 84%|████████▍ | 2306/2746 [01:49<00:12, 35.92 Segments/s]

Insufficient data points for cow SE-a624fb9a-1394, lactation 1, skipping.


 84%|████████▍ | 2314/2746 [01:50<00:16, 25.44 Segments/s]

Insufficient data points for cow SE-a624fb9a-1395, lactation 3, skipping.


 85%|████████▌ | 2337/2746 [01:55<01:02,  6.50 Segments/s]

Insufficient data points for cow SE-a624fb9a-1422, lactation 2, skipping.
Insufficient data points for cow SE-a624fb9a-1423, lactation 1, skipping.
Insufficient data points for cow SE-a624fb9a-1425, lactation 2, skipping.


 87%|████████▋ | 2401/2746 [01:58<00:11, 30.14 Segments/s]

Insufficient data points for cow SE-a624fb9a-1482, lactation 1, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 89%|████████▉ | 2443/2746 [01:59<00:04, 73.71 Segments/s]

Insufficient data points for cow SE-f454e660-0282, lactation 6, skipping.
Insufficient data points for cow SE-f454e660-0338, lactation 6, skipping.
Insufficient data points for cow SE-f454e660-0444, lactation 4, skipping.
Insufficient data points for cow SE-f454e660-0448, lactation 5, skipping.
Insufficient data points for cow SE-f454e660-0494, lactation 5, skipping.
Insufficient data points for cow SE-f454e660-0503, lactation 4, skipping.
Insufficient data points for cow SE-f454e660-0579, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-0622, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-0673, lactation 3, skipping.


 90%|█████████ | 2473/2746 [02:02<00:15, 17.56 Segments/s]

Insufficient data points for cow SE-f454e660-0710, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-0742, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-0743, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-0745, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-0760, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-0766, lactation 2, skipping.


 90%|█████████ | 2485/2746 [02:03<00:14, 18.53 Segments/s]

Insufficient data points for cow SE-f454e660-0768, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-0770, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-0785, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-0790, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-0791, lactation 2, skipping.


 91%|█████████ | 2504/2746 [02:03<00:11, 20.83 Segments/s]

Insufficient data points for cow SE-f454e660-0807, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-0833, lactation 1, skipping.


 91%|█████████▏| 2511/2746 [02:04<00:12, 18.84 Segments/s]

Insufficient data points for cow SE-f454e660-0858, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-0874, lactation 1, skipping.


 92%|█████████▏| 2516/2746 [02:07<00:33,  6.83 Segments/s]

Insufficient data points for cow SE-f454e660-243, lactation 4, skipping.
Insufficient data points for cow SE-f454e660-243, lactation 5, skipping.


 92%|█████████▏| 2530/2746 [02:08<00:21, 10.00 Segments/s]

Insufficient data points for cow SE-f454e660-268, lactation 4, skipping.
Insufficient data points for cow SE-f454e660-269, lactation 4, skipping.
Insufficient data points for cow SE-f454e660-285, lactation 4, skipping.
Insufficient data points for cow SE-f454e660-323, lactation 4, skipping.


 93%|█████████▎| 2554/2746 [02:09<00:11, 17.39 Segments/s]

Insufficient data points for cow SE-f454e660-360, lactation 5, skipping.
Insufficient data points for cow SE-f454e660-400, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-407, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-408, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-409, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-431, lactation 3, skipping.


 94%|█████████▎| 2570/2746 [02:09<00:07, 24.39 Segments/s]

Insufficient data points for cow SE-f454e660-453, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-455, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-456, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-458, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-459, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-462, lactation 3, skipping.


 94%|█████████▍| 2586/2746 [02:09<00:04, 37.58 Segments/s]

Insufficient data points for cow SE-f454e660-467, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-480, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-483, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-487, lactation 2, skipping.


 95%|█████████▍| 2595/2746 [02:13<00:17,  8.42 Segments/s]

Insufficient data points for cow SE-f454e660-500, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-500, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-501, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-501, lactation 4, skipping.
Insufficient data points for cow SE-f454e660-508, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-508, lactation 4, skipping.


 95%|█████████▍| 2602/2746 [02:14<00:16,  8.93 Segments/s]

Insufficient data points for cow SE-f454e660-509, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-510, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-510, lactation 3, skipping.


 95%|█████████▌| 2615/2746 [02:14<00:11, 11.80 Segments/s]

Insufficient data points for cow SE-f454e660-528, lactation 1, skipping.


 95%|█████████▌| 2620/2746 [02:16<00:16,  7.82 Segments/s]

Insufficient data points for cow SE-f454e660-539, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-545, lactation 2, skipping.


 96%|█████████▌| 2638/2746 [02:17<00:08, 13.07 Segments/s]

Insufficient data points for cow SE-f454e660-550, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-551, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-553, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-559, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-567, lactation 1, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
 97%|█████████▋| 2665/2746 [02:18<00:04, 19.34 Segments/s]

Insufficient data points for cow SE-f454e660-576, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-580, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-584, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-584, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-585, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-585, lactation 3, skipping.
Insufficient data points for cow SE-f454e660-591, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-600, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-612, lactation 1, skipping.


 98%|█████████▊| 2681/2746 [02:19<00:03, 20.26 Segments/s]

Insufficient data points for cow SE-f454e660-636, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-639, lactation 2, skipping.
Insufficient data points for cow SE-f454e660-641, lactation 1, skipping.


 99%|█████████▊| 2705/2746 [02:20<00:01, 20.52 Segments/s]

Insufficient data points for cow SE-f454e660-665, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-667, lactation 1, skipping.


  result = getattr(ufunc, method)(*inputs2, **kwargs)
100%|█████████▉| 2740/2746 [02:23<00:00, 20.15 Segments/s]

Insufficient data points for cow SE-f454e660-729, lactation 1, skipping.
Insufficient data points for cow SE-f454e660-735, lactation 2, skipping.


100%|██████████| 2746/2746 [02:27<00:00, 18.60 Segments/s]


Insufficient data points for cow SE-fcdf259d-0044-0, lactation 9, skipping.


Unnamed: 0,SE_Number,FarmName_Pseudo,Date,DailyYield,PreviousDailyYield,DailyYieldChange,HW,MeanTemperature,MeanTHI_adj,DaysInMilk,YearSeason,cum_HW,Temp15Threshold,Age,BreedName,LactationNumber,ExpectedYield,NormalizedDailyYield,NormalizedDailyYieldChange
88,SE-064c0cec-1189,a624fb9a,2022-05-28,15.22,,,0,9.912500,50.478673,3,2022-2,0,0,3242,02 SLB,8,41.792667,0.364179,
89,SE-064c0cec-1189,a624fb9a,2022-05-29,18.96,15.22,3.74,0,10.066667,53.841648,4,2022-2,0,0,3243,02 SLB,8,41.712445,0.454541,0.089661
90,SE-064c0cec-1189,a624fb9a,2022-05-30,22.64,18.96,3.68,0,10.466667,52.935959,5,2022-2,0,1,3244,02 SLB,8,41.632223,0.54381,0.088393
91,SE-064c0cec-1189,a624fb9a,2022-05-31,26.49,22.64,3.85,0,11.183333,52.872112,6,2022-2,0,0,3245,02 SLB,8,41.552001,0.637514,0.092655
92,SE-064c0cec-1189,a624fb9a,2022-06-01,33.61,26.49,7.12,0,12.704167,56.056547,7,2022-3,0,1,3246,02 SLB,8,41.471778,0.810431,0.171683
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
659907,SE-fcdf259d-0044-0,f454e660,2023-06-08,15.14,8.97,6.17,0,15.570833,59.383267,352,2023-3,0,1,4155,41 Fjällko,10,14.629583,1.034889,0.421748
659908,SE-fcdf259d-0044-0,f454e660,2023-06-09,7.47,15.14,-7.67,0,13.254167,54.534255,353,2023-3,0,1,4156,41 Fjällko,10,14.54211,0.513681,-0.527434
659909,SE-fcdf259d-0044-0,f454e660,2023-06-10,14.73,7.47,7.26,0,13.258333,54.082367,354,2023-3,0,1,4157,41 Fjällko,10,14.454637,1.01905,0.502261
659910,SE-fcdf259d-0044-0,f454e660,2023-06-12,12.27,14.73,-2.46,0,15.820833,62.015093,356,2023-3,0,1,4159,41 Fjällko,10,14.279692,0.859262,-0.172273


In [6]:
# Check if NormalizedDailyYield is centered around 1 for each unique farm
print("Mean of NormalizedDailyYield:", data.groupby('FarmName_Pseudo')['NormalizedDailyYield'].mean())
print("Standard Deviation of NormalizedDailyYield:", data.groupby('FarmName_Pseudo')['NormalizedDailyYield'].std())

Mean of NormalizedDailyYield: FarmName_Pseudo
5c06d92d     0.94483
752efd72    0.951713
a624fb9a    0.907158
f454e660    0.912254
Name: NormalizedDailyYield, dtype: Float64
Standard Deviation of NormalizedDailyYield: FarmName_Pseudo
5c06d92d    0.141058
752efd72    0.117541
a624fb9a    0.216816
f454e660    0.247192
Name: NormalizedDailyYield, dtype: Float64


In [7]:
# Define the THI threshold
THI_THRESHOLD = 61

# Calculate the daily heat load based on the THI threshold
data['HeatLoad'] = data['MeanTHI_adj'].apply(lambda x: x - THI_THRESHOLD if x > THI_THRESHOLD else -(THI_THRESHOLD - x))

# Initialize the cumulative heat load column with float type
data['CumulativeHeatLoad'] = 0.0  # Explicitly set as float

data = data.reset_index(drop=True)

# Iterate through the data to calculate cumulative heat load correctly
for i in range(1, len(data)):
    previous_cumulative = data.at[i-1, 'CumulativeHeatLoad']
    current_heat_load = data.at[i, 'HeatLoad']
    
    if current_heat_load < 0:  # If current heat load is negative
        new_cumulative = previous_cumulative + 2 * current_heat_load
    else:
        new_cumulative = previous_cumulative + current_heat_load
    
    # Ensure the cumulative heat load never goes below zero
    if new_cumulative > 0:
        data.at[i, 'CumulativeHeatLoad'] = new_cumulative
    else:
        data.at[i, 'CumulativeHeatLoad'] = 0.0  # Ensure float is maintained

# Drop rows where the 'DailyYield' column has NaN values
data = data.dropna(subset=['DailyYield'])

data.head(-5)

Unnamed: 0,SE_Number,FarmName_Pseudo,Date,DailyYield,PreviousDailyYield,DailyYieldChange,HW,MeanTemperature,MeanTHI_adj,DaysInMilk,...,cum_HW,Temp15Threshold,Age,BreedName,LactationNumber,ExpectedYield,NormalizedDailyYield,NormalizedDailyYieldChange,HeatLoad,CumulativeHeatLoad
0,SE-064c0cec-1189,a624fb9a,2022-05-28,15.22,,,0,9.912500,50.478673,3,...,0,0,3242,02 SLB,8,41.792667,0.364179,,-10.521327,0.000000
1,SE-064c0cec-1189,a624fb9a,2022-05-29,18.96,15.22,3.74,0,10.066667,53.841648,4,...,0,0,3243,02 SLB,8,41.712445,0.454541,0.089661,-7.158352,0.000000
2,SE-064c0cec-1189,a624fb9a,2022-05-30,22.64,18.96,3.68,0,10.466667,52.935959,5,...,0,1,3244,02 SLB,8,41.632223,0.54381,0.088393,-8.064041,0.000000
3,SE-064c0cec-1189,a624fb9a,2022-05-31,26.49,22.64,3.85,0,11.183333,52.872112,6,...,0,0,3245,02 SLB,8,41.552001,0.637514,0.092655,-8.127888,0.000000
4,SE-064c0cec-1189,a624fb9a,2022-06-01,33.61,26.49,7.12,0,12.704167,56.056547,7,...,0,1,3246,02 SLB,8,41.471778,0.810431,0.171683,-4.943453,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605438,SE-fcdf259d-0044-0,f454e660,2023-06-03,12.67,15.75,-3.08,0,12.666667,53.132530,347,...,0,1,4150,41 Fjällko,10,15.066946,0.840914,-0.204421,-7.867470,0.000000
605439,SE-fcdf259d-0044-0,f454e660,2023-06-04,22.31,12.67,9.64,0,13.079167,56.726870,348,...,0,1,4151,41 Fjällko,10,14.979474,1.489371,0.643547,-4.273130,0.000000
605440,SE-fcdf259d-0044-0,f454e660,2023-06-05,12.84,22.31,-9.47,0,14.237500,58.482418,349,...,0,1,4152,41 Fjällko,10,14.892001,0.862208,-0.635912,-2.517582,0.000000
605441,SE-fcdf259d-0044-0,f454e660,2023-06-06,9.47,12.84,-3.37,0,15.345833,60.546358,350,...,0,1,4153,41 Fjällko,10,14.804528,0.639669,-0.227633,-0.453642,0.000000


In [8]:
# When CumulativeHeatLoad is greater than 3, it indicates that the cow is under heat stress
data['HeatStress'] = (data['CumulativeHeatLoad'] > 3).astype(int)
data.head(-5)

Unnamed: 0,SE_Number,FarmName_Pseudo,Date,DailyYield,PreviousDailyYield,DailyYieldChange,HW,MeanTemperature,MeanTHI_adj,DaysInMilk,...,Temp15Threshold,Age,BreedName,LactationNumber,ExpectedYield,NormalizedDailyYield,NormalizedDailyYieldChange,HeatLoad,CumulativeHeatLoad,HeatStress
0,SE-064c0cec-1189,a624fb9a,2022-05-28,15.22,,,0,9.912500,50.478673,3,...,0,3242,02 SLB,8,41.792667,0.364179,,-10.521327,0.000000,0
1,SE-064c0cec-1189,a624fb9a,2022-05-29,18.96,15.22,3.74,0,10.066667,53.841648,4,...,0,3243,02 SLB,8,41.712445,0.454541,0.089661,-7.158352,0.000000,0
2,SE-064c0cec-1189,a624fb9a,2022-05-30,22.64,18.96,3.68,0,10.466667,52.935959,5,...,1,3244,02 SLB,8,41.632223,0.54381,0.088393,-8.064041,0.000000,0
3,SE-064c0cec-1189,a624fb9a,2022-05-31,26.49,22.64,3.85,0,11.183333,52.872112,6,...,0,3245,02 SLB,8,41.552001,0.637514,0.092655,-8.127888,0.000000,0
4,SE-064c0cec-1189,a624fb9a,2022-06-01,33.61,26.49,7.12,0,12.704167,56.056547,7,...,1,3246,02 SLB,8,41.471778,0.810431,0.171683,-4.943453,0.000000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605438,SE-fcdf259d-0044-0,f454e660,2023-06-03,12.67,15.75,-3.08,0,12.666667,53.132530,347,...,1,4150,41 Fjällko,10,15.066946,0.840914,-0.204421,-7.867470,0.000000,0
605439,SE-fcdf259d-0044-0,f454e660,2023-06-04,22.31,12.67,9.64,0,13.079167,56.726870,348,...,1,4151,41 Fjällko,10,14.979474,1.489371,0.643547,-4.273130,0.000000,0
605440,SE-fcdf259d-0044-0,f454e660,2023-06-05,12.84,22.31,-9.47,0,14.237500,58.482418,349,...,1,4152,41 Fjällko,10,14.892001,0.862208,-0.635912,-2.517582,0.000000,0
605441,SE-fcdf259d-0044-0,f454e660,2023-06-06,9.47,12.84,-3.37,0,15.345833,60.546358,350,...,1,4153,41 Fjällko,10,14.804528,0.639669,-0.227633,-0.453642,0.000000,0


In [9]:
# Make a dataframe from the parameters dictionary, it should contain Se_Number, LactationNumber, a, b, c, d
params_df = pd.DataFrame(params_dict).T.reset_index()
params_df.columns = ['SE_Number', 'LactationNumber', 'a', 'b', 'c', 'd']
params_df.head(-5)

Unnamed: 0,SE_Number,LactationNumber,a,b,c,d
0,SE-064c0cec-1189,8,42.033334,-0.080222,1.898115e+03,8.106137e+02
1,SE-30dc5787-1389,6,53.527867,-0.078718,2.219759e+03,8.703531e+02
2,SE-30dc5787-1389,7,50.409783,-0.102973,2.765920e+00,1.116788e+02
3,SE-30dc5787-1396,5,35.152650,-0.024675,-6.560076e-92,1.212551e-89
4,SE-30dc5787-1396,6,56.606093,-0.078237,1.683317e+00,1.470512e+02
...,...,...,...,...,...,...
2029,SE-f454e660-778,1,23.615367,0.029837,7.821117e+02,3.860034e+02
2030,SE-f454e660-780,1,20.483449,0.003832,2.093686e+03,7.422069e+02
2031,SE-f454e660-787,1,35.441859,0.001323,1.359524e+03,5.631486e+02
2032,SE-f454e660-788,1,20.860446,0.017855,4.799397e+03,1.477833e+03


In [10]:
# Calculate Z-scores for each parameter
params_df['z_a'] = zscore(params_df['a'])
params_df['z_b'] = zscore(params_df['b'])
params_df['z_c'] = zscore(params_df['c'])
params_df['z_d'] = zscore(params_df['d'])

params_df.head(-5)

Unnamed: 0,SE_Number,LactationNumber,a,b,c,d,z_a,z_b,z_c,z_d
0,SE-064c0cec-1189,8,42.033334,-0.080222,1.898115e+03,8.106137e+02,-0.243702,-0.924142,-0.033840,-0.062644
1,SE-30dc5787-1389,6,53.527867,-0.078718,2.219759e+03,8.703531e+02,0.842135,-0.879756,0.025373,-0.050381
2,SE-30dc5787-1389,7,50.409783,-0.102973,2.765920e+00,1.116788e+02,0.547584,-1.595556,-0.382764,-0.206116
3,SE-30dc5787-1396,5,35.152650,-0.024675,-6.560076e-92,1.212551e-89,-0.893690,0.715146,-0.383273,-0.229041
4,SE-30dc5787-1396,6,56.606093,-0.078237,1.683317e+00,1.470512e+02,1.132922,-0.865558,-0.382963,-0.198855
...,...,...,...,...,...,...,...,...,...,...
2029,SE-f454e660-778,1,23.615367,0.029837,7.821117e+02,3.860034e+02,-1.983566,2.323912,-0.239290,-0.149805
2030,SE-f454e660-780,1,20.483449,0.003832,2.093686e+03,7.422069e+02,-2.279424,1.556437,0.002164,-0.076686
2031,SE-f454e660-787,1,35.441859,0.001323,1.359524e+03,5.631486e+02,-0.866370,1.482398,-0.132992,-0.113442
2032,SE-f454e660-788,1,20.860446,0.017855,4.799397e+03,1.477833e+03,-2.243811,1.970282,0.500271,0.074319


In [11]:
# Identify outliers (using Z-score > 3.5 or < -3.5 as threshold)
outliers = params_df[(np.abs(params_df[['z_a', 'z_b', 'z_c', 'z_d']]) > 3.5).any(axis=1)]

x = outliers.count()
print("Number of outliers:", x)

# Optionally, drop the outliers
params_df_cleaned = params_df.drop(outliers.index)
params_df_cleaned.head(-5)

Number of outliers: SE_Number          51
LactationNumber    51
a                  51
b                  51
c                  51
d                  51
z_a                51
z_b                51
z_c                51
z_d                51
dtype: int64


Unnamed: 0,SE_Number,LactationNumber,a,b,c,d,z_a,z_b,z_c,z_d
0,SE-064c0cec-1189,8,42.033334,-0.080222,1.898115e+03,8.106137e+02,-0.243702,-0.924142,-0.033840,-0.062644
1,SE-30dc5787-1389,6,53.527867,-0.078718,2.219759e+03,8.703531e+02,0.842135,-0.879756,0.025373,-0.050381
2,SE-30dc5787-1389,7,50.409783,-0.102973,2.765920e+00,1.116788e+02,0.547584,-1.595556,-0.382764,-0.206116
3,SE-30dc5787-1396,5,35.152650,-0.024675,-6.560076e-92,1.212551e-89,-0.893690,0.715146,-0.383273,-0.229041
4,SE-30dc5787-1396,6,56.606093,-0.078237,1.683317e+00,1.470512e+02,1.132922,-0.865558,-0.382963,-0.198855
...,...,...,...,...,...,...,...,...,...,...
2027,SE-f454e660-750,1,32.711090,-0.001080,6.658862e+03,2.062282e+03,-1.124333,1.411480,0.842589,0.194291
2028,SE-f454e660-767,1,26.261397,0.045251,5.227893e+03,1.437320e+03,-1.733607,2.778795,0.579155,0.066003
2029,SE-f454e660-778,1,23.615367,0.029837,7.821117e+02,3.860034e+02,-1.983566,2.323912,-0.239290,-0.149805
2030,SE-f454e660-780,1,20.483449,0.003832,2.093686e+03,7.422069e+02,-2.279424,1.556437,0.002164,-0.076686


In [12]:
# Identify unique SE_Number and LactationNumber combinations from the outliers
outlier_combinations = outliers[['SE_Number', 'LactationNumber']].drop_duplicates()

# Merge with the original data to find rows that match these outlier combinations
data_cleaned = data.merge(outlier_combinations, on=['SE_Number', 'LactationNumber'], how='left', indicator=True)

# Keep only the rows that do not match the outlier combinations
data_cleaned = data_cleaned[data_cleaned['_merge'] == 'left_only'].drop(columns=['_merge'])

# Now data_cleaned contains the original data with the outlier combinations removed
print("Number of rows removed:", len(data) - len(data_cleaned))
data_cleaned.head(-5)

Number of rows removed: 15389


Unnamed: 0,SE_Number,FarmName_Pseudo,Date,DailyYield,PreviousDailyYield,DailyYieldChange,HW,MeanTemperature,MeanTHI_adj,DaysInMilk,...,Temp15Threshold,Age,BreedName,LactationNumber,ExpectedYield,NormalizedDailyYield,NormalizedDailyYieldChange,HeatLoad,CumulativeHeatLoad,HeatStress
0,SE-064c0cec-1189,a624fb9a,2022-05-28,15.22,,,0,9.912500,50.478673,3,...,0,3242,02 SLB,8,41.792667,0.364179,,-10.521327,0.000000,0
1,SE-064c0cec-1189,a624fb9a,2022-05-29,18.96,15.22,3.74,0,10.066667,53.841648,4,...,0,3243,02 SLB,8,41.712445,0.454541,0.089661,-7.158352,0.000000,0
2,SE-064c0cec-1189,a624fb9a,2022-05-30,22.64,18.96,3.68,0,10.466667,52.935959,5,...,1,3244,02 SLB,8,41.632223,0.54381,0.088393,-8.064041,0.000000,0
3,SE-064c0cec-1189,a624fb9a,2022-05-31,26.49,22.64,3.85,0,11.183333,52.872112,6,...,0,3245,02 SLB,8,41.552001,0.637514,0.092655,-8.127888,0.000000,0
4,SE-064c0cec-1189,a624fb9a,2022-06-01,33.61,26.49,7.12,0,12.704167,56.056547,7,...,1,3246,02 SLB,8,41.471778,0.810431,0.171683,-4.943453,0.000000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605438,SE-fcdf259d-0044-0,f454e660,2023-06-03,12.67,15.75,-3.08,0,12.666667,53.132530,347,...,1,4150,41 Fjällko,10,15.066946,0.840914,-0.204421,-7.867470,0.000000,0
605439,SE-fcdf259d-0044-0,f454e660,2023-06-04,22.31,12.67,9.64,0,13.079167,56.726870,348,...,1,4151,41 Fjällko,10,14.979474,1.489371,0.643547,-4.273130,0.000000,0
605440,SE-fcdf259d-0044-0,f454e660,2023-06-05,12.84,22.31,-9.47,0,14.237500,58.482418,349,...,1,4152,41 Fjällko,10,14.892001,0.862208,-0.635912,-2.517582,0.000000,0
605441,SE-fcdf259d-0044-0,f454e660,2023-06-06,9.47,12.84,-3.37,0,15.345833,60.546358,350,...,1,4153,41 Fjällko,10,14.804528,0.639669,-0.227633,-0.453642,0.000000,0


In [13]:
# Check if NormalizedDailyYield is centered around 1 for each unique farm
print("Mean of NormalizedDailyYield:", data_cleaned.groupby('FarmName_Pseudo')['NormalizedDailyYield'].mean())
print("Standard Deviation of NormalizedDailyYield:", data_cleaned.groupby('FarmName_Pseudo')['NormalizedDailyYield'].std())

Mean of NormalizedDailyYield: FarmName_Pseudo
5c06d92d    0.945267
752efd72    0.951719
a624fb9a    0.907128
f454e660    0.912273
Name: NormalizedDailyYield, dtype: Float64
Standard Deviation of NormalizedDailyYield: FarmName_Pseudo
5c06d92d    0.140407
752efd72    0.117494
a624fb9a    0.216923
f454e660    0.248424
Name: NormalizedDailyYield, dtype: Float64


In [14]:
data_cleaned['Residuals'] = data_cleaned['DailyYield'] - data_cleaned['ExpectedYield']
data_cleaned

Unnamed: 0,SE_Number,FarmName_Pseudo,Date,DailyYield,PreviousDailyYield,DailyYieldChange,HW,MeanTemperature,MeanTHI_adj,DaysInMilk,...,Age,BreedName,LactationNumber,ExpectedYield,NormalizedDailyYield,NormalizedDailyYieldChange,HeatLoad,CumulativeHeatLoad,HeatStress,Residuals
0,SE-064c0cec-1189,a624fb9a,2022-05-28,15.22,,,0,9.912500,50.478673,3,...,3242,02 SLB,8,41.792667,0.364179,,-10.521327,0.000000,0,-26.572667
1,SE-064c0cec-1189,a624fb9a,2022-05-29,18.96,15.22,3.74,0,10.066667,53.841648,4,...,3243,02 SLB,8,41.712445,0.454541,0.089661,-7.158352,0.000000,0,-22.752445
2,SE-064c0cec-1189,a624fb9a,2022-05-30,22.64,18.96,3.68,0,10.466667,52.935959,5,...,3244,02 SLB,8,41.632223,0.54381,0.088393,-8.064041,0.000000,0,-18.992223
3,SE-064c0cec-1189,a624fb9a,2022-05-31,26.49,22.64,3.85,0,11.183333,52.872112,6,...,3245,02 SLB,8,41.552001,0.637514,0.092655,-8.127888,0.000000,0,-15.062001
4,SE-064c0cec-1189,a624fb9a,2022-06-01,33.61,26.49,7.12,0,12.704167,56.056547,7,...,3246,02 SLB,8,41.471778,0.810431,0.171683,-4.943453,0.000000,0,-7.861778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605443,SE-fcdf259d-0044-0,f454e660,2023-06-08,15.14,8.97,6.17,0,15.570833,59.383267,352,...,4155,41 Fjällko,10,14.629583,1.034889,0.421748,-1.616733,0.000000,0,0.510417
605444,SE-fcdf259d-0044-0,f454e660,2023-06-09,7.47,15.14,-7.67,0,13.254167,54.534255,353,...,4156,41 Fjällko,10,14.54211,0.513681,-0.527434,-6.465745,0.000000,0,-7.07211
605445,SE-fcdf259d-0044-0,f454e660,2023-06-10,14.73,7.47,7.26,0,13.258333,54.082367,354,...,4157,41 Fjällko,10,14.454637,1.01905,0.502261,-6.917633,0.000000,0,0.275363
605446,SE-fcdf259d-0044-0,f454e660,2023-06-12,12.27,14.73,-2.46,0,15.820833,62.015093,356,...,4159,41 Fjällko,10,14.279692,0.859262,-0.172273,1.015093,1.015093,0,-2.009692


In [15]:
farm_results = []

for farm_name, farm_group in data_cleaned.groupby('FarmName_Pseudo'):
    farm_residuals = []
    
    for se_number, cow_group in farm_group.groupby('SE_Number'):
        residuals = cow_group['Residuals'].dropna()  # Drop NaN values
        
        if len(residuals) > 1:  # Ensure there are residuals to analyze
            farm_residuals.append(residuals)
    
    if len(farm_residuals) > 0:
        # Combine residuals from all cows in the farm
        combined_residuals = np.concatenate(farm_residuals)
        
        if len(combined_residuals) > 1:  # Ensure enough data to perform calculations
            # Calculate farm-level statistics
            acf_values = acf(combined_residuals, nlags=30, fft=False)
            pacf_values = pacf(combined_residuals, nlags=min(30, len(combined_residuals)//2))

            # Print the farm-level statistics
            print(f"Farm: {farm_name}")
            print(f"ACF (first 5 lags): {acf_values[:5]}")
            print(f"PACF (first 5 lags): {pacf_values[:5]}")
        else:
            print(f"Farm: {farm_name} does not have enough data for reliable calculations.")
        
        print("=" * 50)

Farm: 5c06d92d
ACF (first 5 lags): [1.         0.45033682 0.42398472 0.37190396 0.33789599]
PACF (first 5 lags): [1.         0.4503387  0.27745195 0.14813144 0.09485012]
Farm: 752efd72
ACF (first 5 lags): [1.         0.47760427 0.44521024 0.40059334 0.35844906]
PACF (first 5 lags): [1.         0.47760678 0.28126571 0.15958209 0.08931462]
Farm: a624fb9a
ACF (first 5 lags): [1.         0.50335434 0.63869248 0.6122737  0.5971928 ]
PACF (first 5 lags): [1.         0.50336104 0.51610349 0.35953799 0.22438246]


  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)


Farm: f454e660
ACF (first 5 lags): [nan nan nan nan nan]
PACF (first 5 lags): [ 1. nan nan nan nan]


  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)


In [16]:
# Group by 'FarmName_Pseudo', 'SE_Number', and 'LactationNumber' to perform individual calculations
farm_results = []

for farm_name, farm_group in data_cleaned.groupby('FarmName_Pseudo'):
    print(f"Farm: {farm_name}")
    
    for (se_number, lactation_number), cow_group in farm_group.groupby(['SE_Number', 'LactationNumber']):
        residuals = cow_group['Residuals']
        residuals = cow_group['Residuals'].dropna()  # Drop NaN values
        
        if len(residuals) > 1:  # Ensure there are residuals to analyze
            acf_values = acf(residuals, nlags=30, fft=False)
            pacf_values = pacf(residuals, nlags=min(30, len(residuals)//2))

            # Print the statistics
            print(f"\nCow: {se_number}, Lactation Number: {lactation_number}")
            print(f"ACF (first 5 lags): {acf_values[:5]}")
            print(f"PACF (first 5 lags): {pacf_values[:5]}")
            print("-" * 50)
            
    print("=" * 50)

Farm: 5c06d92d

Cow: SE-5c06d92d-2000, Lactation Number: 8
ACF (first 5 lags): [1.         0.18872437 0.2788504  0.16545083 0.16538944]
PACF (first 5 lags): [1.         0.18943923 0.25420648 0.08814745 0.07086122]
--------------------------------------------------

Cow: SE-5c06d92d-2016, Lactation Number: 8
ACF (first 5 lags): [1.         0.26713025 0.25739877 0.27546499 0.28108091]
PACF (first 5 lags): [1.         0.26804823 0.20182519 0.18956226 0.16715481]
--------------------------------------------------

Cow: SE-5c06d92d-2055, Lactation Number: 6
ACF (first 5 lags): [ 1.          0.08095262  0.32228993  0.02447001 -0.00513046]
PACF (first 5 lags): [ 1.          0.08142056  0.32153976 -0.02165918 -0.12385351]
--------------------------------------------------

Cow: SE-5c06d92d-2055, Lactation Number: 7
ACF (first 5 lags): [1.         0.44834849 0.40025248 0.3540946  0.32802499]
PACF (first 5 lags): [1.         0.44956024 0.25105723 0.14529408 0.1062623 ]
--------------------------

  xo = x - x.mean()
  x -= x.mean()



Cow: SE-f454e660-0754, Lactation Number: 1
ACF (first 5 lags): [1.         0.17863145 0.36189841 0.33433242 0.33368107]
PACF (first 5 lags): [1.         0.17908254 0.3426492  0.27238783 0.21129439]
--------------------------------------------------

Cow: SE-f454e660-0754, Lactation Number: 2
ACF (first 5 lags): [ 1.         -0.24311942  0.20279973  0.09005426  0.00811697]
PACF (first 5 lags): [ 1.         -0.24444072  0.1544962   0.18705007  0.04062721]
--------------------------------------------------

Cow: SE-f454e660-0759, Lactation Number: 1
ACF (first 5 lags): [ 1.         -0.13463557  0.11748206  0.23485685  0.06649766]
PACF (first 5 lags): [ 1.         -0.13490376  0.10160107  0.27197381  0.13671224]
--------------------------------------------------

Cow: SE-f454e660-0760, Lactation Number: 1
ACF (first 5 lags): [ 1.         -0.16193679  0.13312437  0.23003329  0.10454497]
PACF (first 5 lags): [ 1.         -0.16237564  0.11039262  0.27987299  0.19359616]
---------------------

  xo = x - x.mean()
  x -= x.mean()
  xo = x - x.mean()
  x -= x.mean()


In [17]:
# Define the thresholds
mean_residual_threshold = 0.075
std_residual_threshold = 7.5
acf_threshold = 0.25
pacf_threshold = 0.25

# List to collect flagged combinations
flagged_combinations = []

for farm_name, farm_group in data_cleaned.groupby('FarmName_Pseudo'):
    for (se_number, lactation_number), cow_group in farm_group.groupby(['SE_Number', 'LactationNumber']):
        residuals = cow_group['Residuals'].dropna()
        
        if len(residuals) > 1:  # Ensure there are residuals to analyze
            acf_values = acf(residuals, nlags=30, fft=False)
            pacf_values = pacf(residuals, nlags=min(30, len(residuals)//2))

            # Check against thresholds
            if (abs(acf_values[1]) > acf_threshold or 
                abs(pacf_values[1]) > pacf_threshold):
                
                # Collect the combination if it exceeds any threshold
                flagged_combinations.append({
                    'Farm': farm_name,
                    'SE_Number': se_number,
                    'LactationNumber': lactation_number,
                    'ACF[1]': acf_values[1],
                    'PACF[1]': pacf_values[1]
                })

# Convert to a DataFrame for easier inspection
flagged_df = pd.DataFrame(flagged_combinations)
flagged_df

  xo = x - x.mean()
  x -= x.mean()
  xo = x - x.mean()
  x -= x.mean()
  xo = x - x.mean()
  x -= x.mean()


Unnamed: 0,Farm,SE_Number,LactationNumber,ACF[1],PACF[1]
0,5c06d92d,SE-5c06d92d-2016,8,0.267130,0.268048
1,5c06d92d,SE-5c06d92d-2055,7,0.448348,0.449560
2,5c06d92d,SE-5c06d92d-2058,8,0.551838,0.553461
3,5c06d92d,SE-5c06d92d-2211,7,0.599805,0.602582
4,5c06d92d,SE-5c06d92d-2246,6,0.524548,0.527580
...,...,...,...,...,...
1458,f454e660,SE-f454e660-709,1,0.310864,0.311406
1459,f454e660,SE-f454e660-724,1,0.261598,0.262874
1460,f454e660,SE-f454e660-731,1,0.641951,0.643114
1461,f454e660,SE-f454e660-748,1,0.335238,0.335834


In [18]:
# Define the Wilmink Lactation Curve function
def wilmink_lactation_curve(dim, a, b, c, d):
    dim = np.array(dim, dtype=float)
    return a + b * dim + c * np.exp(-d * dim)

# Function to directly refit the Wilmink Lactation Curve (Standard Process)
def refit_wilmink(cow_data):
    x_data = cow_data['DaysInMilk'].values
    y_data = cow_data['DailyYield'].values

    # Use initial guesses and bounds from the original fitting process
    initial_guesses = [np.mean(y_data), 0, np.mean(y_data) / 2, 0.1]
    bounds = ([-np.inf, -np.inf, -np.inf, 0], [np.inf, np.inf, np.inf, np.inf])

    popt, _ = curve_fit(wilmink_lactation_curve, x_data, y_data, p0=initial_guesses, bounds=bounds, maxfev=30000)
    
    # Calculate the expected yield with the refitted parameters
    cow_data['ExpectedYield'] = wilmink_lactation_curve(cow_data['DaysInMilk'], *popt)
    
    # Calculate new residuals
    cow_data['Residuals'] = cow_data['DailyYield'] - cow_data['ExpectedYield']
    
    return cow_data

# Function to add lagged variables for addressing autocorrelation
def add_lagged_variables(cow_data, max_lag=3):
    for lag in range(1, max_lag + 1):
        cow_data[f'lag_{lag}'] = cow_data['DailyYield'].shift(lag)
    return cow_data.dropna()

# Define the Robust Wilmink Lactation Curve function
def robust_wilmink_lactation_curve(dim, a, b, c, d, lag1, lag2, lag3):
    dim = np.array(dim, dtype=np.float64)
    days_in_milk = dim[0]
    lag_1 = dim[1]
    lag_2 = dim[2]
    lag_3 = dim[3]
    
    return a + b * days_in_milk + c * np.exp(-d * days_in_milk) + lag1 * lag_1 + lag2 * lag_2 + lag3 * lag_3

# Function to fit the robust Wilmink model
def fit_robust_wilmink(cow_data, lags=3):
    cow_data = add_lagged_variables(cow_data, max_lag=lags)
    
    x_data = cow_data[['DaysInMilk', 'lag_1', 'lag_2', 'lag_3']].values.T
    y_data = cow_data['DailyYield'].values
    
    initial_guesses = [np.mean(y_data), 0, np.mean(y_data) / 2, 0.1, 0, 0, 0]
    bounds = ([-np.inf, -np.inf, -np.inf, 0, -np.inf, -np.inf, -np.inf], 
              [np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf])
    
    try:
        popt, _ = curve_fit(robust_wilmink_lactation_curve, x_data, y_data, p0=initial_guesses, bounds=bounds, maxfev=50000)
        cow_data.loc[:, 'ExpectedYield'] = robust_wilmink_lactation_curve(x_data, *popt)
        cow_data.loc[:, 'Residuals'] = cow_data['DailyYield'] - cow_data['ExpectedYield']
    except RuntimeError as e:
        print(f"Curve fitting failed: {e}")
        cow_data.loc[:, 'ExpectedYield'] = np.nan
        cow_data.loc[:, 'Residuals'] = np.nan
    
    return cow_data

# Function to add lagged variables for addressing autocorrelation
def add_lagged_variables(cow_data, max_lag=3):
    for lag in range(1, max_lag + 1):
        cow_data[f'lag_{lag}'] = cow_data['DailyYield'].shift(lag)
    return cow_data.dropna()

# Example usage: Applying the robust model to flagged cases
for index, row in flagged_df.iterrows():
    se_number = row['SE_Number']
    lactation_number = row['LactationNumber']
    
    cow_data = data_cleaned[(data_cleaned['SE_Number'] == se_number) & 
                            (data_cleaned['LactationNumber'] == lactation_number)].copy()
    
    if abs(row['ACF[1]']) > 0.2:  # Significant autocorrelation
        cow_data = add_lagged_variables(cow_data, max_lag=3)
        cow_data_refitted = fit_robust_wilmink(cow_data, lags=3)
        data_cleaned.update(cow_data_refitted)
    else:
        cow_data_refitted = refit_wilmink(cow_data)
        data_cleaned.update(cow_data_refitted)

# Erase all rows where ExpectedYield is NaN
data_cleaned = data_cleaned.dropna(subset=['ExpectedYield']).reset_index(drop=True)

data_cleaned['NormalizedDailyYield'] = data_cleaned['DailyYield'] / data_cleaned['ExpectedYield']
data_cleaned['NormalizedDailyYieldChange'] = data_cleaned['DailyYieldChange'] / data_cleaned['ExpectedYield']

data_cleaned

Unnamed: 0,SE_Number,FarmName_Pseudo,Date,DailyYield,PreviousDailyYield,DailyYieldChange,HW,MeanTemperature,MeanTHI_adj,DaysInMilk,...,Age,BreedName,LactationNumber,ExpectedYield,NormalizedDailyYield,NormalizedDailyYieldChange,HeatLoad,CumulativeHeatLoad,HeatStress,Residuals
0,SE-064c0cec-1189,a624fb9a,2022-05-28,15.22,,,0,9.912500,50.478673,3,...,3242,02 SLB,8,41.792667,0.364179,,-10.521327,0.000000,0,-26.572667
1,SE-064c0cec-1189,a624fb9a,2022-05-29,18.96,15.22,3.74,0,10.066667,53.841648,4,...,3243,02 SLB,8,41.712445,0.454541,0.089661,-7.158352,0.000000,0,-22.752445
2,SE-064c0cec-1189,a624fb9a,2022-05-30,22.64,18.96,3.68,0,10.466667,52.935959,5,...,3244,02 SLB,8,41.632223,0.54381,0.088393,-8.064041,0.000000,0,-18.992223
3,SE-064c0cec-1189,a624fb9a,2022-05-31,26.49,22.64,3.85,0,11.183333,52.872112,6,...,3245,02 SLB,8,41.552001,0.637514,0.092655,-8.127888,0.000000,0,-15.062001
4,SE-064c0cec-1189,a624fb9a,2022-06-01,33.61,26.49,7.12,0,12.704167,56.056547,7,...,3246,02 SLB,8,41.471778,0.810431,0.171683,-4.943453,0.000000,0,-7.861778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
590054,SE-fcdf259d-0044-0,f454e660,2023-06-08,15.14,8.97,6.17,0,15.570833,59.383267,352,...,4155,41 Fjällko,10,14.629583,1.034889,0.421748,-1.616733,0.000000,0,0.510417
590055,SE-fcdf259d-0044-0,f454e660,2023-06-09,7.47,15.14,-7.67,0,13.254167,54.534255,353,...,4156,41 Fjällko,10,14.54211,0.513681,-0.527434,-6.465745,0.000000,0,-7.07211
590056,SE-fcdf259d-0044-0,f454e660,2023-06-10,14.73,7.47,7.26,0,13.258333,54.082367,354,...,4157,41 Fjällko,10,14.454637,1.01905,0.502261,-6.917633,0.000000,0,0.275363
590057,SE-fcdf259d-0044-0,f454e660,2023-06-12,12.27,14.73,-2.46,0,15.820833,62.015093,356,...,4159,41 Fjällko,10,14.279692,0.859262,-0.172273,1.015093,1.015093,0,-2.009692


In [19]:
# Define the thresholds
mean_residual_threshold = 0.075
std_residual_threshold = 7.5
acf_threshold = 0.25
pacf_threshold = 0.25

# List to collect flagged combinations
flagged_combinations = []

for farm_name, farm_group in data_cleaned.groupby('FarmName_Pseudo'):
    for (se_number, lactation_number), cow_group in farm_group.groupby(['SE_Number', 'LactationNumber']):
        residuals = cow_group['Residuals'].dropna()
        
        if len(residuals) > 1:  # Ensure there are residuals to analyze
            acf_values = acf(residuals, nlags=30, fft=False)
            pacf_values = pacf(residuals, nlags=min(30, len(residuals)//2))

            # Check against thresholds
            if (abs(acf_values[1]) > acf_threshold or 
                abs(pacf_values[1]) > pacf_threshold):
                
                # Collect the combination if it exceeds any threshold
                flagged_combinations.append({
                    'Farm': farm_name,
                    'SE_Number': se_number,
                    'LactationNumber': lactation_number,
                    'ACF[1]': acf_values[1],
                    'PACF[1]': pacf_values[1]
                })

# Convert to a DataFrame for easier inspection
flagged_df = pd.DataFrame(flagged_combinations)
flagged_df

  xo = x - x.mean()
  x -= x.mean()
  xo = x - x.mean()
  x -= x.mean()
  xo = x - x.mean()
  x -= x.mean()


Unnamed: 0,Farm,SE_Number,LactationNumber,ACF[1],PACF[1]
0,5c06d92d,SE-5c06d92d-2211,7,0.456755,0.458870
1,5c06d92d,SE-5c06d92d-2246,6,0.300373,0.302109
2,5c06d92d,SE-5c06d92d-2333,7,0.452537,0.454201
3,5c06d92d,SE-5c06d92d-2333,8,0.320294,0.322073
4,5c06d92d,SE-5c06d92d-2405,6,0.326889,0.327542
...,...,...,...,...,...
546,a624fb9a,SE-a624fb9a-1425,1,0.281904,0.282856
547,a624fb9a,SE-a624fb9a-1426,1,0.259438,0.260403
548,a624fb9a,SE-a624fb9a-1428,1,0.277993,0.279688
549,a624fb9a,SE-a624fb9a-1439,1,0.257284,0.258234


In [20]:
def remove_outliers(data, threshold=3.5):
    # Calculate z-scores of residuals
    data = data.copy()  # Create a copy to avoid the SettingWithCopyWarning
    data['z_score'] = (data['Residuals'] - data['Residuals'].mean()) / data['Residuals'].std()
    
    # Identify the number of outliers
    num_outliers = (data['z_score'].abs() >= threshold).sum()
    print(f"Number of outliers detected: {num_outliers}")
    
    # Remove rows where the z-score of the residual is greater than the threshold
    cleaned_data = data.loc[(data['z_score'].abs() < threshold)].drop(columns=['z_score'])
    
    # Print the number of rows before and after
    print(f"Number of rows before outlier removal: {len(data)}")
    print(f"Number of rows after outlier removal: {len(cleaned_data)}")
    
    return cleaned_data

# Apply to flagged cases
for index, row in flagged_df.iterrows():
    se_number = row['SE_Number']
    lactation_number = row['LactationNumber']
    
    # Select the cow data for the specific SE_Number and LactationNumber
    cow_data = data_cleaned.loc[(data_cleaned['SE_Number'] == se_number) & 
                                (data_cleaned['LactationNumber'] == lactation_number)]
    
    # Remove outliers
    cow_data_trimmed = remove_outliers(cow_data, threshold=3.5)
    
    # Recalculate the residuals and update the dataset
    cow_data_trimmed['Residuals'] = cow_data_trimmed['DailyYield'] - cow_data_trimmed['ExpectedYield']
    
    # Remove the old data for this cow from data_cleaned
    data_cleaned = data_cleaned.loc[~((data_cleaned['SE_Number'] == se_number) & 
                                      (data_cleaned['LactationNumber'] == lactation_number))]
    
    # Append the cleaned data back to data_cleaned
    data_cleaned = pd.concat([data_cleaned, cow_data_trimmed], ignore_index=True)

Number of outliers detected: 4
Number of rows before outlier removal: 217
Number of rows after outlier removal: 213
Number of outliers detected: 0
Number of rows before outlier removal: 174
Number of rows after outlier removal: 174
Number of outliers detected: 5
Number of rows before outlier removal: 273
Number of rows after outlier removal: 268
Number of outliers detected: 2
Number of rows before outlier removal: 181
Number of rows after outlier removal: 179
Number of outliers detected: 8
Number of rows before outlier removal: 502
Number of rows after outlier removal: 494
Number of outliers detected: 7
Number of rows before outlier removal: 421
Number of rows after outlier removal: 414
Number of outliers detected: 7
Number of rows before outlier removal: 359
Number of rows after outlier removal: 352
Number of outliers detected: 6
Number of rows before outlier removal: 484
Number of rows after outlier removal: 478
Number of outliers detected: 6
Number of rows before outlier removal: 29

In [21]:
data_cleaned

Unnamed: 0,SE_Number,FarmName_Pseudo,Date,DailyYield,PreviousDailyYield,DailyYieldChange,HW,MeanTemperature,MeanTHI_adj,DaysInMilk,...,Age,BreedName,LactationNumber,ExpectedYield,NormalizedDailyYield,NormalizedDailyYieldChange,HeatLoad,CumulativeHeatLoad,HeatStress,Residuals
0,SE-064c0cec-1189,a624fb9a,2022-05-28,15.22,,,0,9.912500,50.478673,3,...,3242,02 SLB,8,41.792667,0.364179,,-10.521327,0.000000,0,-26.572667
1,SE-064c0cec-1189,a624fb9a,2022-05-29,18.96,15.22,3.74,0,10.066667,53.841648,4,...,3243,02 SLB,8,41.712445,0.454541,0.089661,-7.158352,0.000000,0,-22.752445
2,SE-064c0cec-1189,a624fb9a,2022-05-30,22.64,18.96,3.68,0,10.466667,52.935959,5,...,3244,02 SLB,8,41.632223,0.54381,0.088393,-8.064041,0.000000,0,-18.992223
3,SE-064c0cec-1189,a624fb9a,2022-05-31,26.49,22.64,3.85,0,11.183333,52.872112,6,...,3245,02 SLB,8,41.552001,0.637514,0.092655,-8.127888,0.000000,0,-15.062001
4,SE-064c0cec-1189,a624fb9a,2022-06-01,33.61,26.49,7.12,0,12.704167,56.056547,7,...,3246,02 SLB,8,41.471778,0.810431,0.171683,-4.943453,0.000000,0,-7.861778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
587159,SE-a624fb9a-1508,a624fb9a,2024-06-22,21.16,18.18,2.98,0,16.537500,62.915525,273,...,1055,01 SRB,1,23.118573,0.915281,0.128901,1.915525,4.541283,1,-1.958573
587160,SE-a624fb9a-1508,a624fb9a,2024-06-25,12.47,21.16,-8.69,0,21.387500,68.473036,276,...,1058,01 SRB,1,22.392523,0.556882,-0.388076,7.473036,12.014319,1,-9.922523
587161,SE-a624fb9a-1508,a624fb9a,2024-08-16,15.37,12.47,2.90,0,18.200000,63.953433,328,...,1110,01 SRB,1,21.653516,0.709815,0.133927,2.953433,14.967752,1,-6.283516
587162,SE-a624fb9a-1508,a624fb9a,2024-08-17,23.34,15.37,7.97,0,14.529167,58.763362,329,...,1111,01 SRB,1,19.023785,1.226885,0.418949,-2.236638,10.494476,1,4.316215


In [22]:
# Define the thresholds
mean_residual_threshold = 0.075
std_residual_threshold = 7.5
acf_threshold = 0.25
pacf_threshold = 0.25

# List to collect flagged combinations
flagged_combinations = []

for farm_name, farm_group in data_cleaned.groupby('FarmName_Pseudo'):
    for (se_number, lactation_number), cow_group in farm_group.groupby(['SE_Number', 'LactationNumber']):
        residuals = cow_group['Residuals'].dropna()
        
        if len(residuals) > 1:  # Ensure there are residuals to analyze
            acf_values = acf(residuals, nlags=30, fft=False)
            pacf_values = pacf(residuals, nlags=min(30, len(residuals)//2))

            # Check against thresholds
            if (abs(acf_values[1]) > acf_threshold or 
                abs(pacf_values[1]) > pacf_threshold):
                
                # Collect the combination if it exceeds any threshold
                flagged_combinations.append({
                    'Farm': farm_name,
                    'SE_Number': se_number,
                    'LactationNumber': lactation_number,
                    'ACF[1]': acf_values[1],
                    'PACF[1]': pacf_values[1]
                })

# Convert to a DataFrame for easier inspection
flagged_df = pd.DataFrame(flagged_combinations)
flagged_df

  xo = x - x.mean()
  x -= x.mean()
  xo = x - x.mean()
  x -= x.mean()
  xo = x - x.mean()
  x -= x.mean()


Unnamed: 0,Farm,SE_Number,LactationNumber,ACF[1],PACF[1]
0,5c06d92d,SE-5c06d92d-2246,6,0.300373,0.302109
1,5c06d92d,SE-5c06d92d-2797,5,0.389932,0.392532
2,5c06d92d,SE-5c06d92d-2990,3,0.289774,0.291489
3,5c06d92d,SE-5c06d92d-3223,1,0.265669,0.266707
4,5c06d92d,SE-5c06d92d-3279,2,0.262929,0.264373
5,5c06d92d,SE-5c06d92d-3281,2,0.345996,0.348043
6,5c06d92d,SE-5c06d92d-3297,2,0.26434,0.265564
7,5c06d92d,SE-5c06d92d-3411,2,0.293228,0.295158
8,5c06d92d,SE-5c06d92d-3584,1,0.303395,0.305169
9,752efd72,SE-752efd72-0112,3,0.249157,0.250162


In [23]:
# Reorder columns
new_order = [
    "Date", "FarmName_Pseudo", "SE_Number", "Age", "BreedName", "LactationNumber", "DaysInMilk",'YearSeason', "DailyYield", "PreviousDailyYield", 
    "DailyYieldChange", "ExpectedYield", "NormalizedDailyYield", 
    "NormalizedDailyYieldChange", "Residuals", "HeatStress", "Temp15Threshold", "HW", 
    "cum_HW", "MeanTemperature", "MeanTHI_adj", "HeatLoad", "CumulativeHeatLoad"
]
data_cleaned = data_cleaned[new_order]
data_cleaned

Unnamed: 0,Date,FarmName_Pseudo,SE_Number,Age,BreedName,LactationNumber,DaysInMilk,YearSeason,DailyYield,PreviousDailyYield,...,NormalizedDailyYieldChange,Residuals,HeatStress,Temp15Threshold,HW,cum_HW,MeanTemperature,MeanTHI_adj,HeatLoad,CumulativeHeatLoad
0,2022-05-28,a624fb9a,SE-064c0cec-1189,3242,02 SLB,8,3,2022-2,15.22,,...,,-26.572667,0,0,0,0,9.912500,50.478673,-10.521327,0.000000
1,2022-05-29,a624fb9a,SE-064c0cec-1189,3243,02 SLB,8,4,2022-2,18.96,15.22,...,0.089661,-22.752445,0,0,0,0,10.066667,53.841648,-7.158352,0.000000
2,2022-05-30,a624fb9a,SE-064c0cec-1189,3244,02 SLB,8,5,2022-2,22.64,18.96,...,0.088393,-18.992223,0,1,0,0,10.466667,52.935959,-8.064041,0.000000
3,2022-05-31,a624fb9a,SE-064c0cec-1189,3245,02 SLB,8,6,2022-2,26.49,22.64,...,0.092655,-15.062001,0,0,0,0,11.183333,52.872112,-8.127888,0.000000
4,2022-06-01,a624fb9a,SE-064c0cec-1189,3246,02 SLB,8,7,2022-3,33.61,26.49,...,0.171683,-7.861778,0,1,0,0,12.704167,56.056547,-4.943453,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
587159,2024-06-22,a624fb9a,SE-a624fb9a-1508,1055,01 SRB,1,273,2024-3,21.16,18.18,...,0.128901,-1.958573,1,1,0,0,16.537500,62.915525,1.915525,4.541283
587160,2024-06-25,a624fb9a,SE-a624fb9a-1508,1058,01 SRB,1,276,2024-3,12.47,21.16,...,-0.388076,-9.922523,1,1,0,0,21.387500,68.473036,7.473036,12.014319
587161,2024-08-16,a624fb9a,SE-a624fb9a-1508,1110,01 SRB,1,328,2024-3,15.37,12.47,...,0.133927,-6.283516,1,1,0,0,18.200000,63.953433,2.953433,14.967752
587162,2024-08-17,a624fb9a,SE-a624fb9a-1508,1111,01 SRB,1,329,2024-3,23.34,15.37,...,0.418949,4.316215,1,1,0,0,14.529167,58.763362,-2.236638,10.494476


In [24]:
# Check if NormalizedDailyYield is centered around 1 for each unique farm
print("Mean of NormalizedDailyYield:", data_cleaned.groupby('FarmName_Pseudo')['NormalizedDailyYield'].mean())
print("Standard Deviation of NormalizedDailyYield:", data_cleaned.groupby('FarmName_Pseudo')['NormalizedDailyYield'].std())

Mean of NormalizedDailyYield: FarmName_Pseudo
5c06d92d    0.988489
752efd72    0.992766
a624fb9a    0.974249
f454e660     0.93193
Name: NormalizedDailyYield, dtype: Float64
Standard Deviation of NormalizedDailyYield: FarmName_Pseudo
5c06d92d    0.129999
752efd72    0.104875
a624fb9a    0.184483
f454e660    0.246121
Name: NormalizedDailyYield, dtype: Float64


In [25]:
# Count the number of HeatStress occurrences in each farm
heat_stress_counts = data_cleaned.groupby('FarmName_Pseudo')['HeatStress'].sum()
heat_stress_counts

FarmName_Pseudo
5c06d92d    62191
752efd72    46020
a624fb9a    19048
f454e660    21005
Name: HeatStress, dtype: int64

In [26]:
# Save the reordered DataFrame to a CSV file
data_cleaned.to_csv('../Data/MergedData/HeatApproachYieldDataTestQuantile.csv', index=False)