In [1]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
import xgboost as xgb
import optuna
import numpy as np

# Set the Optuna logger to output only WARNING and higher levels
# (keeps the per-trial INFO lines of the hyperparameter searches out of the cell output)
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Apply seaborn's default theme with the "notebook" plotting context
sns.set_theme()
sns.set_context("notebook")
# Load the autoreload extension in mode 2 (IPython magics, not plain Python)
%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Column -> dtype mapping passed to read_csv, grouped by dtype.
# 'Date' is read as a plain string here and parsed to datetime right after loading.
dtype_dict = {
    # Text / identifier columns
    **dict.fromkeys(
        ['Date', 'FarmName_Pseudo', 'SE_Number', 'BreedName', 'YearSeason'],
        'str',
    ),
    # Integer columns, read with pandas' nullable 'Int64' dtype
    **dict.fromkeys(
        [
            'Age', 'DaysInMilk', 'LactationNumber',
            'HeatStress', 'Temp15Threshold', 'HW', 'cum_HW',
        ],
        'Int64',
    ),
    # Floating-point columns
    **dict.fromkeys(
        [
            'DailyYield', 'PreviousDailyYield', 'DailyYieldChange',
            'ExpectedYield', 'NormalizedDailyYield', 'NormalizedDailyYieldChange',
            'MeanTemperature', 'MeanTHI_adj', 'HeatLoad', 'CumulativeHeatLoad',
        ],
        'float',
    ),
}

# Load the merged yield / heat data with the dtypes above
milk_data = pd.read_csv(
    '../Data/MergedData/HeatApproachYieldDataTest.csv',
    dtype=dtype_dict,
)

# Parse the ISO-formatted date strings into datetimes
milk_data['Date'] = pd.to_datetime(milk_data['Date'], format='%Y-%m-%d')

# Display everything except the last five rows (the notebook truncates the view)
milk_data.head(-5)

Unnamed: 0,Date,FarmName_Pseudo,SE_Number,Age,BreedName,LactationNumber,DaysInMilk,YearSeason,DailyYield,PreviousDailyYield,...,NormalizedDailyYieldChange,Residuals,HeatStress,Temp15Threshold,HW,cum_HW,MeanTemperature,MeanTHI_adj,HeatLoad,CumulativeHeatLoad
0,2022-05-28,a624fb9a,SE-064c0cec-1189,3242,02 SLB,8,3,2022-2,15.22,,...,,1.820438,0,0,0,0,9.912500,50.478673,-10.521327,0.000000
1,2022-05-29,a624fb9a,SE-064c0cec-1189,3243,02 SLB,8,4,2022-2,18.96,15.22,...,0.215311,1.589745,0,0,0,0,10.066667,53.841648,-7.158352,0.000000
2,2022-05-30,a624fb9a,SE-064c0cec-1189,3244,02 SLB,8,5,2022-2,22.64,18.96,...,0.177389,1.894598,0,1,0,0,10.466667,52.935959,-8.064041,0.000000
3,2022-05-31,a624fb9a,SE-064c0cec-1189,3245,02 SLB,8,6,2022-2,26.49,22.64,...,0.163049,2.877443,0,0,0,0,11.183333,52.872112,-8.127888,0.000000
4,2022-06-01,a624fb9a,SE-064c0cec-1189,3246,02 SLB,8,7,2022-3,33.61,26.49,...,0.273358,7.563598,0,1,0,0,12.704167,56.056547,-4.943453,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
576066,2024-08-09,f454e660,SE-f454e660-810,1020,99 Korsning/obestämbar ras,1,365,2024-3,37.53,31.36,...,0.168909,1.001508,1,1,0,0,18.650000,62.298109,1.298109,84.512273
576067,2024-08-10,f454e660,SE-f454e660-810,1021,99 Korsning/obestämbar ras,1,365,2024-3,36.48,37.53,...,-0.028745,-0.048492,1,1,0,0,18.308333,54.687773,-6.312227,71.887820
576068,2024-08-11,f454e660,SE-f454e660-810,1022,99 Korsning/obestämbar ras,1,365,2024-3,34.76,36.48,...,-0.047087,-1.768492,1,1,0,0,17.841667,55.974084,-5.025916,61.835988
576069,2024-08-12,f454e660,SE-f454e660-810,1023,99 Korsning/obestämbar ras,1,365,2024-3,38.06,34.76,...,0.090340,1.531508,1,1,0,0,17.516667,60.115007,-0.884993,60.066002


In [3]:
# Start from an empty results table; each per-farm run contributes one row
results_df = pd.DataFrame(columns=['FarmName_Pseudo', 'FarmHeatStressProduction'])

# Farm analysed in this cell
farm_id = 'a624fb9a'

# Column to predict and the single predictor used by the model
target = 'NormalizedDailyYield'
features = ['HeatStress']

# Keep only the records belonging to this farm
farm_data = milk_data.loc[milk_data['FarmName_Pseudo'] == farm_id]

# Sanity check: the normalized yield is expected to be centered around 1
farm_yield = farm_data[target]
normalized_mean = farm_yield.mean()
normalized_variance = farm_yield.var()
print("Mean of NormalizedDailyYield:", normalized_mean)
print("Standard Deviation of NormalizedDailyYield:", farm_yield.std())
print("Variance of NormalizedDailyYield:", normalized_variance)

# 70/30 train/validation split with a fixed seed
train_data, val_data = train_test_split(
    farm_data,
    test_size=0.3,
    random_state=42,
)

# Optuna objective: cross-validated MSE of an XGBoost regressor for one sampled configuration
def objective(trial):
    """Score one Optuna trial and return its mean 5-fold cross-validated MSE.

    The training frame, feature list and target column are not parameters:
    they are read from the notebook-level ``train_data``, ``features`` and
    ``target`` at call time, so those must be set before a study is run.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Mean squared error averaged over the folds (lower is better).
    """
    # (name, low, high, log-scale?) -- sampled in exactly this order
    float_specs = (
        ('lambda', 1e-8, 1.0, True),
        ('alpha', 1e-8, 1.0, True),
        ('subsample', 0.5, 1.0, False),
        ('colsample_bytree', 0.5, 1.0, False),
        ('learning_rate', 1e-4, 1e-1, True),
    )
    # (name, low, high) -- sampled after the float parameters
    int_specs = (
        ('n_estimators', 50, 1000),
        ('max_depth', 1, 15),
    )

    # Fixed settings first, then the sampled values
    xgb_params = {'verbosity': 0, 'objective': 'reg:squarederror'}
    for name, low, high, use_log in float_specs:
        xgb_params[name] = trial.suggest_float(name, low, high, log=use_log)
    for name, low, high in int_specs:
        xgb_params[name] = trial.suggest_int(name, low, high)

    # scikit-learn reports negated MSE, so flip the sign of the fold average
    fold_scores = cross_val_score(
        xgb.XGBRegressor(**xgb_params),
        train_data[features],
        train_data[target],
        cv=5,
        scoring='neg_mean_squared_error',
    )
    return -fold_scores.mean()

# Function to fit the model and calculate reduction due to HeatStress
def fit_xgboost_and_calculate_reduction(train_data, val_data, features, baseline_mean):
    """Tune and fit an XGBoost regressor, then estimate the heat-stress effect.

    Hyperparameters are searched with Optuna using the notebook-level
    ``objective`` function (which reads the notebook-level ``train_data``,
    ``features`` and ``target``). The final model is fitted on the
    ``train_data`` argument against the notebook-level ``target`` column.
    The effect is the difference between the mean prediction on ``val_data``
    with ``HeatStress`` forced to 1 and with ``HeatStress`` forced to 0.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Rows used to fit the final model.
    val_data : pandas.DataFrame
        Rows on which the two counterfactual predictions are averaged.
    features : list of str
        Feature columns fed to the model.
    baseline_mean : float
        Accepted for interface compatibility; not used in the computation.

    Returns
    -------
    float
        Mean predicted yield under heat stress minus mean predicted yield
        without heat stress (a negative value means lower production under
        heat stress).
    """
    print(f"\nSelected features: {features}")

    # Optimize hyperparameters using Optuna.
    # The sampler is seeded so repeated runs propose the same trial sequence;
    # an unseeded study yields different best parameters (and therefore a
    # different reported effect) each time the notebook is executed. The
    # wall-clock timeout can still cut a run short on a slower machine.
    study = optuna.create_study(
        direction='minimize',
        sampler=optuna.samplers.TPESampler(seed=42),
    )
    study.optimize(objective, n_trials=100, timeout=600)
    best_params = study.best_params
    print(f"Best parameters: {best_params}")

    # Train final model using the best parameters
    best_model = xgb.XGBRegressor(**best_params)
    best_model.fit(train_data[features], train_data[target])

    # Only the feature columns are needed for prediction, so build the two
    # counterfactual inputs from them instead of copying the whole frame.
    # Re-selecting `features` keeps the column set and order used in training.
    val_features = val_data[features]

    # Predict the NormalizedDailyYield under heat stress (HeatStress = 1)
    y_pred_heatstress = best_model.predict(val_features.assign(HeatStress=1)[features])
    avg_production_heatstress = y_pred_heatstress.mean()

    # Predict the NormalizedDailyYield without heat stress (HeatStress = 0)
    y_pred_no_heatstress = best_model.predict(val_features.assign(HeatStress=0)[features])
    avg_production_no_heatstress = y_pred_no_heatstress.mean()

    # Signed effect of heat stress: stressed mean minus unstressed mean
    reduction_due_to_heatstress = avg_production_heatstress - avg_production_no_heatstress

    print(f"Estimated average production during heat stress: {avg_production_heatstress:.4f}")
    print(f"Estimated average production without heat stress: {avg_production_no_heatstress:.4f}")
    print(f"Reduction in milk production during heat stress: {reduction_due_to_heatstress:.4f}")

    return reduction_due_to_heatstress

# Run the tuning + fit for this farm and get the estimated heat-stress effect
reduction = fit_xgboost_and_calculate_reduction(
    train_data, val_data, features, normalized_mean
)

# One-row frame holding this farm's result
farm_row = {
    'FarmName_Pseudo': farm_id,
    'FarmHeatStressProduction': reduction,
}
new_result = pd.DataFrame([farm_row])

# An empty results table is simply replaced; otherwise the new row is appended
results_df = (
    new_result
    if results_df.empty
    else pd.concat([results_df, new_result], ignore_index=True)
)

results_df

Mean of NormalizedDailyYield: 1.0007464812391016
Standard Deviation of NormalizedDailyYield: 0.17820337652716353
Variance of NormalizedDailyYield: 0.03175644340568202

Selected features: ['HeatStress']
Best parameters: {'lambda': 0.034621477616408596, 'alpha': 0.0003778581621813498, 'subsample': 0.5209161179387255, 'colsample_bytree': 0.8726918083283083, 'learning_rate': 0.08538776303366605, 'n_estimators': 172, 'max_depth': 2}
Estimated average production during heat stress: 0.9921
Estimated average production without heat stress: 1.0034
Reduction in milk production during heat stress: -0.0114


Unnamed: 0,FarmName_Pseudo,FarmHeatStressProduction
0,a624fb9a,-0.011362


In [4]:
# Specify the farm ID for analysis
farm_id = '5c06d92d'

# Filter data for the specific farm
farm_data = milk_data[milk_data['FarmName_Pseudo'] == farm_id]

# Check if NormalizedDailyYield is centered around 1
normalized_mean = farm_data['NormalizedDailyYield'].mean()
normalized_variance = farm_data['NormalizedDailyYield'].var()
print("Mean of NormalizedDailyYield:", normalized_mean)
print("Standard Deviation of NormalizedDailyYield:", farm_data['NormalizedDailyYield'].std())
print("Variance of NormalizedDailyYield:", normalized_variance)

# Define the target variable
target = 'NormalizedDailyYield'

# Split the data into train and validation sets
train_data, val_data = train_test_split(farm_data, test_size=0.3, random_state=42)

# Define the single feature
features = ['HeatStress']

# objective() and fit_xgboost_and_calculate_reduction() are defined once, in the
# first per-farm cell above, and are reused here instead of being redefined.
# objective() reads the notebook-level train_data / features / target, so those
# must be assigned (as done above) before the call below.

# Train the model and calculate the reduction
reduction = fit_xgboost_and_calculate_reduction(train_data, val_data, features, normalized_mean)

# Create a new DataFrame for the current farm's result
new_result = pd.DataFrame([{
    'FarmName_Pseudo': farm_id,
    'FarmHeatStressProduction': reduction
}])

# Upsert by farm: drop any row left by an earlier run of this cell so that
# re-running it cannot create duplicate farm rows (duplicates would multiply
# this farm's rows when results_df is later merged into milk_data).
other_farms = results_df[results_df['FarmName_Pseudo'] != farm_id]
if other_farms.empty:
    results_df = new_result
else:
    results_df = pd.concat([other_farms, new_result], ignore_index=True)

results_df

Mean of NormalizedDailyYield: 1.0013067878458104
Standard Deviation of NormalizedDailyYield: 0.13131819898745473
Variance of NormalizedDailyYield: 0.017244469385308756

Selected features: ['HeatStress']
Best parameters: {'lambda': 0.015423882119563461, 'alpha': 0.0005831465692192785, 'subsample': 0.6857847052473738, 'colsample_bytree': 0.6441870107346002, 'learning_rate': 0.0966514851726896, 'n_estimators': 642, 'max_depth': 10}
Estimated average production during heat stress: 0.9967
Estimated average production without heat stress: 1.0027
Reduction in milk production during heat stress: -0.0060


Unnamed: 0,FarmName_Pseudo,FarmHeatStressProduction
0,a624fb9a,-0.011362
1,5c06d92d,-0.005975


In [5]:
# Specify the farm ID for analysis
farm_id = '752efd72'

# Filter data for the specific farm
farm_data = milk_data[milk_data['FarmName_Pseudo'] == farm_id]

# Check if NormalizedDailyYield is centered around 1
normalized_mean = farm_data['NormalizedDailyYield'].mean()
normalized_variance = farm_data['NormalizedDailyYield'].var()
print("Mean of NormalizedDailyYield:", normalized_mean)
print("Standard Deviation of NormalizedDailyYield:", farm_data['NormalizedDailyYield'].std())
print("Variance of NormalizedDailyYield:", normalized_variance)

# Define the target variable
target = 'NormalizedDailyYield'

# Split the data into train and validation sets
train_data, val_data = train_test_split(farm_data, test_size=0.3, random_state=42)

# Define the single feature
features = ['HeatStress']

# objective() and fit_xgboost_and_calculate_reduction() are defined once, in the
# first per-farm cell above, and are reused here instead of being redefined.
# objective() reads the notebook-level train_data / features / target, so those
# must be assigned (as done above) before the call below.

# Train the model and calculate the reduction
reduction = fit_xgboost_and_calculate_reduction(train_data, val_data, features, normalized_mean)

# Create a new DataFrame for the current farm's result
new_result = pd.DataFrame([{
    'FarmName_Pseudo': farm_id,
    'FarmHeatStressProduction': reduction
}])

# Upsert by farm: drop any row left by an earlier run of this cell so that
# re-running it cannot create duplicate farm rows (duplicates would multiply
# this farm's rows when results_df is later merged into milk_data).
other_farms = results_df[results_df['FarmName_Pseudo'] != farm_id]
if other_farms.empty:
    results_df = new_result
else:
    results_df = pd.concat([other_farms, new_result], ignore_index=True)

results_df

Mean of NormalizedDailyYield: 1.0015880766895515
Standard Deviation of NormalizedDailyYield: 0.1030289029037514
Variance of NormalizedDailyYield: 0.010614954833550634

Selected features: ['HeatStress']
Best parameters: {'lambda': 0.006260847829823792, 'alpha': 0.00013187927661273085, 'subsample': 0.5191754735316721, 'colsample_bytree': 0.9542751717099657, 'learning_rate': 0.059265766776770014, 'n_estimators': 623, 'max_depth': 6}
Estimated average production during heat stress: 0.9933
Estimated average production without heat stress: 1.0048
Reduction in milk production during heat stress: -0.0115


Unnamed: 0,FarmName_Pseudo,FarmHeatStressProduction
0,a624fb9a,-0.011362
1,5c06d92d,-0.005975
2,752efd72,-0.011452


In [6]:
# Specify the farm ID for analysis
farm_id = 'f454e660'

# Filter data for the specific farm
farm_data = milk_data[milk_data['FarmName_Pseudo'] == farm_id]

# Check if NormalizedDailyYield is centered around 1
normalized_mean = farm_data['NormalizedDailyYield'].mean()
normalized_variance = farm_data['NormalizedDailyYield'].var()
print("Mean of NormalizedDailyYield:", normalized_mean)
print("Standard Deviation of NormalizedDailyYield:", farm_data['NormalizedDailyYield'].std())
print("Variance of NormalizedDailyYield:", normalized_variance)

# Define the target variable
target = 'NormalizedDailyYield'

# Split the data into train and validation sets
train_data, val_data = train_test_split(farm_data, test_size=0.3, random_state=42)

# Define the single feature
features = ['HeatStress']

# objective() and fit_xgboost_and_calculate_reduction() are defined once, in the
# first per-farm cell above, and are reused here instead of being redefined.
# objective() reads the notebook-level train_data / features / target, so those
# must be assigned (as done above) before the call below.

# Train the model and calculate the reduction
reduction = fit_xgboost_and_calculate_reduction(train_data, val_data, features, normalized_mean)

# Create a new DataFrame for the current farm's result
new_result = pd.DataFrame([{
    'FarmName_Pseudo': farm_id,
    'FarmHeatStressProduction': reduction
}])

# Upsert by farm: drop any row left by an earlier run of this cell so that
# re-running it cannot create duplicate farm rows (duplicates would multiply
# this farm's rows when results_df is later merged into milk_data).
other_farms = results_df[results_df['FarmName_Pseudo'] != farm_id]
if other_farms.empty:
    results_df = new_result
else:
    results_df = pd.concat([other_farms, new_result], ignore_index=True)

results_df

Mean of NormalizedDailyYield: 1.001072448009299
Standard Deviation of NormalizedDailyYield: 0.2353243709243429
Variance of NormalizedDailyYield: 0.05537755955093773

Selected features: ['HeatStress']


[W 2024-09-05 10:16:00,667] Trial 62 failed with parameters: {'lambda': 3.298173019519372e-05, 'alpha': 1.1209367157573246e-05, 'subsample': 0.8013585824119855, 'colsample_bytree': 0.9671582576044206, 'learning_rate': 0.07974410134560793, 'n_estimators': 853, 'max_depth': 15} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/user/anaconda3/envs/GIGACOW/lib/python3.11/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/n0/fphw_xw93vv749r_ntt01qd80000gn/T/ipykernel_29764/137233681.py", line 39, in objective
    scores = cross_val_score(model, train_data[features], train_data[target], cv=5, scoring='neg_mean_squared_error')
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/user/anaconda3/envs/GIGACOW/lib/python3.11/site-packages/sklearn/utils/_param_vali

KeyboardInterrupt: 

In [None]:
# Merge the results_df with milk_data on 'FarmName_Pseudo'.
# - Any 'FarmHeatStressProduction' column left by a previous run of this cell is
#   dropped first; otherwise a re-run would produce '_x'/'_y' suffixed columns
#   and the assignment below would fail to find the column.
# - results_df is reduced to one row per farm (keeping the latest) so the left
#   merge cannot multiply rows of milk_data.
# Farms without a row in results_df get NaN.
milk_data = (
    milk_data
    .drop(columns='FarmHeatStressProduction', errors='ignore')
    .merge(
        results_df.drop_duplicates(subset='FarmName_Pseudo', keep='last'),
        on='FarmName_Pseudo',
        how='left',
    )
)

# Set 'FarmHeatStressProduction' to NaN where 'HeatStress' is 0
milk_data.loc[milk_data['HeatStress'] == 0, 'FarmHeatStressProduction'] = np.nan
milk_data.head(-5)

In [None]:
# Save the merged data (including 'FarmHeatStressProduction') to a new CSV file
# called 'XGBHeatFarmYieldData.csv' in the same folder as the input data.
# index=False keeps the DataFrame index out of the file.
milk_data.to_csv('../Data/MergedData/XGBHeatFarmYieldData.csv', index=False)

### Variables Explanation for `XGBHeatFarmYieldData.csv`

1. **Date**:
   - Description: The date when the milk yield was recorded.
   - Datatype: `datetime`
   - Format: `YYYY-MM-DD`
   - Example: `2022-01-01`

2. **FarmName_Pseudo**:
   - Description: A pseudo-identifier for the farm where the data was collected.
   - Datatype: `str`
   - Example: `a624fb9a`

3. **SE_Number**:
   - Description: A unique identifier for the cow, which has been formatted to include the farm and the animal number.
   - Datatype: `str`
   - Example: `SE-064c0cec-1189`

4. **Age**:
   - Description: The age of the cow in days.
   - Datatype: `Int64`
   - Example: `3095`

5. **BreedName**:
   - Description: The breed name of the cow.
   - Datatype: `str`
   - Example: `02 SLB`

6. **LactationNumber**:
   - Description: The number assigned to the cow's lactation cycle.
   - Datatype: `Int64`
   - Example: `7`

7. **DaysInMilk**:
   - Description: The number of days the cow has been in milk (lactating) at the time of recording.
   - Datatype: `Int64`
   - Example: `191`

8. **YearSeason**:
   - Description: The seasonal period based on the year and the month range.
   - Datatype: `str`
   - Example: `2022-1`
   - YearSeason parameters in yield datasets:
     - 1: Dec-Feb
     - 2: Mar-May
     - 3: Jun-Aug
     - 4: Sep-Nov

9. **DailyYield**:
   - Description: The total amount of milk produced by the cow in a single day.
   - Datatype: `float`
   - Example: `30.77`

10. **PreviousDailyYield**:
    - Description: The total amount of milk produced by the cow on the previous day.
    - Datatype: `float`
    - Example: `0.0`

11. **DailyYieldChange**:
    - Description: The change in daily milk yield from the previous day.
    - Datatype: `float`
    - Example: `0.0`

12. **ExpectedYield**:
    - Description: The expected amount of milk yield based on certain models or predictions.
    - Datatype: `float`
    - Example: `35.914865`

13. **NormalizedDailyYield**:
    - Description: The daily yield normalized to account for various factors.
    - Datatype: `float`
    - Example: `0.856748`

14. **NormalizedDailyYieldChange**:
    - Description: The change in normalized daily yield from the previous day.
    - Datatype: `float`
    - Example: `0.0`

15. **HeatStress**:
    - Description: A binary variable indicating the presence of heat stress on the cow.
    - Datatype: `Int64`
    - Example: `0`

16. **Temp15Threshold**:
    - Description: A binary variable indicating if the temperature exceeded 15 degrees Celsius on the given day.
    - Datatype: `Int64`
    - Example: `0`

17. **HW**:
    - Description: A binary variable indicating the presence of a heatwave on the day.
    - Datatype: `Int64`
    - Example: `0`

18. **cum_HW**:
    - Description: Cumulative number of heatwave days up to the current date.
    - Datatype: `Int64`
    - Example: `0`

19. **MeanTemperature**:
    - Description: The mean temperature recorded on the day.
    - Datatype: `float`
    - Example: `-3.025`

20. **MeanTHI_adj**:
    - Description: The mean adjusted Temperature-Humidity Index for the day.
    - Datatype: `float`
    - Example: `28.012944`

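21. **FarmHeatStressProduction**:
    - Description: The farm-level estimated change in normalized daily milk yield under heat stress (model prediction with `HeatStress = 1` minus prediction with `HeatStress = 0`); negative values indicate lower production. Set to `NaN` on rows where `HeatStress` is 0.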
    - Datatype: `float`
    - Example: `0.009435`