# Import Libraries

In [None]:
from fastai.vision.all import *
import fastai
from fastai.tabular.all import *
from fastai.data.load import _FakeLoader, _loaders
import torch
import torch.nn.functional as F
from ipywidgets import IntProgress
from glob import glob

import random
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import numpy as np
import os

import fastcore

In [None]:
# Custom functions
from msi_utils_Multimodal import *
from fold_utils_Multimodal import * 
from multimodal_utils import *
from multimodal_model import *


# Import Dataset and Processing

In [None]:
# K-fold validation predictions: a single CSV that the next cell splits into per-fold frames.
ff_GB_results_val = pd.read_csv(filepath_or_buffer='/path/kfold_Predictions.csv')


In [None]:
# Split the stacked k-fold prediction table (ff_GB_results_val) into one DataFrame per fold.
#
# 'Unnamed: 0' is the unnamed first column of the CSV. The split relies on that
# value being 0 on the first row of every fold.
# NOTE(review): presumably this is a saved row index that restarts at 0 for each
# fold -- confirm against the code that wrote the CSV.
#
# A fold starts on every row whose value is 0. Only the row itself is tested:
# additionally requiring the *next* row to be non-zero would lose the start of a
# single-row fold (that row would be dropped or merged into a neighbouring fold).
# The boolean mask is converted to integer positions so the boundaries are valid
# for .iloc whatever index labels the DataFrame carries.
indices = (ff_GB_results_val['Unnamed: 0'] == 0).to_numpy().nonzero()[0].tolist()

# One-past-the-end position, so the final fold extends through the last row.
indices.append(len(ff_GB_results_val))

# Each pair of consecutive boundaries delimits exactly one fold.
# Rows located before the first fold start (if any) are not part of any fold.
dfs = [
    ff_GB_results_val.iloc[start_idx:end_idx]
    for start_idx, end_idx in zip(indices[:-1], indices[1:])
]

# dfs now holds one DataFrame per fold, in file order.


In [None]:
# Hold-out set predictions, read from CSV.
ff_GB_results = pd.read_csv(filepath_or_buffer='/path/holdout_predictions.csv')


In [None]:
# Table holding every yield value across the training, validation and hold-out data.
All_Dataset = pd.read_csv(filepath_or_buffer='/path/Train_Val_Holdout.csv')


In [None]:
# Smallest and largest value of the 'Yield' column of All_Dataset.
# Series.min()/.max() skip missing values; the built-in min()/max() compare
# element by element and return an order-dependent (possibly NaN) result when
# the column contains a NaN.
df_ymin, df_ymax = All_Dataset['Yield'].min(), All_Dataset['Yield'].max()

print("Min target_yield:", df_ymin)
print("Max target_yield:", df_ymax)


# Individual Module Evaluation

Mixed Predictions

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Hold-out metrics for the 'mixed_pred' column, scored against 'Yield'.

# Root-mean-squared error.
rmse = np.sqrt(
    mean_squared_error(y_true=ff_GB_results['Yield'], y_pred=ff_GB_results['mixed_pred'])
)

# RMSE expressed as a percentage of the yield range (df_ymax - df_ymin).
percentage_rmse = rmse / (df_ymax - df_ymin) * 100

# Coefficient of determination (R^2).
r_squared = r2_score(y_true=ff_GB_results['Yield'], y_pred=ff_GB_results['mixed_pred'])

# Report: one metric per line.
print(
    f"RMSE_mixed: {rmse}",
    f"Percentage RMSE_mixed: {percentage_rmse}%",
    f"R-squared_mixed: {r_squared}",
    sep="\n",
)

Image Predictions

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Hold-out metrics for the 'msi_pred' column, scored against 'Yield'.

# Root-mean-squared error.
rmse = np.sqrt(
    mean_squared_error(y_true=ff_GB_results['Yield'], y_pred=ff_GB_results['msi_pred'])
)

# RMSE expressed as a percentage of the yield range (df_ymax - df_ymin).
percentage_rmse = rmse / (df_ymax - df_ymin) * 100

# Coefficient of determination (R^2).
r_squared = r2_score(y_true=ff_GB_results['Yield'], y_pred=ff_GB_results['msi_pred'])

# Report: one metric per line.
print(
    f"RMSE_Image: {rmse}",
    f"Percentage RMSE_Image: {percentage_rmse}%",
    f"R-squared_Image: {r_squared}",
    sep="\n",
)

In [None]:
Tabular Predictions

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Hold-out metrics for the 'tab_pred' column, scored against 'Yield'.

# Root-mean-squared error.
rmse = np.sqrt(
    mean_squared_error(y_true=ff_GB_results['Yield'], y_pred=ff_GB_results['tab_pred'])
)

# RMSE expressed as a percentage of the yield range (df_ymax - df_ymin).
percentage_rmse = rmse / (df_ymax - df_ymin) * 100

# Coefficient of determination (R^2).
r_squared = r2_score(y_true=ff_GB_results['Yield'], y_pred=ff_GB_results['tab_pred'])

# Report: one metric per line.
print(
    f"RMSE_Tabular: {rmse}",
    f"Percentage RMSE_Tabular: {percentage_rmse}%",
    f"R-squared_Tabular: {r_squared}",
    sep="\n",
)

Weighted Predictions

In [None]:
import pandas as pd

# Fixed-weight combination of the three prediction columns of ff_GB_results
# ('mixed_pred', 'msi_pred', 'tab_pred'). Each column is multiplied by its
# weight, stored as a 'Scaled_*' column, and the three scaled columns are then
# added row by row into 'Final_Scaled_Predictions'.

# Per-column weights.
Weight_mixed, Weight_msi, Weight_tab = 0.24, 0.34, 0.42

# Weighted version of each prediction column.
ff_GB_results['Scaled_mixed_pred'] = Weight_mixed * ff_GB_results['mixed_pred']
ff_GB_results['Scaled_msi_pred'] = Weight_msi * ff_GB_results['msi_pred']
ff_GB_results['Scaled_tab_pred'] = Weight_tab * ff_GB_results['tab_pred']

# Row-wise sum of the weighted columns (mixed + msi, then + tab).
ff_GB_results['Final_Scaled_Predictions'] = ff_GB_results['Scaled_mixed_pred'] + ff_GB_results['Scaled_msi_pred'] + ff_GB_results['Scaled_tab_pred']

# Show the table including the newly added columns.
print(ff_GB_results)


In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Hold-out metrics for the 'Final_Scaled_Predictions' column, scored against 'Yield'.

# Root-mean-squared error.
rmse = np.sqrt(
    mean_squared_error(
        y_true=ff_GB_results['Yield'],
        y_pred=ff_GB_results['Final_Scaled_Predictions'],
    )
)

# RMSE expressed as a percentage of the yield range (df_ymax - df_ymin).
percentage_rmse = rmse / (df_ymax - df_ymin) * 100

# Coefficient of determination (R^2).
r_squared = r2_score(
    y_true=ff_GB_results['Yield'],
    y_pred=ff_GB_results['Final_Scaled_Predictions'],
)

# Report: one metric per line.
print(
    f"RMSE_Weighted: {rmse}",
    f"Percentage RMSE_Weighted: {percentage_rmse}%",
    f"R-squared_Weighted: {r_squared}",
    sep="\n",
)

# Multimodal K-Fold Evaluation

In [None]:
import pandas as pd

# Per-column weights applied to the three prediction columns.
Weight_mixed, Weight_msi, Weight_tab = 0.24, 0.34, 0.42

# Collects one augmented frame per fold.
scaled_dfs = []

for df in dfs:
    # assign() returns a new frame, so the fold slices held in dfs are left untouched.
    # The final column is given as a callable so that it can read the three
    # 'Scaled_*' columns created earlier in the same call.
    df_copy = df.assign(
        Scaled_mixed_pred=df['mixed_pred'] * Weight_mixed,
        Scaled_msi_pred=df['msi_pred'] * Weight_msi,
        Scaled_tab_pred=df['tab_pred'] * Weight_tab,
        Final_Scaled_Predictions=lambda fold: (
            fold['Scaled_mixed_pred'] + fold['Scaled_msi_pred'] + fold['Scaled_tab_pred']
        ),
    )

    # Show the augmented fold, then keep it for the weighted k-fold metrics.
    print(df_copy)
    scaled_dfs.append(df_copy)

# scaled_dfs now mirrors dfs, with the four extra columns on every fold.


Mixed Predictions

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Per-fold metrics for the 'mixed_pred' column, scored against 'target_yield'.
rmse_list, percentage_rmse_list, r_squared_list = [], [], []

for df in dfs:
    # Root-mean-squared error of this fold.
    rmse = np.sqrt(mean_squared_error(y_true=df['target_yield'], y_pred=df['mixed_pred']))
    # The same error as a percentage of the yield range (df_ymax - df_ymin).
    percentage_rmse = rmse / (df_ymax - df_ymin) * 100
    # Coefficient of determination of this fold.
    r_squared = r2_score(y_true=df['target_yield'], y_pred=df['mixed_pred'])

    rmse_list += [rmse]
    percentage_rmse_list += [percentage_rmse]
    r_squared_list += [r_squared]

# Mean and standard deviation of each metric across folds (np.std with its default settings).
avg_rmse, std_rmse = np.mean(rmse_list), np.std(rmse_list)
avg_percentage_rmse, std_percentage_rmse = np.mean(percentage_rmse_list), np.std(percentage_rmse_list)
avg_r_squared, std_r_squared = np.mean(r_squared_list), np.std(r_squared_list)

# Report: one metric per line.
print(
    f"Average RMSE_mixed: {avg_rmse}, Standard Deviation RMSE_mixed: {std_rmse}",
    f"Average Percentage RMSE_mixed: {avg_percentage_rmse}%, Standard Deviation Percentage RMSE_mixed: {std_percentage_rmse}%",
    f"Average R-squared_mixed: {avg_r_squared}, Standard Deviation R-squared_mixed: {std_r_squared}",
    sep="\n",
)


Image Predictions

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Per-fold metrics for the 'msi_pred' column, scored against 'target_yield'.
rmse_list = []
percentage_rmse_list = []
r_squared_list = []

for df in dfs:
    # Root-mean-squared error of this fold.
    rmse = np.sqrt(mean_squared_error(df['target_yield'], df['msi_pred']))
    # The same error as a percentage of the yield range (df_ymax - df_ymin).
    percentage_rmse = ((rmse / (df_ymax - df_ymin)) * 100)
    # Coefficient of determination of this fold.
    r_squared = r2_score(df['target_yield'], df['msi_pred'])

    rmse_list.append(rmse)
    percentage_rmse_list.append(percentage_rmse)
    r_squared_list.append(r_squared)

# Mean and standard deviation of each metric across folds (np.std with its default settings).
avg_rmse = np.mean(rmse_list)
std_rmse = np.std(rmse_list)
avg_percentage_rmse = np.mean(percentage_rmse_list)
std_percentage_rmse = np.std(percentage_rmse_list)
avg_r_squared = np.mean(r_squared_list)
std_r_squared = np.std(r_squared_list)

# Report. The standard-deviation labels carry the "_Image" suffix so they match
# the values printed next to them (they previously read "_mixed").
print(f"Average RMSE_Image: {avg_rmse}, Standard Deviation RMSE_Image: {std_rmse}")
print(f"Average Percentage RMSE_Image: {avg_percentage_rmse}%, Standard Deviation Percentage RMSE_Image: {std_percentage_rmse}%")
print(f"Average R-squared_Image: {avg_r_squared}, Standard Deviation R-squared_Image: {std_r_squared}")


Tabular Predictions

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Per-fold metrics for the 'tab_pred' column, scored against 'target_yield'.
rmse_list = []
percentage_rmse_list = []
r_squared_list = []

for df in dfs:
    # Root-mean-squared error of this fold.
    rmse = np.sqrt(mean_squared_error(df['target_yield'], df['tab_pred']))
    # The same error as a percentage of the yield range (df_ymax - df_ymin).
    percentage_rmse = ((rmse / (df_ymax - df_ymin)) * 100)
    # Coefficient of determination of this fold.
    r_squared = r2_score(df['target_yield'], df['tab_pred'])

    rmse_list.append(rmse)
    percentage_rmse_list.append(percentage_rmse)
    r_squared_list.append(r_squared)

# Mean and standard deviation of each metric across folds (np.std with its default settings).
avg_rmse = np.mean(rmse_list)
std_rmse = np.std(rmse_list)
avg_percentage_rmse = np.mean(percentage_rmse_list)
std_percentage_rmse = np.std(percentage_rmse_list)
avg_r_squared = np.mean(r_squared_list)
std_r_squared = np.std(r_squared_list)

# Report. The standard-deviation labels carry the "_Tab" suffix so they match
# the values printed next to them (they previously read "_mixed").
print(f"Average RMSE_Tab: {avg_rmse}, Standard Deviation RMSE_Tab: {std_rmse}")
print(f"Average Percentage RMSE_Tab: {avg_percentage_rmse}%, Standard Deviation Percentage RMSE_Tab: {std_percentage_rmse}%")
print(f"Average R-squared_Tab: {avg_r_squared}, Standard Deviation R-squared_Tab: {std_r_squared}")


Weighted Predictions

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Per-fold metrics for the 'Final_Scaled_Predictions' column of the frames in
# scaled_dfs, scored against 'target_yield'.
rmse_list = []
percentage_rmse_list = []
r_squared_list = []

for df in scaled_dfs:
    # Root-mean-squared error of this fold.
    rmse = np.sqrt(mean_squared_error(df['target_yield'], df['Final_Scaled_Predictions']))
    # The same error as a percentage of the yield range (df_ymax - df_ymin).
    percentage_rmse = ((rmse / (df_ymax - df_ymin)) * 100)
    # Coefficient of determination of this fold.
    r_squared = r2_score(df['target_yield'], df['Final_Scaled_Predictions'])

    rmse_list.append(rmse)
    percentage_rmse_list.append(percentage_rmse)
    r_squared_list.append(r_squared)

# Mean and standard deviation of each metric across folds (np.std with its default settings).
avg_rmse = np.mean(rmse_list)
std_rmse = np.std(rmse_list)
avg_percentage_rmse = np.mean(percentage_rmse_list)
std_percentage_rmse = np.std(percentage_rmse_list)
avg_r_squared = np.mean(r_squared_list)
std_r_squared = np.std(r_squared_list)

# Report. The standard-deviation labels carry the "_Weighted" suffix so they
# match the values printed next to them (they previously read "_mixed").
print(f"Average RMSE_Weighted: {avg_rmse}, Standard Deviation RMSE_Weighted: {std_rmse}")
print(f"Average Percentage RMSE_Weighted: {avg_percentage_rmse}%, Standard Deviation Percentage RMSE_Weighted: {std_percentage_rmse}%")
print(f"Average R-squared_Weighted: {avg_r_squared}, Standard Deviation R-squared_Weighted: {std_r_squared}")
