In [1]:
import sys
import os

# Add the parent directory to sys.path
sys.path.append(os.path.abspath('..'))

In [2]:
from scipy.optimize import curve_fit
import numpy as np
import pandas as pd




In [3]:
import src.preprocessing as pre

In [98]:
meta, weekly_summary, mapping_dict = pre.load_tomato(planting_meta_path='../data/planting_meta.json', weekly_summary_path='../data/weekly_summary.csv')

In [5]:
meta.head()

Unnamed: 0,TransplantDate,Year,WeekTransplanted,Ranch,Variety,Class,Type,Ha,WeekTransplanted_sin,WeekTransplanted_cos,ClimateSeries
2013-02-13_Felicity_ZJL_Z18_6_0.39,2013-02-13,2013,7,ZJL,Felicity,CHE,Cherry Rojo,0.3938,0.748511,0.663123,"[[-0.5605881878, -3.1935067896000002, -0.25850..."
2013-02-13_Shiren_ZJL_Z18_6_0.39,2013-02-13,2013,7,ZJL,Shiren,CHE,Cherry Rojo,0.3938,0.748511,0.663123,"[[-0.5605881878, -3.1935067896000002, -0.25850..."
2013-02-15_Amsterdam_ZJL_Z18_2_0.27,2013-02-15,2013,7,ZJL,Amsterdam,BSUF,Uva Roja,0.27,0.748511,0.663123,"[[0.3146673828, -1.2573452111, -0.258501094600..."
2013-02-15_Felicity_ZJL_Z18_5_0.21,2013-02-15,2013,7,ZJL,Felicity,CHE,Cherry Rojo,0.2138,0.748511,0.663123,"[[0.3146673828, -1.2573452111, -0.258501094600..."
2013-02-15_Olivia_ZJL_Z18_2_0.54,2013-02-15,2013,7,ZJL,Olivia,BSUF,Uva Roja,0.54,0.748511,0.663123,"[[0.3146673828, -1.2573452111, -0.258501094600..."


In [99]:
df = weekly_summary.pivot(columns='WeeksAfterTransplant', values='Kilos')

In [100]:
meta = meta[df.sum(axis=1) > 400]
df = df[df.sum(axis=1) > 400]
df = df[df.nunique(axis=1) > 2]

In [101]:
smoothed = df.fillna(0).T.rolling(window=3,min_periods=1).mean()
smoothed = smoothed * df.sum(axis=1) / smoothed.sum()

In [102]:
smoothed = smoothed.T

Fit Stats

In [103]:
def logistic(t, K, r, t0):
    return K / (1 + np.exp(-r * (t - t0)))

In [137]:
results = []
for i, row in smoothed.iterrows():
    y = row.cumsum().to_numpy()
    total_kilos = y[-1]
    x = np.arange(20)
    k0 = total_kilos
    r0 = 0.01
    t0 = 10
    p0 = [k0,r0,t0]
    bounds = (
    [total_kilos * 0.95, 1e-4, 0],   # lower bounds
    [total_kilos * 1.05, 1.0, len(x)]  # upper bounds
)
    try:
        popt,pcov = curve_fit(logistic,x,y,p0=p0,bounds=bounds)
    except:
        print(y)
    eigvals, eigvecs = np.linalg.eigh(pcov)  # safer than cholesky
    delta = 1.96 * (eigvecs @ np.sqrt(np.clip(eigvals, 0, None)) * np.ones(len(popt)))
    results.append([popt,delta])

In [138]:
results

[[array([2.22362034e+04, 8.90759570e-01, 1.55545055e+01]),
  array([ 1.90163637e+02, -2.59112090e-02,  9.85571609e-03])],
 [array([2.36210866e+04, 8.47166308e-01, 1.50690174e+01]),
  array([ 4.18653473e+02, -5.50034851e-02,  1.79134399e-02])],
 [array([7.43777624e+03, 6.72225291e-01, 1.53846411e+01]),
  array([ 6.13143883e+02, -2.38632065e-02,  4.86035645e-01])],
 [array([1.92454500e+04, 6.60420814e-01, 1.58552673e+01]),
  array([ 1.82847645e+03, -1.37102320e-01,  2.41581055e-01])],
 [array([1.09275668e+04, 1.00000000e+00, 1.41320949e+01]),
  array([3.60654341e+02, 2.47261797e-02, 2.08764628e-01])],
 [array([4.19097000e+04, 7.96436633e-01, 1.63761121e+01]),
  array([ 3.38630114e+03, -1.44933106e-01,  1.66202651e-01])],
 [array([4.48205277e+03, 9.33185209e-01, 1.51866030e+01]),
  array([2.78037575e+02, 8.43571354e-04, 3.43526317e-01])],
 [array([3.2886000e+03, 1.0000000e+00, 1.6312435e+01]),
  array([ 2.84952956e+02, -3.54398306e-02,  3.84390267e-01])],
 [array([2.40996000e+04, 7.408017

Construct A Dataset Object

In [30]:
#imports
import torch
from torch.utils.data import Dataset
#class
class HarvestDataset(Dataset):
    def __init__(self, 
                 features,         # (N, 5)
                 ranch_ids,        # (N,)
                 class_ids,        # (N,)
                 type_ids,         # (N,)
                 variety_ids,      # (N,)
                 climate_data,     # (N, 100, 3)
                 Y_kilos = None          # (N, 20)
                ):
    

        # Convert to tensors
        self.features = torch.tensor(features, dtype=torch.float32)
        self.ranch_ids = torch.tensor(ranch_ids, dtype=torch.long)
        self.class_ids = torch.tensor(class_ids, dtype=torch.long)
        self.type_ids = torch.tensor(type_ids, dtype=torch.long)
        self.variety_ids = torch.tensor(variety_ids, dtype=torch.long)
        self.climate_data = torch.tensor(climate_data, dtype=torch.float32)
        self.Y_kilos = torch.tensor(Y_kilos, dtype=torch.float32)

    def __getitem__(self, idx):
        return (
            self.features[idx],
            self.ranch_ids[idx],
            self.class_ids[idx],
            self.type_ids[idx],
            self.variety_ids[idx],
            self.climate_data[idx],
            self.Y_kilos[idx])
    

Pytorch Model