In [None]:
#| hide
from hybridpredictmaize22.GEMdataset import *
from hybridpredictmaize22.GEMlearn import *

%load_ext autoreload
%autoreload 2

In [None]:
#| hide
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from torch.utils.data import DataLoader

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.impute import KNNImputer

from sklearn.preprocessing import StandardScaler
from pathlib import Path

In [None]:
# Input locations, relative to the notebook's working directory.
trait_csv = './data/Training_Data/1_Training_Trait_Data_2014_2021.csv'
weather_csv = './data/Training_Data/4_Training_Weather_Data_2014_2021.csv'
snp_folder = './data/snpCompress/PCS_10/'

# Raw inputs: trait table, weather table, and whatever collect_snps returns for the SNP folder.
phenotype = pd.read_csv(trait_csv)
weather = pd.read_csv(weather_csv)
genotype = collect_snps(Path(snp_folder))

print(f'{phenotype.shape[0]} plots with {phenotype.shape[1]} features')
print(f'{weather.shape[0]} daily weather measurements with {weather.shape[1]} features')
print(f'{genotype[0].shape[0]} hybrids')

# Clip the number of days kept per year, then derive the year from the
# second '_'-separated field of each Env label.
weather = clip_weatherdays(weather)
weather['Year'] = [env.split('_')[1] for env in weather['Env']]

# Drop yield rows whose environment has no weather records.
setYield = set(phenotype['Env'])
setWeather = set(weather['Env'])
only_yield = setYield - setWeather
only_weather = setWeather - setYield  # computed for inspection; not used again in this cell
phenotype = phenotype.loc[~phenotype['Env'].isin(only_yield)]

# Drop yield rows whose hybrid is absent from genotype[0].
setSNP = set(genotype[0])
setYield = set(phenotype['Hybrid'])
only_yield = setYield - setSNP
phenotype = phenotype.loc[~phenotype['Hybrid'].isin(only_yield)]

# Drop rows without a recorded yield.
phenotype = phenotype.loc[phenotype['Yield_Mg_ha'].notna()]

# reset_index() (without drop=True) keeps the old index as an extra 'index' column.
Weather = weather.reset_index()
Yield = phenotype.reset_index()



136012 plots with 26 features
77431 daily weather measurements with 18 features
4928 hybrids


In [None]:
# Create a GEM dataset; `testYear` is forwarded to both the weather (WT) and yield (ST) wrappers.
testYear = 2019

gem = GemDataset(
    W=WT(Weather, testYear=testYear),
    Y=ST(Yield, testYear=testYear),
    # The original passed `Genotype`, a name that is never assigned in this notebook
    # (NameError on a fresh kernel). The SNP data loaded above is bound to `genotype`.
    # NOTE(review): assumes GemDataset expects the object returned by collect_snps - confirm.
    G=genotype,
)

In [None]:
# Build one dataset + loader per split (`Tr` / `Te` of `gem`); both share the same SNP data.
tr_ds = GemDataset(gem.W.Tr, gem.Y.Tr, gem.SNP)
te_ds = GemDataset(gem.W.Te, gem.Y.Te, gem.SNP)
tr_dl = DataLoader(dataset=tr_ds, batch_size=4)
te_dl = DataLoader(dataset=te_ds, batch_size=4)
# Bundle the two loaders; Learner.one_epoch reads them back as dls.train / dls.valid.
dls = DataLoaders(tr_dl, te_dl)

In [None]:
# Number of NaN entries in each column of the first training yield array.
np.isnan(gem.Y.Tr[0]).sum(axis=0)

array([ 5186, 21277, 22973,  9464,  9434, 17978,     0])

In [None]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Dropout(p=0.2) -> Linear.

    Args:
        input_size: number of features per sample after flattening.
        hidden_size: width of the hidden layer.
        num_classes: number of output units.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return the network output for a batch.

        Args:
            x: tensor whose first dimension is the batch. Any trailing
                dimensions are flattened into a single feature axis, so the
                product of those dimensions must equal ``input_size``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # flatten() handles any rank above 2 and, unlike view(), also accepts
        # non-contiguous inputs (it copies only when it has to).
        if x.dim() > 2:
            x = x.flatten(start_dim=1)
        out = self.fc1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out

# Example instance: 784 inputs, 256 hidden units, 2 outputs (binary classification).
# `model` is rebound to an Ensemble in the training cell at the end of the notebook.
model = MLP(
    input_size=784,
    hidden_size=256,
    num_classes=2,
)

In [None]:
class Ensemble(nn.Module):
    """Two-branch regressor: one MLP per input, concatenated into a shared head.

    The defaults reproduce the original hard-coded sizes (100 `g` features and
    4800 `w` features, i.e. the 300 x 16 `w` batches shown in this notebook).

    Args:
        g_features: input size of the `g` branch.
        w_features: input size of the `w` branch, counted after flattening.
        branch_hidden: hidden width of each branch MLP.
        branch_out: output width of each branch MLP.
        hidden_size: width of the shared hidden layer.
        p_drop: dropout probability applied after the shared hidden layer.
    """

    def __init__(self, g_features=100, w_features=4800, branch_hidden=50,
                 branch_out=25, hidden_size=50, p_drop=0.2):
        super().__init__()
        self.g_model = MLP(g_features, branch_hidden, branch_out)
        self.w_model = MLP(w_features, branch_hidden, branch_out)
        # LazyLinear infers its in_features on the first forward pass.
        self.hidden = nn.LazyLinear(hidden_size)
        self.out = nn.LazyLinear(1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=p_drop)

    def forward(self, x):
        """Return one prediction per sample.

        Args:
            x: a pair ``(g, w)`` of batched tensors. ``w`` may be 3-D; the
                branch MLP flattens it itself, so no reshape is done here.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        g, w = x
        g = self.g_model(g)
        w = self.w_model(w)
        # Join the two branch outputs along the feature axis.
        x = torch.concat((g, w), dim=1)
        x = self.hidden(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.out(x)
        return x
# Build an Ensemble instance; `e` is not used again in the visible cells.
e = Ensemble()
# Disabled forward-pass check: it needs `learn`, which is only created in the
# training cell at the end of the notebook.
#e((learn.batch[1].type(torch.float32),learn.batch[2].type(torch.float32)))



In [None]:
# MLP sized for `w`: 4800 inputs (= 300 x 16, the per-sample shape of `w` printed below),
# 100 hidden units and a single output.
wmlp = MLP(input_size=4800, hidden_size=100, num_classes=1)

In [None]:
# `g` and `w` were never assigned in this notebook (hidden kernel state), so take
# them from one training batch. The layout follows TrainCB.predict: element 1 feeds
# the Ensemble's `g` branch and element 2 its `w` branch.
sample_batch = next(iter(tr_dl))
g, w = sample_batch[1], sample_batch[2]
wmlp(w.type(torch.float32))

tensor([[-0.0896],
        [-0.1707],
        [-0.1355],
        [-0.0956]], grad_fn=<AddmmBackward0>)

In [None]:
# Shape of the `w` tensor that was fed to `wmlp` in the previous cell.
w.shape

torch.Size([4, 300, 16])

In [None]:
# Shape of `g`; its second dimension (100) equals the input size of Ensemble.g_model.
g.shape

torch.Size([4, 100])

In [None]:
import torch.nn.functional as F
import torch.optim as optim
import fastcore.all as fc
from functools import partial

class Learner():
    """Callback-driven training loop.

    The learner only sequences epochs and batches. The work of each step
    (`predict`, `get_loss`, `backward`, `step`, `zero_grad`) is dispatched by
    name to the registered callbacks through `__getattr__`.

    `callback_ctx` and `run_cbs` are not defined in this cell - presumably they
    come from the star imports at the top of the notebook; verify.
    """
    def __init__(self, model, dls=(0,), loss_func=F.mse_loss, lr=0.1, cbs=None, opt_func=optim.SGD):
        """Store the constructor arguments as same-named attributes.

        Args:
            model: module to train; must provide `train(mode)`, `training` and `parameters()`.
            dls: object exposing `.train` and `.valid` iterables. The default `(0,)` is
                only a placeholder - `one_epoch` reads `dls.train` / `dls.valid`, which a
                plain tuple does not have.
            loss_func: loss callable, stored for the callbacks to use.
            lr: default learning rate handed to `opt_func` in `fit`.
            cbs: callback or list of callbacks (normalised with `fc.L`).
            opt_func: optimizer factory, called as `opt_func(params, lr)`.
        """
        cbs = fc.L(cbs)
        # Pre-bind `self.callback` so the loop can write `self.cb_ctx('batch')` etc.
        self.cb_ctx = partial(callback_ctx, self.callback)
        # fastcore helper: saves the arguments above (with `cbs` already converted) on self.
        fc.store_attr()

    def one_epoch(self, train):
        """Run one pass over the training loader (`train=True`) or the validation loader.

        Sets the model's train/eval mode from `train`, then for every batch calls the
        `predict` and `get_loss` callbacks, plus `backward`, `step` and `zero_grad`
        when the model is in training mode.
        """
        self.model.train(train)
        self.dl = self.dls.train if train else self.dls.valid
        with self.cb_ctx('epoch'):
            # The loop targets are attributes so callbacks can read learn.iter / learn.batch.
            for self.iter,self.batch in enumerate(self.dl):
                with self.cb_ctx('batch'):
                    self.predict()
                    self.get_loss()
                    if self.training:
                        self.backward()
                        self.step()
                        self.zero_grad()

    def fit(self, n_epochs=1, train=True, valid=True, cbs=None, lr=None):
        """Train and/or validate for `n_epochs` epochs.

        Args:
            n_epochs: number of epochs to run.
            train: run a training pass in each epoch.
            valid: run a validation pass in each epoch.
            cbs: extra callbacks registered for this call only.
            lr: learning rate for this call; falls back to `self.lr` when None.
        """
        cbs = fc.L(cbs)
        # Register the per-call callbacks; the `finally` below removes them again,
        # even when fitting is interrupted by an exception.
        for cb in cbs: self.cbs.append(cb)
        try:
            self.n_epochs = n_epochs
            self.epochs = range(n_epochs)
            # A fresh optimizer is created on every fit() call.
            self.opt = self.opt_func(self.model.parameters(), self.lr if lr is None else lr)
            with self.cb_ctx('fit'):
                for self.epoch in self.epochs:
                    if train: self.one_epoch(True)
                    # torch.no_grad() used as a decorator: validation runs without gradient tracking.
                    if valid: torch.no_grad()(self.one_epoch)(False)
        finally:
            for cb in cbs: self.cbs.remove(cb)

    def __getattr__(self, name):
        """Resolve the five step names to callback dispatchers.

        Only reached when normal attribute lookup fails. For a step name it returns a
        callable that runs that method on the callbacks; anything else raises
        AttributeError.
        """
        if name in ('predict','get_loss','backward','step','zero_grad'): return partial(self.callback, name)
        raise AttributeError(name)

    # Hand `method_nm` to `run_cbs` together with the registered callbacks and this learner.
    def callback(self, method_nm): run_cbs(self.cbs, method_nm, self)

    # True while the wrapped model is in training mode.
    @property
    def training(self): return self.model.training

In [None]:
#| export
class SingleBatchCB(Callback):
    """Cancel fitting once `batches` batches have been processed.

    Args:
        batches: number of batches to run before raising CancelFitException.
    """
    # Must be a class attribute: the original assigned `order = 1` to a local
    # variable inside __init__, which was discarded and never reached the instance.
    # NOTE(review): presumably read by the callback runner to order callbacks - confirm.
    order = 1

    def __init__(self, batches):
        self.batches = batches

    def before_fit(self, learn):
        """Reset the batch counter at the start of every fit."""
        self.count = 0

    def after_batch(self, learn):
        """Count the finished batch and stop once the limit is reached."""
        self.count += 1
        # `>=` stops after exactly `batches` batches; the previous `>` let one extra batch run.
        if self.count >= self.batches:
            print(f'{self.count} batches')
            raise CancelFitException()

In [None]:
#|export
class DeviceCB(Callback):
    """Callback that puts the model and each batch on a chosen device."""

    def __init__(self, device=def_device):
        # fastcore helper: saves `device` as self.device.
        fc.store_attr()

    def before_fit(self, learn):
        """Move the learner's model to the configured device before fitting."""
        learn.model.to(self.device)

    def before_batch(self, learn):
        """Replace learn.batch with the result of sending it through `to_device`."""
        learn.batch = to_device(learn.batch, device=self.device)

In [None]:
#|export
class TrainCB(Callback):
    """Callback implementing the five training steps the Learner dispatches by name.

    Batches are indexed positionally: element 0 holds the targets (its last column
    is used), elements 1 and 2 are the two model inputs.
    """

    def predict(self, learn):
        """Run the model on batch elements 1 and 2 (cast to float32) and store learn.preds."""
        learn.preds = learn.model([learn.batch[1].type(torch.float32),
                                   learn.batch[2].type(torch.float32)])

    def get_loss(self, learn):
        """Compute learn.loss between learn.preds and the last column of batch element 0."""
        preds = learn.preds
        target = learn.batch[0][:, -1].type(torch.float32)
        # A (batch, 1) prediction against a (batch,) target broadcasts to (batch, batch)
        # inside the loss and silently yields the wrong value (PyTorch only warns).
        # Give the target the same trailing unit dimension so the shapes match.
        if preds.dim() == target.dim() + 1 and preds.shape[-1] == 1:
            target = target.unsqueeze(-1)
        learn.loss = learn.loss_func(preds, target)

    def backward(self, learn):
        """Backpropagate the current loss."""
        learn.loss.backward()

    def step(self, learn):
        """Apply one optimizer update."""
        learn.opt.step()

    def zero_grad(self, learn):
        """Clear the gradients accumulated by the optimizer's parameters."""
        learn.opt.zero_grad()

# Fit a fresh Ensemble for one epoch; TrainCB supplies every training step.
cbs = [TrainCB()]
model = Ensemble()
learn = Learner(model=model, dls=dls, cbs=cbs, lr=1e-4)
learn.fit(n_epochs=1)

  learn.loss = learn.loss_func(learn.preds,learn.batch[0][:,-1].type(torch.float32))
