In [None]:
import gc
import logging
# set seed
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
import utils as ut
import experiment as exp
from evaluation import *
from sklearn.metrics import mean_squared_error
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
import torch
import random
#define fixed_hyperparams and create a config gen
from configurations import RandomConfigGen, Configuration
from torch import nn
from deep_net import RandomNet
from experiment import run_experiment
import regex as re
from pathlib import *
import experiment as ex
from sk_models import StandardScaler

# Seed every random number generator used by the notebook. The three global
# seeds are offset from one another (seed, seed + 1, seed + 2).
seed = 1
torch.manual_seed(seed)
random.seed(seed + 1)
np.random.seed(seed + 2)
# Dedicated generator object that is passed explicitly to code taking a random_state.
random_state = np.random.RandomState(seed)

# Free cached CUDA memory and run a garbage-collection pass.
torch.cuda.empty_cache()
gc.collect()

# get_device_name(0) raises when no CUDA device exists, so only query it when
# a GPU is actually available.
if torch.cuda.is_available():
    print(f"GPU detected is {torch.cuda.get_device_name(0)}")
else:
    print("No GPU detected; running on CPU")

In [None]:
# Input file and column roles.
file_name = "mango_684_990.csv" #"mango_684_990.csv" #"mango_729_975.csv" #filtered=513-1050
id_cols =['Set','Season','Region','Date','Type','Cultivar','Pop','Temp','FruitID']#
output_cols = ['DM']

# NOTE(review): these are absolute, machine-specific paths, and data_path points
# at 'soil_data' although the file name is a mango data set - confirm both.
data_path = Path('D:/workspace/lazydeep/data/soil_data/')
log_path = Path("D:/workspace/lazydeep/experiments/mango_cnn") #1.01/")

data_file = data_path / file_name
# Output directory is named after the input file with its ".csv" suffix removed.
log_dir = log_path / re.sub(r'\.(?=csv$)[^.]+$', '',file_name)
# parents=True also creates log_path (and anything above it) when missing;
# exist_ok=True makes the cell safe to re-run.
log_dir.mkdir(parents=True, exist_ok=True)
print(f"Output directory is {log_dir}")

In [None]:
# Load the CSV, shuffle the rows, then pass the frame through ut.sample_data.
data = pd.read_csv(data_file)
# Shuffle with the notebook's explicit generator so the row order does not
# depend on whatever else has consumed the global numpy RNG.
data = data.sample(frac=1, random_state=random_state)
data = ut.sample_data(data,random_state)
nrow, ncol = data.shape
# Feature columns are whatever is left after removing the id and output columns.
# Uses len(output_cols) rather than a hard-coded 1 so the count stays right if
# more targets are added.
# NOTE(review): assumes every name in id_cols/output_cols is a column of data - confirm.
n_features = ncol - len(output_cols) - len(id_cols)
dataset = TabularDataset(data,id_cols = id_cols, cat_cols=None, output_cols=output_cols, ignore_cols= None)
print(f"Dataset shape is {data.shape}")

In [None]:
# set logging, in this case the root logger
ut.setup_logger(logger_name="",file_name=log_dir/"log.txt")
ut.setup_logger(logger_name="summary",file_name=log_dir/"summary.txt")
tb = SummaryWriter(log_dir/"tb")
summary_logger = logging.getLogger("summary")

# `preprocessing` was used below without being defined in any visible cell,
# which raises NameError on a fresh kernel.
# NOTE(review): StandardScaler is imported from sk_models at the top of the
# notebook and otherwise unused, so it is presumably the intended
# preprocessing step; assumes a no-argument constructor - confirm.
preprocessing = StandardScaler()
eval_ = MangoesSplitter(preprocessing=preprocessing,tensorboard=None,time=True,random_state=random_state)

In [None]:
#take our splits on mangoes
# The split helpers are called on `dataset` (the TabularDataset wrapper):
# `data` is a pandas DataFrame and has no split_by_col/split methods.
# NOTE(review): assumes TabularDataset provides split_by_col and split with
# these signatures - confirm.
train_ind, val_ind, test_ind = dataset.split_by_col(col = 'Set',train_key="Cal",val_key='Tuning',test_key='Val Ext')
# `self.preprocessing` cannot be used here because this cell is not inside a class.
# NOTE(review): a fresh StandardScaler (imported from sk_models) is passed
# instead; assumes a no-argument constructor - confirm.
train_data, val_data, test_data = dataset.split(train_ind, val_ind, test_ind, preprocessing=StandardScaler())

In [None]:
def train_batch(model,opt,X,y,loss_fun = torch.nn.MSELoss()):
    """Run one optimisation step on a single batch.

    Args:
        model: callable module; ``model(X)`` produces the predictions.
        opt: optimizer over the model's parameters.
        X: input batch.
        y: target batch, passed to ``loss_fun`` together with the predictions.
        loss_fun: callable ``loss_fun(preds, y)`` returning a scalar tensor.

    Returns:
        The batch loss as a tensor detached from the autograd graph, so that
        callers can accumulate or store it without keeping the graph alive.
    """
    preds = model(X)
    loss = loss_fun(preds,y)

    # Clear stale gradients, back-propagate, then update the parameters.
    opt.zero_grad()
    loss.backward()
    opt.step()

    return loss.detach()
    
def train_loop(model,opt,train_dataset,val_dataset,n_epochs=100,bs=32):
    """Train ``model`` for ``n_epochs`` and record the loss after every epoch.

    Args:
        model: module to train; batches are moved to the device of its
            first parameter (CPU when it has no parameters).
        opt: optimizer over the model's parameters.
        train_dataset: dataset yielding ``(X, y)`` pairs, wrapped in a
            shuffling DataLoader.
        val_dataset: dataset passed to ``test_loop`` after every epoch.
        n_epochs: number of passes over ``train_dataset``.
        bs: training batch size.

    Returns:
        ``(train_scores_epoch, val_scores_epoch)``: two lists with one entry
        per epoch - the mean of the per-batch training losses, and whatever
        ``test_loop`` returned for the validation set.

    Raises:
        ValueError: if ``train_dataset`` yields no batches.
    """
    #setup
    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
    n_batches_train = len(train_loader)
    if n_batches_train == 0:
        raise ValueError("train_dataset is empty")

    # This is a plain function, so there is no self.device; take the device
    # from the model instead.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    train_scores_epoch = []
    val_scores_epoch = []

    for epoch in range(n_epochs):
        # test_loop puts the model in eval mode, so re-enable training mode each epoch.
        model.train()
        epoch_total = 0.0

        #train a batch and average the batch scores
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device).float()
            y_batch = y_batch.to(device).float()

            batch_loss = train_batch(model,opt,X_batch,y_batch)
            # float() pulls out the Python number so no tensors are accumulated.
            epoch_total += float(batch_loss)
        train_scores_epoch.append(epoch_total/n_batches_train)

        #validate
        val_score = test_loop(model,val_dataset)
        val_scores_epoch.append(val_score)

    return train_scores_epoch,val_scores_epoch

def test_loop(model,dataset):
    #setup
    model.eval()
    
    test_loader = DataLoader(train_data, batch_size=None, shuffle=False)
    test_record = 0.0
    
    n_test = len(test_loader.dataset)
    n_batches = len(test_loader)
        
    for batch_ind, (X_batch, y_batch) in enumerate(test_loader):
        X_batch = X_batch.to(self.device).float()
        y_batch = y_batch.to(self.device).float()

        batch_loss = train_batch(model,opt,X_batch,y_batch)
        test_record += batch_loss
            
    return test_record/n_batches
    

In [None]:
class MangoCNN(nn.Module):
    """One 1-D convolution followed by dense ELU layers and a single-output head.

    The network applies a Conv1d (one input channel, one output channel,
    stride 1, no padding), flattens the result, passes it through one
    ``Linear`` + ``ELU`` pair per entry of ``deep_outputs`` and finishes with
    a ``Linear`` layer producing one value per sample.

    Args:
        input_size: number of points in each input spectrum.
        deep_outputs: output width of each dense layer, in order.
        kernel_size: width of the convolution kernel.
            NOTE(review): the original Conv1d call named ``kernel_size``
            without giving it a value; 21 is a placeholder default - confirm.

    Raises:
        ValueError: if ``kernel_size`` is not in ``1..input_size``.
    """

    def __init__(self,input_size=256,deep_outputs=[36,18,12],kernel_size=21):
        super().__init__()

        # Copy so the shared default list (or a caller's list) is never aliased.
        self.deep_outputs = list(deep_outputs)
        self.input_size = input_size
        self.kernel_size = kernel_size

        # Build the layers immediately so parameters() and forward() work
        # right after construction.
        self.setup()

    def setup(self):
        """(Re)create the conv layer, the dense block and the output head."""
        if not 0 < self.kernel_size <= self.input_size:
            raise ValueError(
                f"kernel_size must be between 1 and input_size ({self.input_size}), got {self.kernel_size}"
            )

        self.conv = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=self.kernel_size)

        # With stride 1, no padding and no dilation the convolution shortens
        # the signal by kernel_size - 1 points.
        last_size = self.input_size - self.kernel_size + 1

        deep_layers = []
        for width in self.deep_outputs:
            lin = nn.Linear(last_size, width)
            # The initialiser works on the weight tensor, not on the module.
            nn.init.kaiming_uniform_(lin.weight, mode='fan_in', nonlinearity='relu')

            deep_layers.append(lin)
            deep_layers.append(nn.ELU())

            last_size = width
        # Sequential takes the layers as separate positional arguments.
        self.deep_block = nn.Sequential(*deep_layers)
        self.head = nn.Linear(last_size, 1)

    def forward(self, X, *args, **kargs):
        """Return predictions of shape ``(batch, 1)``.

        ``X`` may be ``(batch, input_size)`` or ``(batch, 1, input_size)``;
        extra positional and keyword arguments are accepted and ignored.
        """
        if X.dim() == 2:
            # Conv1d expects a channel dimension: (batch, channels, length).
            X = X.unsqueeze(1)
        X_t = self.conv(X)
        # Drop the channel dimension so the dense layers see (batch, features).
        X_t = X_t.flatten(start_dim=1)
        X_t = self.deep_block(X_t)
        y_hat = self.head(X_t)
        return y_hat
    

In [None]:
# NOTE(review): learning_rate was not defined in any visible cell; 0.01 is a
# placeholder - confirm the intended value.
learning_rate = 0.01
# The model must exist before its parameters are handed to the optimizer, and
# nn.Linear requires explicit input/output sizes.
model = nn.Linear(n_features, len(output_cols))
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
# ELU activations
# He-normal initialisation
# Adam optimiser, lr = 0.01*bs/256
# L2 regularisation on the loss function
# decrease lr on plateau by a factor of 2 after 25 epochs

In [None]:
def savitsky_golay(X,kernel_size=3,polynomial = 2, derivative=0):
    """Apply a Savitzky-Golay filter to ``X`` with scipy's ``savgol_filter``.

    Args:
        X: array to filter; scipy filters along the last axis by default.
        kernel_size: filter window length.
        polynomial: order of the fitted polynomial; scipy requires it to be
            smaller than ``kernel_size``.
        derivative: order of the derivative to compute (0 = smoothing only).

    Returns:
        The filtered array, same shape as ``X``.
    """
    from scipy.signal import savgol_filter
    # scipy's keyword is `deriv`; forward the caller's value instead of a fixed 0.
    return savgol_filter(X, kernel_size, polynomial, deriv=derivative)

def norm(X):
    """Return the Euclidean norm of ``X`` taken along axis 1 (one value per row)."""
    return np.linalg.norm(X, ord=None, axis=1)

def snv(X):
    """Standard normal variate: centre and scale each spectrum individually.

    Every row of ``X`` (or ``X`` itself when 1-D) has its own mean subtracted
    and is divided by its own population standard deviation (``ddof=0``).

    Args:
        X: array-like of spectra, one spectrum per row.

    Returns:
        Float array with the same shape as ``X``. A constant spectrum, whose
        standard deviation is 0, is returned as all zeros rather than NaN.
    """
    X = np.asarray(X, dtype=float)
    row_mean = X.mean(axis=-1, keepdims=True)
    row_std = X.std(axis=-1, keepdims=True)
    # Avoid dividing by zero for constant spectra.
    row_std = np.where(row_std == 0, 1.0, row_std)
    return (X - row_mean) / row_std
    
def msc(X, reference=None):
    """Multiplicative scatter correction.

    Each spectrum (row) is regressed on a reference spectrum by least squares,
    ``row = intercept + slope * reference``, and replaced by
    ``(row - intercept) / slope``.

    Args:
        X: 2-D array-like, one spectrum per row.
        reference: optional 1-D reference spectrum; defaults to the mean
            spectrum of ``X`` (mean over rows).

    Returns:
        Float array with the same shape as ``X``. Rows whose fitted slope is 0
        only have the intercept removed.

    Raises:
        ValueError: if ``X`` is not 2-D, the reference length does not match,
            or the reference spectrum is constant.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array with one spectrum per row")

    #take average
    if reference is None:
        mean_spectra = X.mean(axis=0)
    else:
        mean_spectra = np.asarray(reference, dtype=float)
    if mean_spectra.shape != (X.shape[1],):
        raise ValueError("reference must be 1-D with one value per column of X")

    ref_centered = mean_spectra - mean_spectra.mean()
    ref_ss = ref_centered @ ref_centered
    if ref_ss == 0:
        raise ValueError("reference spectrum is constant; cannot fit a slope")

    # Closed-form simple linear regression of every row on the reference.
    row_means = X.mean(axis=1)
    slopes = (X - row_means[:, None]) @ ref_centered / ref_ss
    intercepts = row_means - slopes * mean_spectra.mean()

    # Avoid dividing by zero for rows with no multiplicative component.
    slopes = np.where(slopes == 0, 1.0, slopes)
    return (X - intercepts[:, None]) / slopes[:, None]