In [99]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn

import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler,Normalizer

from tqdm import tqdm

import matplotlib.pyplot as plt

In [100]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [101]:
df = pd.read_csv('train_data.csv',index_col='Unnamed: 0')

In [102]:
df.head()

Unnamed: 0,t2m,NDVI,boundaryLayerHeight,rh,totalPrecipitation,windSpeed,windDirection,SWIR1,NIR,red,...,day_sin,season_cos,season_sin,week_cos,week_sin,month_cos,month_sin,Grasses_avg_daily,Grasses_std_daily,Grasses
0,-0.369523,0.4982,102.408676,88.53255,0.000732,0.649779,-135.5698,7903.0,8511.0,2964.0,...,0.017213,6.123234000000001e-17,1.0,1.0,-2.449294e-16,0.866025,0.5,0.0,0.0,0.0
1,0.845408,0.4982,167.71355,94.78888,0.001186,1.490278,-153.75153,7482.0,6773.0,3226.0,...,0.017213,6.123234000000001e-17,1.0,1.0,-2.449294e-16,0.866025,0.5,0.0,0.0,0.0
2,-3.842715,0.4982,62.99692,84.02357,0.000781,0.8237,162.49753,6292.0,7443.0,2565.0,...,0.017213,6.123234000000001e-17,1.0,1.0,-2.449294e-16,0.866025,0.5,0.0,0.0,0.0
3,0.307328,0.4982,165.39359,96.01055,0.000915,1.141481,-117.63472,9590.0,10635.0,3852.0,...,0.017213,6.123234000000001e-17,1.0,1.0,-2.449294e-16,0.866025,0.5,0.0,0.0,0.0
4,-11.214022,0.4982,27.41276,83.98948,0.00088,0.661277,156.37183,5678.0,8548.0,1672.0,...,0.017213,6.123234000000001e-17,1.0,1.0,-2.449294e-16,0.866025,0.5,0.0,0.0,0.0


In [103]:
class MyDataset(Dataset):
    def __init__(self,dataset,transform=None):
        # data loading
        X = dataset.iloc[:,:-1].values.astype('float32')
        self.y = dataset.iloc[:,-1].values.reshape(-1,1).astype('float32')
        self.n_samples = dataset.shape[0]
        
        # scaling the features
        scaler = StandardScaler()
        self.X_norm = scaler.fit_transform(X)
        
        self.transform = transform

    def __len__(self):
        return self.n_samples
        
    def __getitem__(self,index):
        sample = self.X_norm[index],self.y[index]
        
        if self.transform:
            sample = self.transform(sample)
            
        return sample

In [104]:
class ToTensor:
    def __call__(self,sample):
        inputs,targets = sample
        return torch.from_numpy(inputs), torch.from_numpy(targets)

In [105]:
composed = transforms.Compose([ToTensor()])

In [106]:
dataset = MyDataset(df,transform=composed)

In [107]:
feature, label = dataset[0]
feature.shape, label.shape

(torch.Size([38]), torch.Size([1]))

In [136]:
# Constants
input_size = 38
hidden_size = 4
output_size = 1
learning_rate = 0.00001
n_epochs = 50
batch_size = 100

In [137]:
train_loader = DataLoader(dataset=dataset,batch_size=batch_size,shuffle=True)

In [138]:
# Model
class PollenModel(nn.Module):
    def __init__(self,input_dim,hidden_size,output_dim):
        super(PollenModel,self).__init__()
        
        self.pollen = nn.Sequential(
                                    nn.Linear(input_dim,hidden_size),
                                    nn.ReLU(inplace=True),
                                    nn.Linear(hidden_size,output_dim)
                                    )
    def forward(self,x):
        return self.pollen(x)

In [139]:
model = PollenModel(input_size,hidden_size,output_size)

In [140]:
# Loss & Optemizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

In [141]:
n_total_steps = len(train_loader)

for epoch in range(n_epochs):
    for i, (features,value) in enumerate(tqdm(train_loader)):
        
        features = features.reshape(-1,38).to(device)
        value = value.to(device)
        
        model = model.to(device)
        criterion = criterion.to(device)
    
        # Predict
        y_pred = model(features)
        # Compare
        loss = torch.sqrt(criterion(y_pred,value))
        # Learn
        dw = loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if (i+1) % 1023 ==0:
            print(f'epoch {epoch+1} / {n_epochs}, loss = {loss.item():.4f}')

100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 449.86it/s]


epoch 1 / 50, loss = 40.9829


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 453.11it/s]


epoch 2 / 50, loss = 39.2096


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 447.31it/s]


epoch 3 / 50, loss = 54.2486


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 451.70it/s]


epoch 4 / 50, loss = 17.8126


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 459.12it/s]


epoch 5 / 50, loss = 24.8899


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 447.30it/s]


epoch 6 / 50, loss = 11.7124


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 446.28it/s]


epoch 7 / 50, loss = 111.7205


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 448.66it/s]


epoch 8 / 50, loss = 23.3251


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 450.35it/s]


epoch 9 / 50, loss = 41.8705


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 441.83it/s]


epoch 10 / 50, loss = 137.9099


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 443.67it/s]


epoch 11 / 50, loss = 41.0906


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 444.22it/s]


epoch 12 / 50, loss = 16.4881


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 443.19it/s]


epoch 13 / 50, loss = 19.7729


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 440.33it/s]


epoch 14 / 50, loss = 26.3586


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 445.80it/s]


epoch 15 / 50, loss = 45.6441


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 443.46it/s]


epoch 16 / 50, loss = 27.2612


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 449.51it/s]


epoch 17 / 50, loss = 32.5524


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 447.94it/s]


epoch 18 / 50, loss = 38.4630


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 461.35it/s]


epoch 19 / 50, loss = 69.5542


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 448.05it/s]


epoch 20 / 50, loss = 17.2002


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 449.37it/s]


epoch 21 / 50, loss = 26.4920


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 450.94it/s]


epoch 22 / 50, loss = 18.3832


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 448.89it/s]


epoch 23 / 50, loss = 32.3652


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 446.72it/s]


epoch 24 / 50, loss = 17.4748


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 451.25it/s]


epoch 25 / 50, loss = 10.0911


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 451.45it/s]


epoch 26 / 50, loss = 36.7035


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 448.25it/s]


epoch 27 / 50, loss = 28.6909


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 445.77it/s]


epoch 28 / 50, loss = 23.7438


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 447.79it/s]


epoch 29 / 50, loss = 8.9670


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 449.91it/s]


epoch 30 / 50, loss = 28.7463


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 443.50it/s]


epoch 31 / 50, loss = 30.7502


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 450.94it/s]


epoch 32 / 50, loss = 22.7214


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 451.83it/s]


epoch 33 / 50, loss = 47.6088


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 444.42it/s]


epoch 34 / 50, loss = 7.9798


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 447.99it/s]


epoch 35 / 50, loss = 34.6011


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 447.55it/s]


epoch 36 / 50, loss = 16.0125


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 440.41it/s]


epoch 37 / 50, loss = 33.7731


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 439.87it/s]


epoch 38 / 50, loss = 29.2380


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 443.07it/s]


epoch 39 / 50, loss = 88.7390


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 447.87it/s]


epoch 40 / 50, loss = 37.6415


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 445.87it/s]


epoch 41 / 50, loss = 108.1250


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 444.00it/s]


epoch 42 / 50, loss = 24.2728


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 448.61it/s]


epoch 43 / 50, loss = 25.8423


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 455.13it/s]


epoch 44 / 50, loss = 24.7841


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 449.09it/s]


epoch 45 / 50, loss = 18.7231


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 467.39it/s]


epoch 46 / 50, loss = 99.6558


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 438.29it/s]


epoch 47 / 50, loss = 16.1268


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 446.34it/s]


epoch 48 / 50, loss = 55.7244


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 443.95it/s]


epoch 49 / 50, loss = 71.3639


100%|█████████████████████████████████████| 1023/1023 [00:02<00:00, 447.99it/s]

epoch 50 / 50, loss = 25.2449





In [142]:
with torch.no_grad():
    list_corr = []
    list_pred = []
    for features,value in train_loader:
        
        features = features.reshape(-1,38).to(device)
        value = value.cpu().numpy()
    #     y_pred = model(features)
    #     predict = y_pred.cpu().numpy()
    #     list_corr.append(value)
    #     list_pred.append(predict)
    # predict = np.concatenate(list_pred)
    # value = np.concatenate(list_corr)
    # plt.scatter(predict,value)
    # plt.show()
    sys.exit()

NameError: name 'sys' is not defined

In [146]:
y_pred = model(features[0])

In [150]:
value[0],y_pred

(array([0.], dtype=float32),
 tensor([-0.5983], device='cuda:0', grad_fn=<AddBackward0>))

In [148]:
value

array([[  0.],
       [  1.],
       [  0.],
       [ 42.],
       [  0.],
       [  0.],
       [  4.],
       [  0.],
       [  0.],
       [  0.],
       [ 34.],
       [  0.],
       [  0.],
       [  0.],
       [  2.],
       [ 32.],
       [  3.],
       [  8.],
       [  0.],
       [ 14.],
       [  0.],
       [  0.],
       [  0.],
       [  2.],
       [  4.],
       [  6.],
       [  0.],
       [  0.],
       [  4.],
       [  0.],
       [  0.],
       [  0.],
       [  6.],
       [  2.],
       [  1.],
       [  0.],
       [  0.],
       [ 18.],
       [ 62.],
       [  9.],
       [  0.],
       [  6.],
       [  0.],
       [  0.],
       [  0.],
       [282.],
       [  0.],
       [  0.],
       [ 20.],
       [  0.],
       [  0.],
       [  0.],
       [  0.],
       [  0.],
       [  0.],
       [  0.],
       [ 38.],
       [  0.],
       [  0.],
       [  4.],
       [  0.],
       [  0.],
       [  0.],
       [  0.],
       [  0.],
       [  0.],
       [  