<a href="https://colab.research.google.com/github/hosseinm/Interview/blob/main/Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
'''Regression with PyTorch.'''
from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import numpy as np
import argparse
import math
import pdb
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from google.colab import drive  # Colab-only helper that exposes Google Drive to this runtime
# Mount Google Drive at /content/gdrive; the CSV files read later in this
# notebook are addressed through this mount point.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [10]:
# Training configuration: mini-batch size, number of epochs, optimizer learning rate.
batchsize, epochs, lr = 20000, 1000, 0.5
# Module-level loss slot, initialised to zero.
train_loss = 0
# Seed torch's global RNG so repeated runs draw the same random numbers.
torch.manual_seed(1)
class FeatureDataset(Dataset):
    """Tabular regression dataset backed by a CSV file.

    Every sample is a ``(features, label)`` pair of ``float32`` tensors: the
    features are the first ``n_features`` columns of a row and the label is
    the value in the row's last column.

    Args:
        csv_file: Path of the CSV file, read with ``pandas.read_csv``.
        n_features: Number of leading columns used as model inputs.
            Defaults to 10, the value that used to be hard-coded.

    Attributes:
        data_frame: The raw ``pandas.DataFrame`` loaded from ``csv_file``.
        features: ``float32`` tensor of shape ``(rows, n_features)``.
        labels: ``float32`` tensor of shape ``(rows,)``.
    """

    def __init__(self, csv_file, n_features=10):
        self.data_frame = pd.read_csv(csv_file)
        self.n_features = n_features
        # Convert the whole table to tensors once.  Doing the pandas lookup and
        # the numpy/torch conversion inside __getitem__ repeats that work for
        # every single sample of every batch, which dominates the epoch time.
        feature_values = self.data_frame.iloc[:, :n_features].to_numpy(dtype=np.float32)
        label_values = self.data_frame.iloc[:, -1].to_numpy(dtype=np.float32)
        self.features = torch.tensor(feature_values).contiguous()
        self.labels = torch.tensor(label_values)

    def __len__(self):
        """Return the number of rows (samples) in the CSV file."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` tensors stored for row ``idx``."""
        return self.features[idx], self.labels[idx]

# Folder on the mounted Google Drive that holds the CSV files.
DATA_DIR = '/content/gdrive/MyDrive/datascience/ML'

# Despite the "loader" in their names, these two objects are the datasets;
# the DataLoaders that batch them are built just below.
trainloader_all = FeatureDataset(csv_file=DATA_DIR + '/train.csv')
testloader_all = FeatureDataset(csv_file=DATA_DIR + '/test_gt.csv')

# Training batches are reshuffled every epoch.
trainloader = DataLoader(trainloader_all, batch_size=batchsize, shuffle=True)
# Evaluation keeps the dataset order: shuffling brings no benefit when only
# measuring error, and a fixed order lets per-sample outputs be matched to the
# rows of test_gt.csv.
testloader = DataLoader(testloader_all, batch_size=batchsize, shuffle=False)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
print(device)



cuda


In [12]:

# this is one way to define a network
class Net(torch.nn.Module):
    """Fully connected regressor with two hidden blocks and a linear head.

    Each hidden block is ``Linear -> BatchNorm1d -> ReLU`` (widths 20 and 10);
    the head is a plain ``Linear`` layer with no activation.

    Args:
        n_feature: Number of input features per sample.
        n_output: Number of values produced per sample.
    """

    def __init__(self, n_feature, n_output):
        super().__init__()
        first_width, second_width = 20, 10
        self.linear1 = nn.Linear(n_feature, first_width)
        self.bn1 = nn.BatchNorm1d(first_width)
        self.linear2 = nn.Linear(first_width, second_width)
        self.bn2 = nn.BatchNorm1d(second_width)
        self.linear3 = nn.Linear(second_width, n_output)

    def forward(self, x):
        """Map a ``(batch, n_feature)`` tensor to ``(batch, n_output)`` predictions."""
        hidden_blocks = ((self.linear1, self.bn1), (self.linear2, self.bn2))
        for linear, norm in hidden_blocks:
            # hidden block: affine map, batch normalisation, then ReLU
            x = torch.relu(norm(linear(x)))
        # output head stays linear so the regression target is unbounded
        return self.linear3(x)

# Build the regressor: 10 input features, a single output value.
net = Net(n_feature=10, n_output=1)
# Adadelta over the trainable parameters only, with L2 weight decay.
optimizer = optim.Adadelta(
    (param for param in net.parameters() if param.requires_grad),
    lr=lr,
    weight_decay=5e-4,
)
# Define the loss function: L1 loss, i.e. mean absolute error.
loss_func = nn.L1Loss()
print(net)

Net(
  (linear1): Linear(in_features=10, out_features=20, bias=True)
  (bn1): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear2): Linear(in_features=20, out_features=10, bias=True)
  (bn2): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear3): Linear(in_features=10, out_features=1, bias=True)
)


In [13]:
def train(epoch):
    """Run one optimisation pass over ``trainloader`` and log the result.

    Uses the module-level ``net``, ``optimizer``, ``loss_func``, ``device``
    and ``trainloader``.  One line ``<train_loss>,<mae>`` is appended to
    ``result.txt``.

    Args:
        epoch: Index of the current epoch; only used in the printed messages.

    Returns:
        tuple: ``(train_loss, mae)`` where ``train_loss`` is the sum of the
        per-batch ``loss_func`` values and ``mae`` is that sum divided by the
        number of batches.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    net.to(device)
    train_loss = 0
    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = net(inputs)
        # The network emits (batch, 1); drop the trailing axis to match targets.
        loss = loss_func(outputs.squeeze(1), targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    mae = train_loss / len(trainloader)
    print("Train: epoch number : ", epoch , "Train MAE:", mae)
    # The context manager closes the file even if a write fails.  The two
    # numbers are comma-separated; written back to back they cannot be told
    # apart when the log is read again.
    with open("result.txt", "a") as log_file:
        log_file.write('%s,%s\n' % (train_loss, mae))
    return train_loss,mae



def test(epoch):
    """Evaluate ``net`` on ``testloader`` and checkpoint it when the MAE improves.

    Uses the module-level ``net``, ``optimizer``, ``loss_func``, ``device``
    and ``testloader``.  One line ``<test_loss>,<mae>`` is appended to
    ``result.txt``.  The lowest MAE seen so far is kept in ``test.best_mae``;
    whenever an epoch beats it, ``checkpoint.pt`` and ``test_pred.csv`` are
    rewritten.

    Args:
        epoch: Index of the current epoch; printed and stored in the checkpoint.

    Returns:
        tuple: ``(test_loss, mae)`` where ``test_loss`` is the sum of the
        per-batch ``loss_func`` values and ``mae`` is that sum divided by the
        number of batches.
    """
    net.eval()
    net.to(device)  # same placement step as train(), so test() also works on its own
    test_loss = 0
    predictions = []
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            # The network emits (batch, 1); drop the trailing axis to match targets.
            outputs = net(inputs).squeeze(1)
            loss = loss_func(outputs, targets)
            test_loss += loss.item()
            # Keep the predictions themselves (moved to the CPU so numpy can
            # write them); the per-batch loss is not what test_pred.csv is for.
            predictions.append(outputs.cpu())
    mae = test_loss / len(testloader)
    print("Test: epoch number : ", epoch ,'Test MAE: ', mae)
    # Context manager closes the file on error; comma keeps the two values apart.
    with open("result.txt", "a") as log_file:
        log_file.write('%s,%s\n' % (test_loss, mae))

    # Compare against the best value remembered across calls.  A local
    # threshold that is re-created on every call would accept any epoch below
    # it and overwrite a better checkpoint with a worse one.
    if mae < test.best_mae:
        test.best_mae = mae
        #### Save model######
        torch.save({'epoch': epoch, 'model': net,'model_state_dict': net.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': loss}, 'checkpoint.pt')
        #### Save Test csv######
        # One prediction per row under a plain "Y" header.  Rows follow the
        # order in which testloader yielded the samples, so they line up with
        # the dataset rows only when that loader does not shuffle.
        np.savetxt('test_pred.csv', torch.cat(predictions).numpy(), delimiter=',', fmt='%.6f', header='Y', comments='')
    return test_loss, mae


# Lowest test MAE observed so far; any first evaluation improves on infinity.
test.best_mae = float('inf')
print("Train/Test")

Train/Test


In [15]:
### run train/test####
# Alternate one training epoch with one evaluation pass and keep a history.
# No learning-rate schedule is applied; the optimizer keeps its initial lr.
result = {'train_loss': []}
best_mae = float('inf')  # lowest test MAE seen so far; defined even if epochs == 0
try:
    for epoch in range(epochs):
        train_loss, train_mae = train(epoch)
        test_loss, test_mae = test(epoch)
        result['train_loss'].append(train_loss)
        # Track the minimum rather than overwriting with the latest epoch.
        best_mae = min(best_mae, test_mae)
finally:
    # Persist whatever has been collected, including when the run is
    # interrupted by hand or stopped by an error.
    # The key name 'best_acc' is kept for existing readers of model.npy; the
    # value is the best (lowest) test MAE, not an accuracy.
    result['best_acc'] = best_mae
    np.save('model.npy', result)


Epoch: 0
Train: epoch number :  0 Train MAE: 69.20969085693359
Test: epoch number :  0 Test MAE:  69.14335632324219

Epoch: 1
Train: epoch number :  1 Train MAE: 68.79210815429687
Test: epoch number :  1 Test MAE:  68.66207885742188

Epoch: 2
Train: epoch number :  2 Train MAE: 68.24540405273437
Test: epoch number :  2 Test MAE:  68.00198364257812

Epoch: 3
Train: epoch number :  3 Train MAE: 67.53038330078125
Test: epoch number :  3 Test MAE:  67.0678939819336

Epoch: 4
Train: epoch number :  4 Train MAE: 66.59428100585937
Test: epoch number :  4 Test MAE:  65.82014465332031

Epoch: 5
Train: epoch number :  5 Train MAE: 65.37195281982422
Test: epoch number :  5 Test MAE:  64.08300018310547

Epoch: 6
Train: epoch number :  6 Train MAE: 63.77793579101562
Test: epoch number :  6 Test MAE:  61.9691162109375

Epoch: 7
Train: epoch number :  7 Train MAE: 61.71867446899414
Test: epoch number :  7 Test MAE:  58.99465560913086

Epoch: 8
Train: epoch number :  8 Train MAE: 59.09872894287109
Te

KeyboardInterrupt: ignored