<a href="https://colab.research.google.com/github/hosseinm/Interview/blob/main/Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
'''Regression with PyTorch.'''
from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import numpy as np
import argparse
import math
import pdb
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from google.colab import drive  # Colab helper used to mount Google Drive
drive.mount('/content/gdrive')  # the CSV paths used later in this file live under this mount point

In [None]:
# Seed PyTorch's RNG first so that everything below is reproducible.
torch.manual_seed(1)

# Run configuration.
batchsize, epochs = 2000, 400   # mini-batch size, number of training epochs
lr = 0.1                        # base learning rate
train_loss = 0                  # module-level placeholder for the running loss
class FeatureDataset(Dataset):
    """Dataset backed by a CSV file held in a pandas DataFrame.

    Indexing yields a ``(features, label)`` pair of float32 tensors: the
    features come from (at most) the first ten columns of the row and the
    label from the row's last column.
    """

    def __init__(self, csv_file):
        """Load ``csv_file`` into memory with ``pandas.read_csv``."""
        self.data_frame = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for the row at position ``idx``."""
        frame = self.data_frame
        features = frame.iloc[idx, :10]
        target = frame.iloc[idx, -1]
        # A row with fewer than ten feature columns is given a zero label.
        if len(features) < 10:
            target = 0

        feature_tensor = torch.from_numpy(np.array(features).astype(np.float32))
        target_tensor = torch.from_numpy(np.array(target).astype(np.float32))
        return feature_tensor, target_tensor

# Datasets: one CSV for training, one for inference.
trainloader_all = FeatureDataset(csv_file='/content/gdrive/MyDrive/datascience/ML/train.csv')  # training data path
testloader_all = FeatureDataset(csv_file='/content/gdrive/MyDrive/datascience/ML/test.csv')  # test data path

# Training batches are reshuffled every epoch.
trainloader = DataLoader(trainloader_all, batch_size=batchsize, shuffle=True)
# The test set must stay in file order: predictions are later written out by
# position with no row index, so shuffling here would detach every saved
# prediction from the row of test.csv it belongs to.
testloader = DataLoader(testloader_all, batch_size=batchsize, shuffle=False)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
print(device)



In [None]:

# this is one way to define a network
class Net(torch.nn.Module):
    """Small fully connected regressor with batch normalisation.

    Layout::

        Linear(n_feature, 20) -> BatchNorm1d -> ReLU
        Linear(20, 10)        -> BatchNorm1d -> ReLU
        Linear(10, n_output)
    """

    def __init__(self, n_feature, n_output):
        """Create the layers for ``n_feature`` inputs and ``n_output`` outputs."""
        super().__init__()
        self.linear1 = nn.Linear(n_feature, 20)
        self.bn1 = nn.BatchNorm1d(20)
        self.linear2 = nn.Linear(20, 10)
        self.bn2 = nn.BatchNorm1d(10)
        self.linear3 = nn.Linear(10, n_output)

    def forward(self, x):
        """Map a batch of feature rows to the raw (unactivated) outputs."""
        # First hidden block.
        hidden = self.linear1(x)
        hidden = F.relu(self.bn1(hidden))
        # Second hidden block.
        hidden = self.linear2(hidden)
        hidden = F.relu(self.bn2(hidden))
        # The output layer stays linear because this is a regression model.
        return self.linear3(hidden)

# Build the regressor: 10 input features, 1 predicted value.
net = Net(n_feature=10, n_output=1)

# L1 loss, i.e. the mean absolute error between prediction and target.
loss_func = torch.nn.L1Loss()

# Adadelta over the trainable parameters only, with weight decay.
optimizer = torch.optim.Adadelta(
    (param for param in net.parameters() if param.requires_grad),
    lr=lr,
    weight_decay=5e-4,
)

print(net)

In [None]:
####Train#####
# Lowest epoch MAE seen so far.  Kept at module level so it survives across
# train() calls; a local would be reset on every call and the "save only when
# improved" check below could never compare against an earlier epoch.
_best_train_mae = float('inf')


def train(epoch):
    """Run one training epoch and checkpoint the model when it improves.

    Iterates over the module-level ``trainloader``, optimising ``net`` with
    ``optimizer`` against ``loss_func``.  One line, ``<summed loss>,<mae>``,
    is appended to ``result.txt`` per call.  When the epoch's MAE is the
    lowest seen so far, the model and optimizer state are saved to
    ``checkpoint.pt``.

    Args:
        epoch: Index of the current epoch; used for logging and stored in
            the checkpoint.

    Returns:
        Tuple ``(train_loss, mae)``: the sum of the per-batch losses and
        that sum divided by the number of batches.
    """
    global _best_train_mae

    print('\nEpoch: %d' % epoch)
    net.train()
    net.to(device)

    train_loss = 0
    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)
        feat = net(inputs)
        # squeeze(1) drops the singleton output dimension so the prediction
        # has the same shape as the targets.
        loss = loss_func(feat.squeeze(1), targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    mae = train_loss / len(trainloader)
    print("Train: epoch number : ", epoch, "Train MAE:", mae)

    # The two values are comma-separated so each logged line can be parsed;
    # `with` guarantees the file is closed even if the write fails.
    with open("result.txt", "a") as f:
        f.write('%s,%s\n' % (train_loss, mae))

    if mae < _best_train_mae:
        #### Save model######
        torch.save(
            {
                'epoch': epoch,
                'model': net,
                'model_state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
            },
            'checkpoint.pt',
        )
        _best_train_mae = mae
    return train_loss, mae
print("Train")

for epoch in range(epochs):
    # Step decay: divide the base rate by 10 every 20 epochs.  The rate is
    # derived from the unchanged base `lr` each time; writing the result back
    # into `lr` would compound the factor on every epoch and drive the
    # learning rate to zero shortly after epoch 20.
    epoch_lr = lr * (0.1 ** (epoch // 20))
    for param_group in optimizer.param_groups:
        param_group['lr'] = epoch_lr
    train_loss, mae = train(epoch)



In [None]:
### run train/test####

def test(epoch):
    """Run the network over ``testloader`` and return all predictions.

    Uses the module-level ``net``, ``testloader`` and ``device``.  The model
    is moved to ``device`` and put in evaluation mode, and gradients are
    disabled for the whole pass.

    Args:
        epoch: Unused; kept so existing callers keep working.

    Returns:
        1-D float64 numpy array holding the predictions in the order the
        loader produced them (an empty array if the loader yields nothing).
    """
    # Inputs are moved to `device` below, so the model has to be there too.
    net.to(device)
    net.eval()

    batches = []
    with torch.no_grad():
        for ii, _targets in testloader:
            feat = net(ii.to(device))
            # squeeze(1) drops the singleton output dimension.
            batches.append(feat.squeeze(1).cpu().numpy())

    if not batches:
        return np.array([])
    # Collect per-batch arrays and concatenate once, rather than growing an
    # array with np.append, which copies everything on every batch.
    return np.concatenate(batches).astype(np.float64)
print("Train/Test")

# Predict on the test set; `epoch` is whatever value the training loop left
# behind, and is passed through to test().
label = test(epoch)

# Write one prediction per line under a 'Y' header.
# NOTE(review): fmt='%d' writes each prediction as an integer, and np.savetxt
# prefixes the header with its comment marker by default - confirm both are
# what the consumer of this file expects.
np.savetxt(
    'test_pred.csv',
    label,
    fmt='%d',
    delimiter=',',
    header='Y',
)


In [None]:
############ Using pre-trained model for data with label Y and compute MAE ###############
## The model is loaded from checkpoint.pt.
#
# The snippet below is disabled: it is wrapped in a bare triple-quoted string,
# so Python evaluates it as a string expression and never executes it.
# NOTE(review): before re-enabling it, reconcile it with the live code:
#   - it calls Net(..., n_hidden=2, ...), but Net.__init__ accepts only
#     n_feature and n_output;
#   - it appends to `data_test`, while the list it initialises is `data1`;
#   - the loaded model is never moved to `device`, although the inputs are;
#   - the final np.savetxt writes `label`, which is assigned only inside the
#     function body, so at module level it would pick up a different `label`.

"""def test_pretarin(epoch):
    global best_acc
    test_loss = 0
    best_mae = 100
    data1 = []
    model = Net(n_feature=10, n_hidden=2, n_output=1)
    checkpoint = torch.load('checkpoint.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    with torch.no_grad():

        for batch_idx, (ii, targets) in enumerate(testloader):
            ii, targets = ii.to(device), targets.to(device)
            feat = model(ii)
            loss = loss_func(feat.squeeze(1), targets)
            test_loss += loss.item()
            label = feat.squeeze(1)
            data_test = np.append(data_test, label.detach().cpu().numpy(), axis=0)

    mae = test_loss / len(testloader)
    print("Test: epoch number : ", epoch, 'Test MAE: ', mae)        
    return test_loss, mae


test_loss, best_mae = test_pretarin(0)
np.savetxt('test_pred_with_label.csv', label, delimiter=',', fmt='%d' , header='Y')"""
