### 1. Deep Learning on Tabular Data

In [153]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler,LabelEncoder
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader,SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import KFold

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [154]:
df = pd.read_csv("bank-additional-full.csv",sep=";")
sc = LabelEncoder()
stdscl = StandardScaler()

In [167]:
embed = {}
df.iloc[:,-1] = sc.fit_transform(df.iloc[:,-1]).astype(float)
col = df.loc[:,['job','marital','education','default','housing','loan','contact','month','day_of_week','poutcome']]
for i in col:
    df.loc[:,i] = sc.fit_transform(df.loc[:,i])
    embed[i] = df.loc[:,i]
df.iloc[:,:-1] = stdscl.fit_transform(df.iloc[:,:-1])
embed = pd.DataFrame(embed)
embed = stdscl.fit_transform(embed)
df = pd.DataFrame(df)
embed = pd.DataFrame(embed)

In [176]:
class NeuralNetwork(nn.Module):
    def __init__(self,nlayers,input, hidden,labels):
        super(NeuralNetwork,self).__init__()
        self.n = nlayers
        self.full_layer = {}
        self.relu = nn.ReLU()
        self.last_layer = nn.Linear(hidden,labels)
        for i in range(self.n):
            if (i==0):
                self.full_layer["fc{}".format(i)] = nn.Linear(input, hidden)
            else:
                self.full_layer["fc{}".format(i)] = nn.Linear(hidden, hidden)
    
    def forward(self,X):
        for i in range(self.n):
            output = self.full_layer["fc{}".format(i)](X)
            output = self.relu(output)
        output = self.last_layer(output)
        output = self.relu(output)
        return output

In [177]:
kf = KFold(n_splits=3)

In [179]:
for fold,(train_id,test_id) in enumerate(kf.split(df)):
    no_layers = 5
    no_x = 20
    no_hidden = 20
    no_label = 2
    epochs = 20
    mu = 0.01
    nn_model = NeuralNetwork(no_layers,no_x,no_hidden,no_label).to(device)
    loss_func = nn.CrossEntropyLoss()
    df_tensor = torch.tensor(df.to_numpy())
    ada_optimizer = Adam(nn_model.parameters(),lr=mu)
    sampler_train = SubsetRandomSampler(train_id)
    sampler_test = SubsetRandomSampler(test_id)
    train_tensor = DataLoader(dataset=df_tensor,batch_size=20,sampler=sampler_train)
    test_tensor = DataLoader(dataset=df_tensor,batch_size=20,sampler=sampler_test)
    nn_model = nn_model.float()
    writer = SummaryWriter()
    total_loss = 0.0
    for e in range(epochs):
        #print(f'Epoch {e+1}/{epochs}')
        for n,d in enumerate(train_tensor):
            features,labels = d[:,:-1].float(),d[:,-1].long()
            output = nn_model(features)
            loss = loss_func(output,labels)
            total_loss += loss
            writer.add_scalar("Loss/train", loss, n)
            ada_optimizer.zero_grad()
            loss.backward()
            ada_optimizer.step()
    file = f'./{fold}-fold-model.pth'
    torch.save(nn_model,file)
    #nn_model = torch.load(f'./{fold}-fold-model.pth')
    correct = 0.0
    total = 0.0
    res = {}
    total_loss_t = 0.0
    with torch.no_grad():
        for n,d in enumerate(test_tensor):
            features,labels = d[:,:-1].float(),d[:,-1].long()
            output = nn_model(features)
            _,predict = torch.max(output.data,1)
            loss = loss_func(output,labels)
            writer.add_scalar("Loss/test", loss, n)
            total += labels.size(0)
            correct += (predict==labels).sum().item()
        res[fold] = (correct/total)*100
    print("Accuracy for each fold")
    for i,j in res.items():
        print(f'Fold No:{i}       Accuracy:{j}%')
    writer.flush()

Accuracy for each fold
Fold No:0       Accuracy:96.25637290604516%
Accuracy for each fold
Fold No:1       Accuracy:94.05637701216403%
Accuracy for each fold
Fold No:2       Accuracy:75.89045087042028%


### Loss/Epoch for Training
# ![Loss/Epoch for Training](tensorboard_fold_train.png)

### Loss/Epoch for Testing
# ![Loss/Epoch for Testing](tensorboard_fold_test.png)

### 2. NLP - Word2Vec Model

In [195]:
with open("raw_text.txt","r") as file:
    text = file.read()
words = text.strip().split(" ")

In [261]:
embedding_size = 100
ln_layer = 128

In [257]:
class Neural_CBOW(nn.Module):
    def __init__(self,no_words):
        super(Neural_CBOW,self).__init__()
        self.embed = nn.Embedding(num_embeddings=no_words,embedding_dim=embedding_size)
        self.linear_layer = nn.Linear(embedding_size,ln_layer)
        self.relu = nn.ReLU()
    def forward(self,X):
        output_cbow = self.embed(X)
        output_cbow = self.linear_layer(X)
        output_cbow = self.relu(X)
        return output_cbow

In [258]:
def get_context(words):
    input_cbow = []
    output_cbow = []
    for w in range(0,len(words),5):
        words[w].strip("\n")
        if(w+4 < len(words)):
            input_cbow.append(words[w])
            input_cbow.append(words[w+1])
            output_cbow.append(words[w+2])
            input_cbow.append(words[w+3])
            input_cbow.append(words[w+4])
        else:
            break
    return input_cbow,output_cbow

In [259]:
data_cbow,targets_cbow = get_context(words)
print(len(targets_cbow))
data_cbow = sc.fit_transform(data_cbow)
targets_cbow = sc.fit_transform(targets_cbow)
data_cbow = np.array(data_cbow).reshape((len(data_cbow),1))
targets_cbow = np.array(targets_cbow).reshape((len(targets_cbow),1))
train_data_cbow = data_cbow[:29]
test_data_cbow = data_cbow[29:58]
data_cbow_t = np.append(train_data_cbow,targets_cbow,axis=1)
data_cbow_tt = np.append(test_data_cbow,targets_cbow,axis=1)
datas_tensor_train = torch.tensor(data_cbow_t)
datas_tensor_test = torch.tensor(data_cbow_tt)

29


In [274]:
cbow_model = Neural_CBOW(len(words)).to(device)
loss_func = nn.CrossEntropyLoss()
ada_optimizer = Adam(nn_model.parameters(),lr=mu)
train_cbow = DataLoader(dataset=datas_tensor_train,batch_size=5,shuffle=True)
test_cbow = DataLoader(dataset=datas_tensor_test,batch_size=5,shuffle=True)
nn_model = nn_model.float()
for e in range(50):
    features,label = train_cbow
    output = nn_model(features)
    loss = loss_func(output,labels)
    ada_optimizer.zero_grad()
    loss.backward()
    ada_optimizer.step()
    correct = 0.0
    total = 0.0
    res = {}
    total_loss_t = 0.0
    with torch.no_grad():
        features,labels = test_cbow
        output = nn_model(features)
        _,predict = torch.max(output.data,1)
        loss = loss_func(output,labels)
        total += labels.size(0)
        correct += (predict==labels).sum().item()
    if(e%10 == 0):
        print(f'Epoch {e}/{50} --> Accuracy: {(correct/total)*100}')

ValueError: too many values to unpack (expected 2)

## References
1. https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
2. https://pytorch.org/tutorials/recipes/recipes/tensorboard_with_pytorch.html
3. https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-use-k-fold-cross-validation-with-pytorch.md