In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import torch.onnx 
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import seaborn as sns
from pathlib import Path
import numpy as np

In [None]:
# Load the raw dataset from a JSON file addressed relative to the notebook's working directory.
df = pd.read_json('../data/data.json')

In [None]:
# Keep the (rows, columns) tuple of the raw frame and report it.
df_shape = df.shape
print(f'Rows and columns in one JSON file is {df_shape}')

In [None]:
# Preview the first 10 rows of the raw frame. `head(10)` slices rows (not columns),
# so the message is worded accordingly.
df_rows = df.head(10)
print(f'First 10 rows in one JSON file is {df_rows}')

In [None]:
# Emit the header and the separator with a single call, then one raw column label per line.
print('The column names are :', '#########', sep='\n')
for col in df.columns:
    print(col)

In [None]:
# Collect the labels of all columns whose name matches the regular expression 'nam'.
cols = df.filter(regex='nam').columns

print(cols)

In [None]:
# Quick overview of the raw frame: its size plus the distinct values of three columns
# (columns are addressed by label rather than by attribute access).
print("The #rows and #columns are ", df.shape[0], " and ", df.shape[1])
print("The years in this dataset are: ", df['year'].unique())
print("The artists covered in this dataset are: ", list(df['artist'].unique()))
print("The genders covered are: ", list(df['gender'].unique()))

In [None]:
# Frequency table: one row per distinct `gender` value, with the number of
# occurrences stored in a single column called 'Count'.
counts = df['gender'].value_counts().to_frame(name='Count')
counts

In [None]:
# The gender labels live in the index of `counts` (its only column is 'Count'), so
# order the table by its index. Sorting by the label 'gender' only works when the
# index happens to carry that name, which depends on the installed pandas version.
counts.sort_index(ascending=True).head(15)

In [None]:
# Give the purchase flag a clearer name; `df` is rebound to the renamed frame.
df = df.rename(columns={'bought': 'is_bought'})

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Keep only the rows that carry a `likes` value, then recount the missing values per column.
df = df[df['likes'].notna()]
df.isna().sum()

In [None]:
# Bar chart of the number of rows per `is_bought` value.
sns.countplot(x = 'is_bought', data=df)

In [None]:
# Keep an untouched copy of the cleaned frame for reference.
data_orig = df.copy()

# Work on an explicit copy of the modelling columns. Assigning into a bare column
# subset of `df` triggers pandas' SettingWithCopyWarning, because pandas cannot tell
# whether the write is meant to reach `df` as well.
data = df[['is_bought', 'likes','name', 'artist', 'year', 'gender']].copy()
categorical_columns  = ['name', 'artist', 'year','gender']
for c in categorical_columns:
    # Mark the column as categorical so it can be one-hot encoded later.
    data[c] = data[c].astype('category')

print(f'The column names are :')
print('#########')
for col in data.columns:
    print(col)

print(f'The column types are :')
print('#########')
# Iterating `data.dtypes` yields one dtype per column (the loop variable holds a dtype).
for col in data.dtypes:
    print(col)

In [None]:
# One-hot encode the categorical columns (dropping the first level of each) and ask
# for integer indicators directly, instead of producing booleans and mapping
# True/False to 1/0 in a second pass over the whole frame.
data_dummies = pd.get_dummies(data[categorical_columns], drop_first=True, dtype=int)
not_categorical_columns  = ['is_bought','likes']
# Append the indicator columns and remove the original categorical columns in one
# expression, without mutating the concatenated frame in place.
data = pd.concat([data, data_dummies], axis=1).drop(columns=categorical_columns)

In [None]:
# Report the size of `data` after the one-hot encoding step.
print("The #rows and #columns are ", data.shape[0] , " and ", data.shape[1])

In [None]:
# Emit the header and the separator with a single call, then one encoded column label per line.
print('The column names are :', '#########', sep='\n')
for col in data.columns:
    print(col)

In [None]:
# The purchase flag is the label to predict; `data` is rebound to the renamed frame.
data = data.rename(columns={'is_bought': 'target'})

In [None]:
# Columns fed to the model: only `likes` is selected as an input feature.
features = ['likes']

In [None]:
# Feature frame (selected with a list, so it stays two-dimensional) and label series.
X, Y = data[features], data['target']

Config

In [None]:
# Tunable settings for the data split, the loaders and the optimizer.
batch_size = 10          # samples per mini-batch, read by the Loader class
num_of_epochs = 1000     # intended number of training epochs
learning_rate=0.01       # `lr` passed to the Adam optimizer
weight_decay=0.0001      # `weight_decay` passed to the Adam optimizer
test_size = 0.33         # `test_size` passed to train_test_split
random_state=42          # `random_state` passed to train_test_split

Train / Test Data

In [None]:
# Hold out part of the rows for evaluation; the share and the seed come from the config values.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=random_state,
)

Logger

In [None]:
class Logger:
    """Thin wrapper that owns a TensorBoard ``SummaryWriter``.

    Attributes:
        writer: the ``SummaryWriter`` created with its default arguments.
    """

    def __init__(self)-> None:
        self.writer = SummaryWriter()

    def __flush__(self) -> None:
        """Flush the writer's pending events."""
        # The call parentheses matter: a bare ``self.writer.flush`` only looks the
        # method up and discards it, so nothing would be flushed.
        self.writer.flush()

    def __del__(self) -> None:
        """Flush and close the writer when the logger object is destroyed."""
        # ``writer`` is absent when SummaryWriter() raised inside __init__ (or when the
        # instance was created without running __init__); there is nothing to release then.
        writer = getattr(self, 'writer', None)
        if writer is not None:
            writer.flush()
            writer.close()

Tensors

In [None]:
class Data(Dataset):
  """In-memory dataset pairing a feature array with a label array.

  Both arrays are cast to ``float32`` and wrapped as torch tensors once, at
  construction time; indexing then returns ``(features, label)`` pairs.
  """

  def __init__(self, X: np.ndarray, y: np.ndarray) -> None:
    features = X.astype(np.float32)
    labels = y.astype(np.float32)
    self.X = torch.from_numpy(features)
    self.y = torch.from_numpy(labels)
    # Number of samples = size of the first axis of the feature tensor.
    self.len = self.X.size(0)

  def __getitem__(self, index: int) -> tuple:
    """Return the ``(features, label)`` pair stored at ``index``."""
    sample = self.X[index]
    label = self.y[index]
    return sample, label

  def __len__(self) -> int:
    """Return the number of samples recorded at construction time."""
    return self.len

Loader

In [None]:
class Loader:
    """Bundles the training and the test ``DataLoader``.

    Both loaders use the module-level ``batch_size``.

    Attributes:
        train: loader over the training dataset, reshuffled on every pass.
        test: loader over the test dataset, iterated in a fixed order.
    """
    def __init__(self, train:Data, test:Data) -> None:
        self.train = DataLoader(train, batch_size=batch_size, shuffle=True)
        # Evaluation metrics summed over the whole set do not depend on sample order,
        # so the test loader is not shuffled. This also keeps evaluation from drawing
        # from the global RNG between training epochs.
        self.test = DataLoader(test, batch_size=batch_size, shuffle=False)

Train / Test Data Loader

In [None]:
# Wrap the NumPy form of each split as a tensor dataset.
traindata = Data(X_train.to_numpy(), y_train.to_numpy())
testdata = Data(X_test.to_numpy(), y_test.to_numpy())

# Batch both datasets through the Loader wrapper.
loader = Loader(traindata, testdata)

Model Architecture:

In [None]:
class LinearRegression(nn.Module):
  """Small fully connected network: input_dim -> 10 -> 5 -> 3 -> 1.

  Despite the class name this is not a single linear map: each of the three
  hidden layers is followed by a ReLU, and only the last layer is purely affine.
  """

  def __init__(self,input_dim):
    super().__init__()
    # Layers are registered in network order under the names fc1..fc4.
    self.fc1 = nn.Linear(input_dim, 10)
    self.fc2 = nn.Linear(10, 5)
    self.fc3 = nn.Linear(5, 3)
    self.fc4 = nn.Linear(3, 1)

  def forward(self,d):
    """Run ``d`` through the three ReLU hidden layers and the affine output layer."""
    activation = d
    for hidden in (self.fc1, self.fc2, self.fc3):
      activation = torch.relu(hidden(activation))
    return self.fc4(activation)

Model setup:

In [None]:
# The width of the first layer is the number of feature columns in the training frame.
input_dim = X_train.shape[1]
torch.manual_seed(42)  # seed torch's RNG before the model is built so the initial weights are reproducible
model = LinearRegression(input_dim)

Loss & Optimizer

In [None]:
# Mean squared error as the training criterion; Adam updates all model parameters
# using the learning rate and weight decay from the config values.
loss = nn.MSELoss()
optimizers = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

Using TensorBoard

In [None]:
# Create the TensorBoard logger; constructing it instantiates a SummaryWriter.
logger = Logger()

Model Load

In [None]:
def load() -> nn.Module:
    """Rebuild the network and restore the weights stored in ``saved/Network.pth``.

    Returns:
        The restored ``LinearRegression`` model, switched to evaluation mode.
        (Previously the model was built and then discarded.)
    """
    # The layer shapes must match the checkpoint, so the network is rebuilt with the
    # module-level ``input_dim`` that was used to construct the trained model.
    load_model = LinearRegression(input_dim)

    # map_location='cpu' lets a checkpoint written from a GPU-resident model be read
    # on a machine without CUDA.
    state_dict = torch.load('saved/Network.pth', map_location='cpu')
    load_model.load_state_dict(state_dict)

    # The restored model is meant for inference.
    load_model.eval()
    return load_model

Model Save

In [None]:
# Persist the current weights of the module-level model
def save() -> None:
    """Write ``model.state_dict()`` to ``saved/Network.pth``, creating ``saved`` if needed."""
    target_dir = Path('saved')
    target_dir.mkdir(parents=True, exist_ok=True)

    checkpoint_path = target_dir / 'Network.pth'
    torch.save(model.state_dict(), checkpoint_path)

Model Export

In [None]:
#Function to Convert to ONNX 
def export(): 
    """Export the module-level ``model`` to ``saved/Network.onnx``.

    The model is switched to evaluation mode and traced with a single random sample.
    Axis 0 of the input and the output is declared dynamic under the name
    ``batch_size``.
    """

    # set the model to inference mode 
    model.eval() 

    # The trace input needs an explicit batch axis: shape (1, in_features). With a
    # rank-1 tensor the axis declared dynamic below would be the feature axis, not
    # the batch axis. The tensor is created on the model's own device because
    # train() may have moved the model to the GPU.
    first_param = next(model.parameters())
    dummy_input = torch.randn(1, model.fc1.in_features, device=first_param.device)

    # make sure the output directory exists even if save() has not run yet
    Path('saved').mkdir(parents=True, exist_ok=True)

    # export the model   
    torch.onnx.export(model,         # model being run 
         dummy_input,       # model input (or a tuple for multiple inputs) 
         "saved/Network.onnx",       # where to save the model  
         export_params=True,  # store the trained parameter weights inside the model file 
         opset_version=11,    # the ONNX version to export the model to 
         do_constant_folding=True,  # whether to execute constant folding for optimization 
         input_names = ['input'],   # the model's input names 
         output_names = ['output'], # the model's output names 
         dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes 
                                'output' : {0 : 'batch_size'}}) 
    print(" ") 
    print('Model has been converted to ONNX') 

Model Inspect

In [None]:
#Function to Inspect the model on TensorBoard 
def inspect() -> None:
    """Record the graph of the module-level ``model`` through ``logger.writer``."""

    # Trace with one random sample of shape (1, in_features), so the recorded graph
    # has an explicit batch axis. The tensor is created on the model's own device
    # because train() may have moved the model to the GPU.
    first_param = next(model.parameters())
    dummy_input = torch.randn(1, model.fc1.in_features, device=first_param.device)

    # we can inspect the model using TensorBoard
    logger.writer.add_graph(model, dummy_input)

Model Test

In [None]:
# Function to test the model with the test dataset and return the accuracy for the test records
def test(threshold: float = 0.5) -> float:
    """Evaluate the module-level ``model`` on ``loader.test``.

    The network emits one value per sample, so the predicted label is obtained by
    thresholding that value. Taking an arg-max over the single output column (the
    previous approach) always yields index 0 and therefore only measured the share
    of zero targets.

    Args:
        threshold: outputs greater than or equal to this value count as label 1.
            NOTE(review): assumes ``target`` is encoded as 0/1 — confirm against the data.

    Returns:
        Accuracy in percent over all test records; ``0.0`` if the loader is empty.
    """
    model.eval()
    # run the evaluation on whatever device the model currently lives on
    device = next(model.parameters()).device

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in loader.test:
            inputs = inputs.to(device)
            targets = targets.to(device).reshape(-1)

            # run the model on the test set to predict labels
            outputs = model(inputs)

            # flatten (batch, 1) to (batch,) and binarize with the threshold
            predicted = (outputs.reshape(-1) >= threshold).to(targets.dtype)

            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # compute the accuracy over all test records
    if total == 0:
        return 0.0
    return 100.0 * correct / total

Model Train

In [None]:
# Training Function 
def train(num_of_epochs: int = 1000) -> None:
  """Train the module-level ``model`` and checkpoint the best-scoring epoch.

  For every epoch the function runs one pass over ``loader.train``, logs the mean
  batch loss to TensorBoard, evaluates with ``test()`` and calls ``save()`` whenever
  the test accuracy improves.

  Args:
      num_of_epochs: number of passes over the training set (passed to ``range``).
  """

  # Start below any reachable accuracy so the first epoch always writes a checkpoint.
  best_accuracy = float('-inf')

  # define your execution device
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  print("The model will be running on", device, "device")

  model.to(device)

  # loop over the dataset multiple times
  for epoch in range(num_of_epochs):

    # test() leaves the model in eval mode, so switch back before every epoch
    model.train()

    running_loss = 0.0
    num_batches = 0

    for inputs, targets in loader.train:

      # move the batch to the model's device; targets become a (batch, 1) column
      # so they line up with the model output
      inputs = inputs.float().to(device)
      targets = targets.float().reshape((targets.shape[0], 1)).to(device)

      # zero the parameter gradients
      optimizers.zero_grad()

      # predict using records from the training set
      outputs = model(inputs)

      # compute the loss based on model output and real targets
      loss_value = loss(outputs, targets)

      # backpropagate the loss
      loss_value.backward()

      # adjust parameters based on the calculated gradients
      optimizers.step()

      running_loss += loss_value.item()
      num_batches += 1

    # Log and print once per epoch, after the last batch. The previous check
    # `i == data.__len__()` compared the batch index with the length of the
    # [inputs, targets] pair (always 2), and the printed value was divided by the
    # epoch count instead of the number of batches.
    if num_batches:
      epoch_loss = running_loss / num_batches
      logger.writer.add_scalar('training loss', epoch_loss, (epoch + 1) * num_batches)
      print('[%d, %5d] loss: %.3f' % (epoch, num_batches, epoch_loss))

    # compute and print the accuracy for this epoch when tested over all test records
    accuracy = test()
    print('For epoch', epoch,'the test accuracy over the whole test set is %d %%' % (accuracy))

    # we want to save the model if the accuracy is the best
    if accuracy > best_accuracy:
      save()
      best_accuracy = accuracy

  # flush the TensorBoard writer directly so the events are written out
  logger.writer.flush()

  print('Finished Training')

In [None]:
# Train for the number of epochs set in the Config cell instead of relying on
# train()'s built-in default, so changing the config value takes effect.
train(num_of_epochs=num_of_epochs)

In [None]:
# Read the checkpoint at saved/Network.pth back into a freshly built network.
load()

In [None]:
# Write the model to saved/Network.onnx.
export()

In [None]:
# Add the model graph to the TensorBoard writer.
inspect()

In [None]:
# Drop the name; Logger.__del__ flushes and closes the writer once the object is destroyed.
del logger