Dependencies

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import torch.onnx 
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import seaborn as sns
from pathlib import Path
import numpy as np

Dataset Load

In [None]:
# Read the raw dataset from the JSON file that sits next to the notebook folder.
df = pd.read_json(path_or_buf='../data/data.json')

Dataset Size

In [None]:
# (number of rows, number of columns) of the loaded frame.
df_shape = (len(df.index), len(df.columns))
print(f'Rows and columns in one JSON file is {df_shape}')

10 First Rows

In [None]:
# Preview the first 10 rows of the raw frame. The caption is printed on its own
# line and the frame is left as the cell's last expression so it is rendered
# with the notebook's rich table display instead of being squeezed into an f-string.
df_rows = df.head(10)
print('First 10 rows in one JSON file are:')
df_rows

Dataset Columns

In [None]:
# List every column label of the raw frame, one per line.
print(f'The column names are :')
print('#########')
for col in list(df.columns):
    print(col)

Dataset Columns where Column Name Like "nam"

In [None]:
# Keep only the column labels that match the regular expression "nam".
cols = df.filter(regex='nam', axis='columns').columns

print(cols)

Dataset General Information

In [None]:
# Headline facts about the raw frame: its size and the distinct values of a few columns.
n_rows, n_cols = df.shape
print("The #rows and #columns are ", n_rows, " and ", n_cols)
print("The years in this dataset are: ", df['year'].unique())
print("The artists covered in this dataset are: ", list(df['artist'].unique()))
print("The genders covered are: ", list(df['gender'].unique()))

Count of Dataset Rows Group by "gender"

In [None]:
# Number of rows per distinct "gender" value, as a one-column frame named "Count".
counts = df['gender'].value_counts().to_frame(name='Count')
counts

Count of Dataset Rows Ordered, Group by "gender"

In [None]:
# The gender labels are the *index* of `counts`, not a column. Sorting by index
# works on every pandas version, whereas `sort_values(by=['gender'])` depends on
# the index happening to be named "gender" (only true from pandas 2.0 onwards)
# and raises KeyError otherwise.
counts.sort_index(ascending=True)

Column "bought" renaming

In [None]:
# Rename the label column "bought" to "is_bought"; a missing column is ignored by rename.
df = df.rename(columns={'bought': 'is_bought'})

Normalization of "null" values of Dataset

In [None]:
# Keep only rows whose "likes" value is present, then show the remaining
# per-column null counts as the cell output.
df = df[df['likes'].notna()]
df.isna().sum(axis=0)

Class Distribution

In [None]:
# Bar chart of how many rows fall into each "is_bought" class.
sns.countplot(data=df, x='is_bought')

Categorical Column Analysis

In [None]:
# Keep an untouched copy of the cleaned frame for later reference.
data_orig = df.copy()

categorical_columns = ['name', 'artist', 'year', 'gender']

# Select the label, the numeric feature and the categorical columns, and cast the
# categorical ones in a single `astype` call. `astype` returns a new frame, so
# `data` is independent of `df`; assigning column-by-column into a slice of `df`
# (as the loop version did) triggers pandas' SettingWithCopyWarning.
data = (
    df[['is_bought', 'likes', *categorical_columns]]
    .astype({column: 'category' for column in categorical_columns})
)

print(f'The column names are :')
print('#########')
for col in data.columns:
    print(col)

print(f'The column types are :')
print('#########')
for col in data.dtypes:
    print(col)

Categorical Column Removal

In [None]:
# One-hot encode the categorical columns. `drop_first=True` drops one level per
# column; `dtype=int` yields 0/1 integers directly, so no `replace({True: 1,
# False: 0})` pass (which relies on deprecated silent downcasting) is needed.
data_dummies = pd.get_dummies(data[categorical_columns], drop_first=True, dtype=int)

# Columns that are kept as-is next to the generated indicator columns.
not_categorical_columns = ['is_bought', 'likes']

# Rebuild `data` from the untouched columns plus the indicators instead of
# concatenating and then dropping in place.
# NOTE: this cell consumes the categorical columns of `data`; re-run the previous
# cell before re-running this one.
data = pd.concat([data[not_categorical_columns], data_dummies], axis=1)

Normalized Dataset General Information

In [None]:
# Report the size of the encoded frame `data` (not the raw `df`), which is what
# this "normalized" section describes.
print("The #rows and #columns are ", data.shape[0] , " and ", data.shape[1])

10 First Normalized Rows

In [None]:
# Preview the first 10 rows of the encoded frame `data` (the raw `df` was already
# previewed earlier). The frame is the last expression so it renders as a table.
data_rows = data.head(10)
print('First 10 rows of the normalized dataset are:')
data_rows

Normalized Dataset Columns

In [None]:
# List every column label of the encoded frame, one per line.
print(f'The column names are :')
print('#########')
for col in list(data.columns):
    print(col)

Column "is_bought" renaming to "target"

In [None]:
# Rename the label column "is_bought" to "target" for the modelling cells below.
data = data.rename(columns={'is_bought': 'target'})

X & Y

In [None]:
# Model inputs: only the "likes" column is used as a feature.
features = ['likes']

# X stays a DataFrame (2-D) because `features` is a list; Y is the label Series.
X = data.loc[:, features]
Y = data.loc[:, 'target']

Model setup

In [None]:
# Hyper-parameters and split settings read by the cells below.
batch_size = 10          # samples per mini-batch in both DataLoaders
num_of_epochs = 1_000    # intended number of training epochs
learning_rate = 1e-2     # `lr` passed to the Adam optimizer
weight_decay = 1e-4      # `weight_decay` passed to the Adam optimizer
test_size = 0.33         # `test_size` passed to train_test_split
random_state = 42        # `random_state` passed to train_test_split

Train / Test Dataset Split

In [None]:
# Hold out `test_size` of the rows for evaluation; `random_state` fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=random_state,
)

Logger TensorBoard

In [None]:
class Logger:
    """Owns the TensorBoard ``SummaryWriter`` used by the notebook.

    Attributes:
        writer: the ``SummaryWriter`` created in ``__init__``; callers log
            through it directly (``logger.writer.add_scalar(...)``).
    """

    def __init__(self)-> None:
        # SummaryWriter() with no arguments picks its own default log directory.
        self.writer = SummaryWriter()

    def __flush__(self) -> None:
        """Flush pending events to disk.

        The method is actually *called* here; referencing ``self.writer.flush``
        without parentheses is a no-op.
        """
        self.writer.flush()

    def __del__(self) -> None:
        """Flush and close the writer when the logger is garbage-collected."""
        # `writer` is missing if __init__ raised before assigning it; __del__
        # still runs in that case and must not raise.
        writer = getattr(self, 'writer', None)
        if writer is None:
            return
        writer.flush()
        writer.close()

Tensors

In [None]:
class Data(Dataset):
    """In-memory dataset pairing feature rows with their targets.

    Both arrays are cast to float32 and wrapped as tensors once, at construction.
    """

    def __init__(self, X: np.ndarray, y: np.ndarray) -> None:
        features, targets = (
            torch.from_numpy(array.astype(np.float32)) for array in (X, y)
        )
        self.X = features
        self.y = targets
        # Number of samples = size of the first axis of the feature tensor.
        self.len = features.shape[0]

    def __getitem__(self, index: int) -> tuple:
        """Return the ``(features, target)`` pair stored at ``index``."""
        sample = self.X[index]
        label = self.y[index]
        return sample, label

    def __len__(self) -> int:
        """Return the number of samples."""
        return self.len

Train / Test Batch Loader

In [None]:
class Loader:
    """Bundles the train and test ``DataLoader`` objects.

    Attributes:
        train: batches of the training set, reshuffled every epoch.
        test: batches of the test set, in a fixed order.
    """

    def __init__(self, train:Data, test:Data) -> None:
        # Batch size comes from the notebook-level `batch_size` setting.
        self.train = DataLoader(train, batch_size=batch_size, shuffle=True)
        # Evaluation does not benefit from shuffling; a fixed order keeps the
        # per-batch test metrics written to TensorBoard reproducible across runs.
        self.test = DataLoader(test, batch_size=batch_size, shuffle=False)

Train / Test Batch Loader Initialization

In [None]:
# Wrap the NumPy views of the pandas splits as tensor datasets.
traindata = Data(X_train.to_numpy(), y_train.to_numpy())
testdata = Data(X_test.to_numpy(), y_test.to_numpy())

# Batch both datasets through DataLoaders.
loader = Loader(train=traindata, test=testdata)

Model Architecture

In [None]:
class LinearRegression(nn.Module):
    """Small fully-connected network: input_dim -> 10 -> 5 -> 3 -> 1.

    ReLU follows each of the three hidden layers; the last layer is linear.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Attribute names fc1..fc4 are the state_dict key prefixes; keep them stable.
        self.fc1 = nn.Linear(input_dim, 10)   # hidden layer 1
        self.fc2 = nn.Linear(10, 5)           # hidden layer 2
        self.fc3 = nn.Linear(5, 3)            # hidden layer 3
        self.fc4 = nn.Linear(3, 1)            # output layer

    def forward(self, d):
        """Run ``d`` through the hidden layers (with ReLU) and the output layer."""
        activations = d
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            activations = torch.relu(hidden_layer(activations))
        return self.fc4(activations)

Model Initialization

In [None]:
# Width of the input layer = number of feature columns in the training split.
input_dim = len(X_train.columns)

# Fix the RNG seed so the initial weights are the same on every run.
torch.manual_seed(42)
model = LinearRegression(input_dim=input_dim)

Loss Function, Learning Rate & Optimizer

In [None]:
# define the loss function with Mean Squared Error loss and an optimizer with Adam optimizer
loss = nn.MSELoss()
optimizers=optim.Adam(params=model.parameters(),lr=learning_rate, weight_decay = weight_decay)

Logger Initialization

In [None]:
# Shared TensorBoard logger used by the train, test and inspect functions below.
logger = Logger()

Model Reload

In [None]:
def reload(input_dim: int = 1) -> nn.Module:
    """Rebuild the network and load the weights stored in ``saved/Network.pth``.

    Args:
        input_dim: width of the network's input layer. Must match the model
            that produced the checkpoint; defaults to 1 (a plain int, which is
            what the former ``(1)`` "tuple" actually was).

    Returns:
        The restored model in evaluation mode. Callers that ignore the return
        value behave exactly as before.

    Raises:
        FileNotFoundError: if ``saved/Network.pth`` does not exist.
    """
    load_model = LinearRegression(input_dim)

    # map_location lets a checkpoint written on a GPU be loaded on a CPU-only machine.
    state_dict = torch.load('saved/Network.pth', map_location='cpu')
    load_model.load_state_dict(state_dict)

    # A reloaded model is meant for inference.
    load_model.eval()
    return load_model

Model Parameters Save

In [None]:
def save() -> None:
    """Write the global model's ``state_dict`` to ``saved/Network.pth``.

    The ``saved`` directory is created when missing.
    """
    target_dir = Path('saved')
    target_dir.mkdir(parents=True, exist_ok=True)

    torch.save(obj=model.state_dict(), f=target_dir / 'Network.pth')

    print(" ")
    print('Model has been converted to PTH')

Model Export

In [None]:
def export():
    """Export the global ``model`` to ``saved/Network.onnx``.

    The model is switched to evaluation mode and traced with a random dummy
    batch of one sample. Axis 0 of the input and output is declared dynamic
    under the name ``batch_size``.
    """

    # set the model to inference mode
    model.eval()

    # The target directory is otherwise only created by save(); make sure it exists.
    Path('saved').mkdir(parents=True, exist_ok=True)

    # Dummy input shaped (batch=1, features) on the model's own device. A 1-D
    # tensor has no batch axis, so the dynamic axis 0 declared below would be
    # attached to the feature axis instead.
    reference_param = next(model.parameters())
    dummy_input = torch.randn(1, model.fc1.in_features, device=reference_param.device)

    # export the model
    torch.onnx.export(model,        # model being run
         dummy_input,               # model input (or a tuple for multiple inputs)
         "saved/Network.onnx",      # where to save the model
         export_params=True,        # store the trained parameter weights inside the model file
         opset_version=11,          # the ONNX version to export the model to
         do_constant_folding=True,  # whether to execute constant folding for optimization
         input_names = ['input'],   # the model's input names
         output_names = ['output'], # the model's output names
         dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                                'output' : {0 : 'batch_size'}})
    print(" ")
    print('Model has been converted to ONNX')

Model Inspection

In [None]:
def inspect() -> None:
    """Write the graph of the global ``model`` to TensorBoard via ``logger``."""

    # Dummy input shaped (batch=1, features) and placed on the model's device,
    # so tracing sees the same input layout the DataLoader batches have and does
    # not fail with a device mismatch after the model was moved to a GPU.
    reference_param = next(model.parameters())
    dummy_input = torch.randn(1, model.fc1.in_features, device=reference_param.device)

    # inspect the model using TensorBoard
    logger.writer.add_graph(model, dummy_input)

Model Test

In [None]:
def test(num_of_epochs: int = 1000) -> None:
    """Evaluate the global ``model`` on ``loader.test`` and log to TensorBoard.

    Args:
        num_of_epochs: number of passes over the test set. The model does not
            change between passes, so one pass is enough to obtain the metrics;
            the parameter and its default are kept for compatibility.

    Side effects:
        Logs per-batch loss/confidence/outputs, the per-pass mean loss and the
        final accuracy through ``logger``; calls ``save()`` when the accuracy
        is above zero; prints the accuracy.
    """

    # enable model evaluation
    model.eval()

    # define execution device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    logger.writer.add_text('config/model', model.__class__.__name__)
    logger.writer.add_text('config/optimizer', optimizers.__class__.__name__)
    logger.writer.add_text('config/device', str(device))

    model.to(device)

    num_batches = len(loader.test)
    correct = 0
    total = 0

    # disable gradient computation for the whole evaluation
    with torch.no_grad():
        # int() keeps callers that pass a float epoch count working with range()
        for epoch in range(int(num_of_epochs)):

            epoch_loss = 0.0

            for b, (inputs, targets) in enumerate(loader.test):

                # Batches must live on the same device as the model.
                inputs = inputs.to(device)
                # The model emits (batch, 1). Targets of shape (batch,) would be
                # broadcast to (batch, batch) inside MSELoss, giving a wrong loss.
                targets = targets.to(device).reshape(-1, 1)

                # run the model on the test set
                outputs = model(inputs)

                loss_value = loss(outputs, targets)
                epoch_loss += loss_value.item()

                # Step counter over *test* batches (not train batches).
                global_step = epoch * num_batches + b

                # The single output is regressed onto the label, so it is read
                # as an estimate of P(target == 1) after clamping to [0, 1].
                # Softmax over one column is always 1.0 and carries no information.
                # NOTE(review): assumes targets are 0/1 labels — confirm against the data.
                positive = outputs.clamp(0.0, 1.0)
                confidences = torch.max(positive, 1.0 - positive).reshape(-1)

                # log the batch metrics
                logger.writer.add_scalar('test/loss', loss_value.item(), global_step=global_step)
                logger.writer.add_scalar('test/confidence', confidences.mean().item(), global_step=global_step)
                logger.writer.add_histogram('test/outputs', outputs.reshape(-1), global_step=global_step)
                logger.writer.add_histogram('test/confidences', confidences, global_step=global_step)

                # Threshold at 0.5 to obtain the predicted class. An argmax over
                # a single column is always 0 regardless of the output value.
                predicted = (outputs >= 0.5).to(targets.dtype)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

            # Mean loss of this pass, logged at the step of its last batch.
            # (`data.__len__()` was the length of the (inputs, targets) pair,
            # i.e. always 2, not the number of batches.)
            if num_batches:
                logger.writer.add_scalar('test/batch/loss', epoch_loss / num_batches, (epoch + 1) * num_batches - 1)

    # compute the accuracy over all evaluated records; guard the empty case
    accuracy = (100 * correct / total) if total else 0.0
    logger.writer.add_scalar('test/accuracy', accuracy, 0)

    # NOTE(review): no best accuracy is persisted between calls, so the
    # checkpoint is written whenever the accuracy is above zero.
    if accuracy > 0.0:
        save()

    logger.writer.flush()

    print(" ")
    print(f'Test accuracy: {accuracy:.2f}%')
    print('Model has been tested')

Model Train

In [None]:
def train(num_of_epochs: int = 1000) -> None:
    """Fit the global ``model`` on ``loader.train`` and log to TensorBoard.

    Args:
        num_of_epochs: number of passes over the training set.

    Side effects:
        Updates the parameters of ``model`` through ``optimizers``; logs
        per-batch loss/confidence/outputs and the per-epoch mean loss through
        ``logger``; prints a completion message once training has finished.
    """

    # define execution device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    logger.writer.add_text('config/model', model.__class__.__name__)
    logger.writer.add_text('config/optimizer', optimizers.__class__.__name__)
    logger.writer.add_text('config/device', str(device))

    model.to(device)
    # test()/export() switch the model to eval mode; switch back before fitting.
    model.train()

    num_batches = len(loader.train)

    # int() keeps callers that pass a float epoch count working with range()
    for epoch in range(int(num_of_epochs)):

        epoch_loss = 0.0

        for b, (inputs, targets) in enumerate(loader.train):

            # Batches must live on the same device as the model.
            inputs = inputs.to(device)
            # The model emits (batch, 1). Targets of shape (batch,) would be
            # broadcast to (batch, batch) inside MSELoss, giving a wrong loss
            # and therefore wrong gradients.
            targets = targets.to(device).reshape(-1, 1)

            # zero the parameter gradients
            optimizers.zero_grad()

            # forward pass on the training batch
            outputs = model(inputs)

            # compute the loss based on model output and real targets
            loss_value = loss(outputs, targets)

            # backpropagate the loss
            loss_value.backward()

            # adjust parameters based on the calculated gradients
            optimizers.step()

            global_step = epoch * num_batches + b
            epoch_loss += loss_value.item()

            # The single output is regressed onto the label, so it is read as an
            # estimate of P(target == 1) after clamping to [0, 1]. Softmax over
            # one column is always 1.0 and carries no information.
            # NOTE(review): assumes targets are 0/1 labels — confirm against the data.
            detached = outputs.detach()
            positive = detached.clamp(0.0, 1.0)
            confidences = torch.max(positive, 1.0 - positive).reshape(-1)

            # log the batch metrics
            logger.writer.add_scalar('train/loss', loss_value.item(), global_step=global_step)
            logger.writer.add_scalar('train/confidence', confidences.mean().item(), global_step=global_step)
            logger.writer.add_histogram('train/outputs', detached.reshape(-1), global_step=global_step)
            logger.writer.add_histogram('train/confidences', confidences, global_step=global_step)

        # Mean loss of this epoch, logged at the step of its last batch.
        # (`data.__len__()` was the length of the (inputs, targets) pair,
        # i.e. always 2, not the number of batches.)
        if num_batches:
            logger.writer.add_scalar('train/batch/loss', epoch_loss / num_batches, (epoch + 1) * num_batches - 1)

    logger.writer.flush()

    # Printed after training completes; at column 0 these lines ran when the
    # function was *defined*, before any training had happened.
    print(" ")
    print('Model has been trained')

Trigger Train

In [None]:
# Pass the configured epoch count so that editing `num_of_epochs` in the
# hyper-parameter cell actually changes the length of training.
train(num_of_epochs)

Trigger Test

In [None]:
# The model does not change during evaluation, so a single pass over the test
# set yields the same metrics as the default 1000 passes at a fraction of the cost.
test(num_of_epochs=1)

Trigger Reload

In [None]:
# Rebuild the network and load the weights stored in saved/Network.pth.
reload()

Trigger Export

In [None]:
# Export the global model to saved/Network.onnx.
export()

Trigger Inspect

In [None]:
# Write the model graph to TensorBoard through the shared logger.
inspect()

Logger Close

In [None]:
# Flush and close the TensorBoard writer explicitly. `del` only removes the
# name; Python does not guarantee when (or whether) `Logger.__del__` runs.
# SummaryWriter ignores a second flush/close, so a later __del__ is harmless.
logger.writer.flush()
logger.writer.close()
del logger