In [1]:
#importing required libraries
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split



In [2]:
#function to preprocess the data 
def data_preprocessing(task_1a_dataframe):
    encoded_dataframe=task_1a_dataframe.copy()
    #mapping some columns as numbers for the neural network
    encoded_dataframe['Education']=encoded_dataframe['Education'].map({'Bachelors':1,'Masters':2,'PHD':3})
    encoded_dataframe['City']=encoded_dataframe['City'].map({'Bangalore':1,'New Delhi':2,'Pune':3})
    encoded_dataframe['Marital Status']=encoded_dataframe['Marital Status'].map({'Married':2,'Single':1})
    encoded_dataframe['Gender']=encoded_dataframe['Gender'].map({'Male':0,'Female':1})
    encoded_dataframe['EverBenched']=encoded_dataframe['EverBenched'].map({'No':0,'Yes':1})
    scaler=StandardScaler()
    encoded_dataframe[['Age','JoiningYear']]=scaler.fit_transform(encoded_dataframe[['Age','JoiningYear']])
    return encoded_dataframe

In [3]:
#function to extract required inputs and outputs from the data
def identify_features_and_targets(encoded_dataframe):
    targets=encoded_dataframe[['LeaveOrNot']]
    features=encoded_dataframe.drop('LeaveOrNot',axis=1)
    features_and_targets=[features,targets]
    return features_and_targets

In [4]:
#function to load the data as tensors
def load_as_tensors(features_and_targets):
    X=features_and_targets[0]
    Y=features_and_targets[1]
    x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.1,random_state=42) #splitting the data for training and testing
    x_train=np.array(x_train)
    y_train=np.array(y_train)
    x_test=np.array(x_test)
    y_test=np.array(y_test)
    x_train=torch.FloatTensor(x_train)
    x_test=torch.FloatTensor(x_test)
    y_train=torch.LongTensor(y_train)
    y_test=torch.LongTensor(y_test)
    data_train=TensorDataset(x_train,y_train)
    dataloader=DataLoader(data_train,batch_size=100,shuffle=True)
    tensors_and_iterable_training_data=[x_train,y_train,x_test,y_test,dataloader]
    return tensors_and_iterable_training_data

In [19]:
#defining the layers in the network
class Predictor(nn.Module):
    def __init__(self,f_in=9,hidden1=20,hidden2=15,hidden3=8,f_out=1):
        super(Predictor, self).__init__()
        self.hiddenlayer1=nn.Linear(f_in,hidden1)
        self.relu = nn.ReLU()
        self.hiddenlayer2=nn.Linear(hidden1,hidden2)
        self.relu = nn.ReLU()
        self.hiddenlayer3=nn.Linear(hidden2,hidden3)
        self.relu = nn.ReLU()
        self.outputlayer=nn.Linear(hidden3,f_out)
        self.sigmoid=nn.Sigmoid()   
    def forward(self, predicted_output):
        predicted_output=F.relu(self.hiddenlayer1(predicted_output))
        #predicted_output=self.relu(predicted_output)
        predicted_output=F.relu(self.hiddenlayer2(predicted_output))
        predicted_output=F.relu(self.hiddenlayer3(predicted_output))
        predicted_output=F.sigmoid(self.outputlayer(predicted_output))
        #predicted_output=self.sigmoid(predicted_output) 
        return predicted_output

In [6]:
#binary loss fucntion to be used as output is in binary format
def model_loss_function():
    loss_function=nn.BCELoss()
    return loss_function

In [7]:
def model_optimizer(model):
    optimizer=optim.Adam(model.parameters(),lr=0.001)
    return optimizer

In [21]:
def model_number_of_epochs():
    number_of_epochs=50
    return number_of_epochs

In [9]:
def training_function(model, number_of_epochs, tensors_and_iterable_training_data, loss_function, optimizer):
    x_train,y_train,x_test,y_test,dataloader=tensors_and_iterable_training_data
    for epoch in range(number_of_epochs):
        model.train()  #train the model on the inputs provided
        for batch_x,batch_y in dataloader:
            output=model.forward(batch_x)
            loss=loss_function(output,batch_y.float())
            #backpass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(f'Epoch [{epoch+1}/{number_of_epochs}], Loss: {loss.item():.4f}')  #printing loss after each epoch
    return model

In [10]:
def validation_function(trained_model, tensors_and_iterable_training_data):
    trained_model.eval()
    correct=0
    total=0
    with torch.no_grad():
        x_test=tensors_and_iterable_training_data[2]
        y_test=tensors_and_iterable_training_data[3]
        output=trained_model(x_test)
        predicted=(output>0.5).float()
        #_,predicted=torch.max(output.data,1)
        total+=y_test.size(0)
        correct+=(predicted==y_test).sum().item()
    model_accuracy=correct/total   #calculating the accuracy of the model on new data
    return model_accuracy


In [23]:
if __name__ == "__main__":

	# reading the provided dataset csv file using pandas library and 
	# converting it to a pandas Dataframe
	task_2_dataframe = pd.read_csv('task_2_dataset.csv')

	# data preprocessing and obtaining encoded data
	encoded_dataframe = data_preprocessing(task_2_dataframe)

	# selecting required features and targets
	features_and_targets = identify_features_and_targets(encoded_dataframe)

	# obtaining training and validation data tensors and the iterable
	# training data object
	tensors_and_iterable_training_data = load_as_tensors(features_and_targets)
	
	# model is an instance of the class that defines the architecture of the model
	model = Predictor()

	# obtaining loss function, optimizer and the number of training epochs
	loss_function = model_loss_function()
	optimizer = model_optimizer(model)
	number_of_epochs = model_number_of_epochs()

	# training the model
	trained_model = training_function(model, number_of_epochs, tensors_and_iterable_training_data, 
					loss_function, optimizer)

	# validating and obtaining accuracy
	model_accuracy = validation_function(trained_model,tensors_and_iterable_training_data)
	print(f"Accuracy on the test set = {model_accuracy}")

	X_train_tensor = tensors_and_iterable_training_data[0]
	x = X_train_tensor[0]
	jitted_model = torch.jit.save(torch.jit.trace(model, (x)), "task_2_trained_model.pth")

Epoch [1/50], Loss: 0.6325
Epoch [2/50], Loss: 0.6579
Epoch [3/50], Loss: 0.6229
Epoch [4/50], Loss: 0.5853
Epoch [5/50], Loss: 0.5212
Epoch [6/50], Loss: 0.5252
Epoch [7/50], Loss: 0.5579
Epoch [8/50], Loss: 0.5720
Epoch [9/50], Loss: 0.5695
Epoch [10/50], Loss: 0.5592
Epoch [11/50], Loss: 0.5077
Epoch [12/50], Loss: 0.5755
Epoch [13/50], Loss: 0.5666
Epoch [14/50], Loss: 0.5651
Epoch [15/50], Loss: 0.5117
Epoch [16/50], Loss: 0.5294
Epoch [17/50], Loss: 0.4314
Epoch [18/50], Loss: 0.4918
Epoch [19/50], Loss: 0.6115
Epoch [20/50], Loss: 0.4687
Epoch [21/50], Loss: 0.3818
Epoch [22/50], Loss: 0.5269
Epoch [23/50], Loss: 0.5106
Epoch [24/50], Loss: 0.4477
Epoch [25/50], Loss: 0.4684
Epoch [26/50], Loss: 0.4353
Epoch [27/50], Loss: 0.3859
Epoch [28/50], Loss: 0.4169
Epoch [29/50], Loss: 0.4703
Epoch [30/50], Loss: 0.4611
Epoch [31/50], Loss: 0.3816
Epoch [32/50], Loss: 0.4796
Epoch [33/50], Loss: 0.3175
Epoch [34/50], Loss: 0.3871
Epoch [35/50], Loss: 0.4650
Epoch [36/50], Loss: 0.3729
E

In [12]:
data = {
    'Education': ['Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Masters', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 
                  'Masters', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Masters', 
                  'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Masters', 'Bachelors', 'Bachelors'],
    'JoiningYear': [2018, 2014, 2018, 2015, 2016, 2017, 2017, 2016, 2018, 2017, 
                    2017, 2014, 2016, 2012, 2016, 2014, 2013, 2014, 2017, 2016, 
                    2015, 2014, 2013, 2016, 2013, 2016, 2016, 2012, 2017, 2012],
    'City': ['New Delhi', 'Bangalore', 'Bangalore', 'Pune', 'Pune', 'New Delhi', 'Pune', 'Bangalore', 'Pune', 'Bangalore', 
             'Pune', 'Pune', 'Bangalore', 'Bangalore', 'Pune', 'Bangalore', 'Pune', 'New Delhi', 'Pune', 'Bangalore', 
             'Bangalore', 'Bangalore', 'Pune', 'New Delhi', 'Bangalore', 'Bangalore', 'Bangalore', 'Bangalore', 'Bangalore', 'New Delhi'],
    'PaymentTier': [2, 3, 3, 2, 3, 2, 2, 3, 3, 3, 
                    3, 3, 3, 3, 3, 3, 1, 3, 2, 3, 
                    3, 1, 2, 2, 3, 3, 3, 3, 1, 3],
    'Age': [34, 34, 26, 30, 23, 37, 29, 40, 32, 34, 
            24, 34, 22, 37, 34, 23, 28, 38, 37, 27, 
            23, 30, 31, 22, 30, 37, 34, 27, 29, 29],
    'Gender': ['Female', 'Female', 'Male', 'Female', 'Male', 'Male', 'Female', 'Male', 'Male', 'Male', 
               'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Female', 'Female', 'Female', 'Male', 
               'Male', 'Female', 'Female', 'Female', 'Male', 'Male', 'Female', 'Male', 'Male', 'Male'],
     'Marital Status': [
    'Married', 'Married', 'Married', 'Married', 'Married', 'Single', 'Married', 'Married', 'Single', 'Married',
    'Married', 'Married', 'Single', 'Single', 'Single', 'Married', 'Single', 'Married', 'Married', 'Single',
    'Single', 'Married', 'Single', 'Married', 'Single', 'Single', 'Single', 'Married', 'Single', 'Single'],
    'EverBenched': ['No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'Yes', 'No', 
                    'Yes', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 
                    'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No'],
    'ExperienceInCurrentDomain': [0, 2, 4, 0, 1, 2, 2, 5, 5, 0, 
                                  2, 4, 0, 4, 3, 1, 3, 2, 0, 5, 
                                  1, 3, 2, 0, 3, 2, 2, 5, 3, 3],
    'LeaveOrNot': [1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 
                   1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 
                   0, 0, 1, 1, 1, 0, 1, 1, 0, 0]
}

task_1a_dataframe = pd.DataFrame(data)
encoded_dataframe2 = data_preprocessing(task_1a_dataframe)


In [13]:
features_and_targets2 = identify_features_and_targets(encoded_dataframe2)

In [14]:
tensors_and_iterable_training_data2 = load_as_tensors(features_and_targets2)
trained_model = torch.jit.load('task_2_trained_model.pth')
model_accuracy = validation_function(trained_model,tensors_and_iterable_training_data2)
print(f"Accuracy is = {model_accuracy}")

Accuracy is = 0.6666666666666666
