### Language Identification with Multilayer Perceptron Classifier (MLPC)

In [1]:
#Check the GPU
import tensorflow as tf
tf.test.gpu_device_name()

'/device:GPU:0'

In [2]:
#Installing emoji package
!pip install emoji

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
#Importing all necessary packages and libraries

from google.colab import drive
import pandas as pd
import numpy as np
import re
import emoji
from sklearn.feature_extraction.text import CountVectorizer
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.stem import WordNetLemmatizer
from scipy import sparse
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.base import TransformerMixin, BaseEstimator
import pickle
from sklearn.model_selection import train_test_split
import re
import scipy
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [4]:
#Function to load the train and test datasets to pandas DataFrames
def load_df(file_path):
    df = pd.read_csv(file_path, on_bad_lines="skip", sep = "\t")
    df = pd.DataFrame(np.vstack([df.columns, df]))
    df.rename(columns={0: "text", 1: "language"}, inplace=True)
    return df

In [5]:
#Mounting google drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
#Changing the working directory to the drive folder where you store the data
%cd /content/drive/My Drive/ML4NLP_Assignment1

/content/drive/My Drive/ML4NLP_Assignment1


In [7]:
#Loading the train and test datasets to pandas Dataframe
train_df_path = "/content/drive/My Drive/ML4NLP_Assignment1/train_dev_set+-+train_dev_set.csv"
test_df_path = "/content/drive/My Drive/ML4NLP_Assignment1/test_set+-+test_set.csv"
train_df = load_df(train_df_path)
test_df = load_df(test_df_path)

In [8]:
#Assigning the text of tweets to X variable in train and test datasets 
#Assigning the language of tweets to y variable (label) in train and test datasets
X_train = train_df.text
y_train = train_df.language
X_test = test_df.text
y_test = test_df.language

In [9]:
label_encoder = LabelEncoder()
full_y = np.append(y_train, y_test)
full_y = label_encoder.fit_transform(full_y)
y_train = full_y[:y_train.size]
y_test = full_y[y_train.size:]

In [10]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

In [11]:
y_train = list(map(int, y_train))
y_val = list(map(int, y_val))
y_test = list(map(int, y_test))

## Defining Extra Features

In [12]:
class Preprocessing(BaseEstimator, TransformerMixin):
    def __init__(self, purpose):
        self.purpose = purpose

    # LOWERCASING
    def convert_lowercase(self, text):
        text = text.str.lower()
        return text

    # REMOVING HTML TAGS AND URLS
    def remove_html_url(self, text):
        re_html = re.compile("<.*?>")
        re_url = re.compile("https?://\S+|www\.\S+")
        wo_html_str = re_html.sub(r"", text)
        wo_html_url_str = re_url.sub("", wo_html_str)
        return wo_html_url_str

    # REMOVING EMOJIS
    def remove_emoji(self, text):
        text = emoji.demojize(text)
        return text

    # LEMMATISATION
    def lemmatisation(self, text):
        lemmatiser = WordNetLemmatizer()
        return lemmatiser.lemmatize(text)

    def fit(self, X, y):
        return self

    def transform(self, X):
        res = X.apply(self.remove_html_url)
        res = res.apply(self.remove_emoji)
        if self.purpose == "preprocess":
            res = self.convert_lowercase(res)
            res = res.apply(self.lemmatisation)
        return res

In [13]:
preprocess = Preprocessing("generate_extra_features")
X_train_extra_feature = preprocess.fit_transform(X_train, y_train)
X_val_extra_feature = preprocess.transform(X_val)
X_test_extra_feature = preprocess.transform(X_test)

In [14]:
# Function to count the number of spaces in a sentence

def count_space(text):
    return text.count(" ")
X_train_num_space = X_train_extra_feature.apply(count_space)
X_val_num_space = X_val_extra_feature.apply(count_space)
X_test_num_space = X_test_extra_feature.apply(count_space)
X_num_space = [X_train_num_space, X_val_num_space, X_test_num_space]

In [15]:
#Function to calculate the Average Word Length 
def avg_word_len(text):
    text = re.sub(r"[^\w\s]", "", text)
    num_char = len(text) - text.count(" ")
    num_word = len(text.split())
    try:
        res = num_char/num_word
    except:
        res = 0
    return res
X_train_avg_word_len = X_train_extra_feature.apply(avg_word_len)
X_val_avg_word_len = X_val_extra_feature.apply(avg_word_len)
X_test_avg_word_len = X_test_extra_feature.apply(avg_word_len)
X_avg_word_len = [X_train_avg_word_len, X_val_avg_word_len, X_test_avg_word_len]

In [16]:
# Function to count the number of capital letters in a sentence

def num_capital(text):
    return len(re.findall(r"[A-Z]", text))
X_train_num_capital = X_train_extra_feature.apply(num_capital)
X_val_num_capital = X_val_extra_feature.apply(num_capital)
X_test_num_capital = X_test_extra_feature.apply(num_capital)
X_num_capital = [X_train_num_capital, X_val_num_capital, X_test_num_capital]

## Preprocessing

In [17]:
class ExtraFeature(BaseEstimator, TransformerMixin):
    def __init__(self, X_feature):
        self.X_feature = X_feature


    def fit(self, X, y):
        return self

    def transform(self, X):
        features = []
        if X.shape[0] == len(self.X_feature[0]):
            features.append(self.X_feature[0])
        elif X.shape[0] == len(self.X_feature[1]):
            features.append(self.X_feature[1])
        elif X.shape[0] == len(self.X_feature[2]):
            features.append(self.X_feature[2])
        return np.array(features).T
    
    # def get_feature_names_out(self):
    #     return np.array([self.feature_name])

## Saving pre-processed data


In [18]:
count_vect = Preprocessing("preprocess")
X_train_1 = count_vect.fit_transform(X_train, y_train)
X_val_1 = count_vect.transform(X_val)
X_test_1 = count_vect.transform(X_test)

In [19]:
count_vect = CountVectorizer()
X_train_2 = count_vect.fit_transform(X_train_1)
X_val_2 = count_vect.transform(X_val_1)
X_test_2 = count_vect.transform(X_test_1)

In [20]:
union = FeatureUnion([("tfidf", TfidfTransformer()), \
                    ("num_space", ExtraFeature(X_num_space)), \
                    ("avg_word_len", ExtraFeature(X_avg_word_len)), \
                    ("num_capital", ExtraFeature(X_num_capital))])
union_fit_train = union.fit_transform(X_train_2, y_train)
union_fit_val = union.transform(X_val_2)
union_fit_test = union.transform(X_test_2)

In [21]:
scipy.sparse.save_npz("/content/drive/My Drive/ML4NLP_Assignment1/X_train2.npz", union_fit_train)
scipy.sparse.save_npz("/content/drive/My Drive/ML4NLP_Assignment1/X_val2.npz", union_fit_val)
scipy.sparse.save_npz("/content/drive/My Drive/ML4NLP_Assignment1/X_test2.npz", union_fit_test)

## Loading pre-processed data

In [22]:
import scipy.sparse

In [23]:
#We are loading the pre-processed train, test, and validation datasets
X_train = scipy.sparse.load_npz("X_train2.npz")
X_test = scipy.sparse.load_npz("X_test2.npz")
X_val = scipy.sparse.load_npz("X_val2.npz")

## Implementing and training the MLPClassifier

In [24]:
from sklearn.neural_network import MLPClassifier

## First set of parameters

In [None]:
#We are trying out the first combination of parameters for the MLPC on the Train dataset
clf = MLPClassifier(activation='relu', solver='adam', learning_rate='constant', max_iter=20)
clf.fit(X_train, y_train)



MLPClassifier(max_iter=20)

In [None]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [None]:
#Building the confusion matrix to enable accuracy analysis
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_pred, y_val)

In [None]:
#Function calculate the accuracy on our predictions on the validation dataset
def accuracy(confusion_matrix):
   diagonal_sum = confusion_matrix.trace()
   sum_of_all_elements = confusion_matrix.sum()
   return diagonal_sum / sum_of_all_elements

In [None]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.869969627942293


## Different set of hyperparameters

In [None]:
#We try a different learning rate
clf = MLPClassifier(activation='relu', solver='adam', learning_rate='adaptive', max_iter=20)
clf.fit(X_train, y_train)



MLPClassifier(learning_rate='adaptive', max_iter=20)

In [None]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [None]:
#Building the confusion matrix to enable accuracy analysis
cm = confusion_matrix(y_pred, y_val)

In [None]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.8593394077448747


## Different set of hyperparameters

In [None]:
#We try a different solver
clf = MLPClassifier(activation='relu', solver='sgd', learning_rate='adaptive', max_iter=20, verbose=True, early_stopping=True)
clf.fit(X_train, y_train)

Iteration 1, loss = 2.29290719
Validation score: 0.497996
Iteration 2, loss = 1.91293485
Validation score: 0.492301
Iteration 3, loss = 1.85840961
Validation score: 0.496098
Iteration 4, loss = 1.83054102
Validation score: 0.495254
Iteration 5, loss = 1.81349379
Validation score: 0.499895
Iteration 6, loss = 1.80023308
Validation score: 0.497996
Iteration 7, loss = 1.78889639
Validation score: 0.494621
Iteration 8, loss = 1.77965954
Validation score: 0.496098
Iteration 9, loss = 1.77175370
Validation score: 0.500527
Iteration 10, loss = 1.76410064
Validation score: 0.502426
Iteration 11, loss = 1.75695260
Validation score: 0.500527
Iteration 12, loss = 1.75052056
Validation score: 0.502004
Iteration 13, loss = 1.74418907
Validation score: 0.505590
Iteration 14, loss = 1.73787278
Validation score: 0.504746
Iteration 15, loss = 1.73288347
Validation score: 0.503480
Iteration 16, loss = 1.72726457
Validation score: 0.503480
Iteration 17, loss = 1.72261032
Validation score: 0.506433
Iterat



MLPClassifier(early_stopping=True, learning_rate='adaptive', max_iter=20,
              solver='sgd', verbose=True)

In [None]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [None]:
#Building the confusion matrix to enable accuracy analysis
cm = confusion_matrix(y_pred, y_val)

In [None]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.5186028853454822


## Different set of hyperparameters

In [None]:
#We try a different activation function
clf = MLPClassifier(activation='tanh', solver='adam', learning_rate='constant', max_iter=20, verbose=True)

In [None]:
clf.fit(X_train, y_train)

Iteration 1, loss = 1.70415255
Iteration 2, loss = 0.88865955
Iteration 3, loss = 0.54529338
Iteration 4, loss = 0.32236137
Iteration 5, loss = 0.19160281
Iteration 6, loss = 0.12182755
Iteration 7, loss = 0.08376082
Iteration 8, loss = 0.06208866
Iteration 9, loss = 0.04911463
Iteration 10, loss = 0.04035194
Iteration 11, loss = 0.03477388
Iteration 12, loss = 0.02975043
Iteration 13, loss = 0.02625765
Iteration 14, loss = 0.02645950
Iteration 15, loss = 0.02329319
Iteration 16, loss = 0.02183325
Iteration 17, loss = 0.02144269
Iteration 18, loss = 0.02017548
Iteration 19, loss = 0.01844078
Iteration 20, loss = 0.01875031




MLPClassifier(activation='tanh', max_iter=20, verbose=True)

In [None]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [None]:
#Building the confusion matrix to enable accuracy analysis
cm = confusion_matrix(y_pred, y_val)

In [None]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.8652239939255885


## Different set of hyperparameters

In [None]:
#We try different hidden layers arrangement 
clf = MLPClassifier(activation='relu', solver='adam', learning_rate='constant', max_iter=20, verbose=True, hidden_layer_sizes = (45,2,11))

In [None]:
clf.fit(X_train, y_train)

Iteration 1, loss = 2.72139405
Iteration 2, loss = 2.05269069
Iteration 3, loss = 1.93477353
Iteration 4, loss = 1.74244210
Iteration 5, loss = 1.40683003
Iteration 6, loss = 1.21251371
Iteration 7, loss = 1.09295559
Iteration 8, loss = 1.00493040
Iteration 9, loss = 0.93313048
Iteration 10, loss = 0.88030748
Iteration 11, loss = 0.83810530
Iteration 12, loss = 0.80250620
Iteration 13, loss = 0.77098082
Iteration 14, loss = 0.74107704
Iteration 15, loss = 0.71891657
Iteration 16, loss = 0.69721402
Iteration 17, loss = 0.67877613
Iteration 18, loss = 0.66962503
Iteration 19, loss = 0.65894824
Iteration 20, loss = 0.64686639




MLPClassifier(hidden_layer_sizes=(45, 2, 11), max_iter=20, verbose=True)

In [None]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [None]:
#Building the confusion matrix to enable accuracy analysis
cm = confusion_matrix(y_pred, y_val)

In [None]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.5263857251328777


## Trying out more training iterations with the best combination of hyperparameters

In [None]:
#Allowing for 100 training iterations
clf = MLPClassifier(activation='relu', solver='adam', learning_rate='constant', max_iter=100, verbose=True)
clf.fit(X_train, y_train)

Iteration 1, loss = 1.86643251
Iteration 2, loss = 1.04719360
Iteration 3, loss = 0.71290731
Iteration 4, loss = 0.50117845
Iteration 5, loss = 0.35464935
Iteration 6, loss = 0.25571680
Iteration 7, loss = 0.18787229
Iteration 8, loss = 0.14495395
Iteration 9, loss = 0.11123667
Iteration 10, loss = 0.08997660
Iteration 11, loss = 0.07498049
Iteration 12, loss = 0.06319099
Iteration 13, loss = 0.05650948
Iteration 14, loss = 0.05521076
Iteration 15, loss = 0.04220891
Iteration 16, loss = 0.03804936
Iteration 17, loss = 0.03520397
Iteration 18, loss = 0.03178273
Iteration 19, loss = 0.03157789
Iteration 20, loss = 0.04013598
Iteration 21, loss = 0.03112189
Iteration 22, loss = 0.03640576
Iteration 23, loss = 0.02979038
Iteration 24, loss = 0.02676703
Iteration 25, loss = 0.02483379
Iteration 26, loss = 0.02297110
Iteration 27, loss = 0.02380501
Iteration 28, loss = 0.02174981
Iteration 29, loss = 0.02130688
Iteration 30, loss = 0.02045635
Iteration 31, loss = 0.01872849
Iteration 32, los



MLPClassifier(max_iter=100, verbose=True)

In [None]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [None]:
#Building the confusion matrix to enable accuracy analysis
cm = confusion_matrix(y_pred, y_val)

In [None]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.8686408504176157


## Optimizing the hyperparameters via GridSearchCV

In [None]:
mlp_gs = MLPClassifier(max_iter=1)

In [None]:
#Setting the parameters we want to test
parameter_space = {
    'hidden_layer_sizes': [(10,30,10),(20,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'momentum': np.arange(0.1, 1.1, 0.1),
    'alpha': [0.1, 0.05],
    'learning_rate': ['constant','adaptive'],
}

In [None]:
#Implementing the GridSearch algorithm
grid = GridSearchCV(mlp_gs, parameter_space, n_jobs=-1, cv=5, scoring='accuracy')
grid.fit(X_train, y_train)



GridSearchCV(cv=5, estimator=MLPClassifier(max_iter=1), n_jobs=-1,
             param_grid={'activation': ['tanh', 'relu'], 'alpha': [0.1, 0.05],
                         'hidden_layer_sizes': [(10, 30, 10), (20,)],
                         'learning_rate': ['constant', 'adaptive'],
                         'momentum': array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
                         'solver': ['sgd', 'adam']},
             scoring='accuracy')

In [None]:
#Displaying the best combination of parameters found via GridSearchCV
print('Best parameters found:\n', grid.best_params_)

Best parameters found:
 {'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (20,), 'learning_rate': 'constant', 'momentum': 0.8, 'solver': 'adam'}


## Repeating the training with the hyperparametes found by GridSearchCv

In [None]:
clf = MLPClassifier(activation='relu', solver='adam', alpha=0.0001, learning_rate='constant', momentum = 0.8,  max_iter=20, verbose=True)

In [None]:
clf.fit(X_train, y_train)

Iteration 1, loss = 1.84114316
Iteration 2, loss = 1.03768488
Iteration 3, loss = 0.72586904
Iteration 4, loss = 0.52004368
Iteration 5, loss = 0.37168588
Iteration 6, loss = 0.26343380
Iteration 7, loss = 0.19423632
Iteration 8, loss = 0.14691283
Iteration 9, loss = 0.11809418
Iteration 10, loss = 0.09419642
Iteration 11, loss = 0.07597847
Iteration 12, loss = 0.06326646
Iteration 13, loss = 0.05727993
Iteration 14, loss = 0.05380513
Iteration 15, loss = 0.04255692
Iteration 16, loss = 0.03924480
Iteration 17, loss = 0.03641198
Iteration 18, loss = 0.03153515
Iteration 19, loss = 0.03200428
Iteration 20, loss = 0.02847202




MLPClassifier(max_iter=20, momentum=0.8, verbose=True)

In [None]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [None]:
#Building the confusion matrix to enable accuracy analysis
cm = confusion_matrix(y_pred, y_val)

In [None]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.8669324221716022


## Repeating the GridSearch, with a different hyperparameters space

In [None]:
mlp_gs = MLPClassifier(max_iter=1)

In [None]:
#Now we want to check different hidden layers arrangements, and activation functions
parameter_space = {
    'hidden_layer_sizes': [(10,30,10),(50,50,50),(50,100,50)],
    'activation': ['tanh', 'relu'],
}

In [None]:
#Implementing the GridSearch algorithm
grid = GridSearchCV(mlp_gs, parameter_space, n_jobs=-1, cv=5, scoring='accuracy')
grid.fit(X_train, y_train)



GridSearchCV(cv=5, estimator=MLPClassifier(max_iter=1), n_jobs=-1,
             param_grid={'activation': ['tanh', 'relu'],
                         'hidden_layer_sizes': [(10, 30, 10), (50, 50, 50),
                                                (50, 100, 50)]},
             scoring='accuracy')

In [None]:
#Displaying the best combination of parameters found via GridSearchCV
print('Best parameters found:\n', grid.best_params_)

Best parameters found:
 {'activation': 'tanh', 'hidden_layer_sizes': (50, 100, 50)}


## Repeating the training with the hyperparameters newly found via GridSearch

In [None]:
clf = MLPClassifier(activation='tanh', solver='adam', alpha=0.0001, learning_rate='constant', momentum = 0.8,  max_iter=20, hidden_layer_sizes = (50, 100, 50), verbose=True)

In [None]:
clf.fit(X_train, y_train)

Iteration 1, loss = 1.66349406
Iteration 2, loss = 0.64929960
Iteration 3, loss = 0.26051677
Iteration 4, loss = 0.11499098
Iteration 5, loss = 0.06915517
Iteration 6, loss = 0.05184725
Iteration 7, loss = 0.05603417
Iteration 8, loss = 0.04884299
Iteration 9, loss = 0.03092477
Iteration 10, loss = 0.02520644
Iteration 11, loss = 0.02124375
Iteration 12, loss = 0.01834753
Iteration 13, loss = 0.01840530
Iteration 14, loss = 0.01528731
Iteration 15, loss = 0.01462724
Iteration 16, loss = 0.01453732
Iteration 17, loss = 0.01474474
Iteration 18, loss = 0.01260616
Iteration 19, loss = 0.01218674
Iteration 20, loss = 0.01252766




MLPClassifier(activation='tanh', hidden_layer_sizes=(50, 100, 50), max_iter=20,
              momentum=0.8, verbose=True)

In [None]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [None]:
#Building the confusion matrix to enable accuracy analysis
cm = confusion_matrix(y_pred, y_val)

In [None]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.863705391040243


## Trying a different hidden layers arrangement 

In [None]:
#We re-implement the classifer with the following hidden layer sizes: 100, 200, 100
clf = MLPClassifier(activation='tanh', solver='adam', alpha=0.0001, learning_rate='constant', momentum = 0.8,  max_iter=20, hidden_layer_sizes = (100, 200, 100), verbose=True)

In [None]:
clf.fit(X_train, y_train)

Iteration 1, loss = 1.29984218
Iteration 2, loss = 0.43278266
Iteration 3, loss = 0.14477420
Iteration 4, loss = 0.05739199
Iteration 5, loss = 0.03672329
Iteration 6, loss = 0.02412209
Iteration 7, loss = 0.02011321
Iteration 8, loss = 0.01743804
Iteration 9, loss = 0.01570441
Iteration 10, loss = 0.01397362
Iteration 11, loss = 0.01356790
Iteration 12, loss = 0.01291282
Iteration 13, loss = 0.01325305
Iteration 14, loss = 0.01097549
Iteration 15, loss = 0.01026714
Iteration 16, loss = 0.01166393
Iteration 17, loss = 0.01099745
Iteration 18, loss = 0.01449707
Iteration 19, loss = 0.01298245
Iteration 20, loss = 0.01139396




MLPClassifier(activation='tanh', hidden_layer_sizes=(100, 200, 100),
              max_iter=20, momentum=0.8, verbose=True)

In [None]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [None]:
#Building the confusion matrix to enable accuracy analysis
cm = confusion_matrix(y_pred, y_val)

In [None]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.8654138192862566


## Repeating the GridSearch with different hidden layer arrangements to investigate

In [None]:
mlp_gs = MLPClassifier(max_iter=1)

In [None]:
#Now we want to check different hidden layers arrangements and different alpha parameter values
parameter_space = {
    'hidden_layer_sizes': [(50,100,50), (100,200,100), (50,100,100,50)],
    'alpha': [0.0001, 0.001, 0.1],
}

In [None]:
grid = GridSearchCV(mlp_gs, parameter_space, n_jobs=-1, cv=5, scoring='accuracy')
grid.fit(X_train, y_train)



GridSearchCV(cv=5, estimator=MLPClassifier(max_iter=1), n_jobs=-1,
             param_grid={'alpha': [0.0001, 0.001, 0.1],
                         'hidden_layer_sizes': [(50, 100, 50), (100, 200, 100),
                                                (50, 100, 100, 50)]},
             scoring='accuracy')

In [None]:
#Displaying the best combination of parameters found via GridSearchCV
print('Best parameters found:\n', grid.best_params_)

Best parameters found:
 {'alpha': 0.0001, 'hidden_layer_sizes': (100, 200, 100)}


## Repeating the training with the hyperparameters newly found via GridSearch

In [None]:
clf = MLPClassifier(activation='relu', solver='adam', alpha=0.0001, learning_rate='constant', momentum = 0.8,  max_iter=50, hidden_layer_sizes = (100, 200, 100), verbose=True)

In [None]:
clf.fit(X_train, y_train)

Iteration 1, loss = 1.56659767
Iteration 2, loss = 0.60225747
Iteration 3, loss = 0.23427524
Iteration 4, loss = 0.16593730
Iteration 5, loss = 0.09740545
Iteration 6, loss = 0.05014438
Iteration 7, loss = 0.07440469
Iteration 8, loss = 0.03056804
Iteration 9, loss = 0.02555814
Iteration 10, loss = 0.03492749
Iteration 11, loss = 0.02399094
Iteration 12, loss = 0.01818937
Iteration 13, loss = 0.01814736
Iteration 14, loss = 0.01779146
Iteration 15, loss = 0.01594482
Iteration 16, loss = 0.03517959
Iteration 17, loss = 0.02166577
Iteration 18, loss = 0.01362684
Iteration 19, loss = 0.01502089
Iteration 20, loss = 0.01212371
Iteration 21, loss = 0.01150019
Iteration 22, loss = 0.03835310
Iteration 23, loss = 0.01441109
Iteration 24, loss = 0.01106157
Iteration 25, loss = 0.00819181
Iteration 26, loss = 0.04096672
Iteration 27, loss = 0.03085683
Iteration 28, loss = 0.01299028
Iteration 29, loss = 0.01010163
Iteration 30, loss = 0.00752485
Iteration 31, loss = 0.00788954
Iteration 32, los



MLPClassifier(hidden_layer_sizes=(100, 200, 100), max_iter=50, momentum=0.8,
              verbose=True)

In [None]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [None]:
#Building the confusion matrix to enable accuracy analysis
cm = confusion_matrix(y_pred, y_val)

In [None]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.8602885345482156


## Repeat the traning with the best combination of hyperparameters found, save the model, test on the test dataset

In [26]:
#We are trying out the first combination of parameters for the MLPC on the Train dataset
clf = MLPClassifier(activation='relu', solver='adam', max_iter=100, verbose=True)
clf.fit(X_train, y_train)

Iteration 1, loss = 1.84235211
Iteration 2, loss = 1.03498724
Iteration 3, loss = 0.71169678
Iteration 4, loss = 0.51038777
Iteration 5, loss = 0.36557818
Iteration 6, loss = 0.26555945
Iteration 7, loss = 0.19551413
Iteration 8, loss = 0.14825969
Iteration 9, loss = 0.11698325
Iteration 10, loss = 0.09192101
Iteration 11, loss = 0.07899369
Iteration 12, loss = 0.06669863
Iteration 13, loss = 0.06022274
Iteration 14, loss = 0.04892317
Iteration 15, loss = 0.04478357
Iteration 16, loss = 0.04214142
Iteration 17, loss = 0.04469740
Iteration 18, loss = 0.03468129
Iteration 19, loss = 0.03157869
Iteration 20, loss = 0.02952970
Iteration 21, loss = 0.02794740
Iteration 22, loss = 0.03586552
Iteration 23, loss = 0.02536344
Iteration 24, loss = 0.02354640
Iteration 25, loss = 0.02417799
Iteration 26, loss = 0.02294071
Iteration 27, loss = 0.03124970
Iteration 28, loss = 0.02697228
Iteration 29, loss = 0.02118440
Iteration 30, loss = 0.02133598
Iteration 31, loss = 0.01949051
Iteration 32, los



MLPClassifier(max_iter=100, verbose=True)

In [27]:
#Making predictions on the validation dataset
y_pred = clf.predict(X_val)

In [28]:
#Building the confusion matrix to enable accuracy analysis
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_pred, y_val)

In [29]:
#Function calculate the accuracy on our predictions on the validation dataset
def accuracy(confusion_matrix):
   diagonal_sum = confusion_matrix.trace()
   sum_of_all_elements = confusion_matrix.sum()
   return diagonal_sum / sum_of_all_elements

In [30]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.8714882308276386


In [31]:
import pickle

In [32]:
pickle.dump(clf, open("/content/drive/My Drive/ML4NLP_Assignment1/v3_model_mlp_bestparams.sav", "wb"))

## Load the saved model

In [33]:
loaded_model = pickle.load(open("/content/drive/My Drive/ML4NLP_Assignment1/v3_model_mlp_bestparams.sav", "rb"))

In [34]:
y_pred = loaded_model.predict(X_val)

In [None]:
#y_val

In [35]:
print(y_pred)

[32 32 50 ... 32 13 66]


In [36]:
#Building the confusion matrix to enable accuracy analysis
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_pred, y_val)

In [37]:
print(cm)

[[188   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   2   0]
 [  0   0   0 ...   0   0   0]]


In [38]:
#Function calculate the accuracy on our predictions on the validation dataset
def accuracy(confusion_matrix):
   diagonal_sum = confusion_matrix.trace()
   sum_of_all_elements = confusion_matrix.sum()
   return diagonal_sum / sum_of_all_elements

In [39]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.8714882308276386


## Testing on the test dataset (y_test)

In [40]:
y_test_pred = loaded_model.predict(X_test)

In [41]:
#Building the confusion matrix to enable accuracy analysis
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test_pred, y_test)

In [42]:
#Displaying the accuracy of our classifier
print("Accuracy of MLPClassifier : " + str(accuracy(cm)))

Accuracy of MLPClassifier : 0.8628765060240964
