<a href="https://colab.research.google.com/github/jan-kreischer/UZH_ML4NLP/blob/main/Project-01/submission/ex01_mlp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Exercise 01 - Part 02

## Importing all the libraries

In [11]:
import csv
import re
import numpy as np

import pandas as pd
pd.set_option('display.max_rows', 200)  
pd.set_option('display.max_columns', 200)   
pd.set_option('display.width', 4000) 

from io import StringIO
import requests
import matplotlib.pyplot as plt

import warnings

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

warnings.filterwarnings('ignore')

## 1. Data Acquisition
In this assignment we are not going to do all the data cleaning and preprocessing again.  
We are just loading the saved dataset from the first exercise.

In [12]:
dataset = pd.read_csv('./dataset.csv')

In [13]:
dataset.shape

(67642, 2)

## 2. Data Preparation

In [14]:
TARGET_COLUMN = 'label'
TWEET_COLUMN = 'tweet'

In [15]:
X = dataset[TWEET_COLUMN]
y = dataset[TARGET_COLUMN]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=True)

In [16]:
# Vectorize with ngram_range 1 to 3
vectorizer = TfidfVectorizer(analyzer='char', ngram_range=(1,3))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [17]:
print(type(X_train_vec))

<class 'scipy.sparse.csr.csr_matrix'>


## 3. Model Training

In [None]:
# All parameters we are individually testing
# If the computational power would be high enough we could
# use GridSearchCV to easily find the best hyperparameters
# However running this grid search CV exceeds colabs max runtime.
parameters = {
        'hidden_layer_sizes': [100, 500],
        'solver': ['adam', 'sgd'],
        'activation': ['tanh', 'relu'],
}

## Run by Jan

### Configuration 01

In [None]:
mlp_clf = MLPClassifier(early_stopping=True, hidden_layer_sizes=(100), solver='adam', activation='tanh', max_iter=100, verbose=True)
mlp_clf.fit(X_train_vec, y_train)

Iteration 1, loss = 1.35136123
Validation score: 0.864488
Iteration 2, loss = 0.38750770
Validation score: 0.898160
Iteration 3, loss = 0.20907587
Validation score: 0.912779
Iteration 4, loss = 0.12316765
Validation score: 0.914586
Iteration 5, loss = 0.07781613
Validation score: 0.918857
Iteration 6, loss = 0.05258100
Validation score: 0.919021
Iteration 7, loss = 0.03794585
Validation score: 0.919678
Iteration 8, loss = 0.02905713
Validation score: 0.919021
Iteration 9, loss = 0.02351217
Validation score: 0.918693
Iteration 10, loss = 0.01983701
Validation score: 0.916064
Iteration 11, loss = 0.01737863
Validation score: 0.917050
Iteration 12, loss = 0.01551853
Validation score: 0.917050
Iteration 13, loss = 0.01421724
Validation score: 0.915243
Iteration 14, loss = 0.01312494
Validation score: 0.915079
Iteration 15, loss = 0.01225299
Validation score: 0.914586
Iteration 16, loss = 0.01154947
Validation score: 0.914750
Iteration 17, loss = 0.01087968
Validation score: 0.915900
Iterat

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=100, learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=100,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=True,
              warm_start=False)

In [None]:
print(classification_report(y_test, mlp_clf.predict(X_test_vec)))

              precision    recall  f1-score   support

          ar       0.98      0.98      0.98       256
      arlatn       1.00      1.00      1.00         1
          az       1.00      1.00      1.00        10
          bg       1.00      1.00      1.00         2
          bn       1.00      1.00      1.00         5
          bs       1.00      1.00      1.00        11
          ca       0.00      0.00      0.00         4
          cs       1.00      1.00      1.00         4
          cy       1.00      1.00      1.00         3
          da       1.00      1.00      1.00         4
          de       1.00      0.79      0.88        28
          dv       1.00      1.00      1.00         8
          el       1.00      0.80      0.89         5
          en       0.93      0.97      0.95      2357
          es       0.92      0.95      0.93       728
          et       1.00      1.00      1.00         3
          eu       1.00      1.00      1.00         3
          fa       1.00    

### Configuration 02

In [None]:
mlp_clf = MLPClassifier(early_stopping=True, hidden_layer_sizes=(100), solver='adam', activation='relu', max_iter=100, verbose=True)
mlp_clf.fit(X_train_vec, y_train)

Iteration 1, loss = 1.44080992
Validation score: 0.869087
Iteration 2, loss = 0.40336973
Validation score: 0.904895
Iteration 3, loss = 0.22184401
Validation score: 0.920828
Iteration 4, loss = 0.13248716
Validation score: 0.926248
Iteration 5, loss = 0.08481837
Validation score: 0.926577
Iteration 6, loss = 0.05784939
Validation score: 0.926413
Iteration 7, loss = 0.04195377
Validation score: 0.926248
Iteration 8, loss = 0.03211862
Validation score: 0.927070
Iteration 9, loss = 0.02582995
Validation score: 0.925756
Iteration 10, loss = 0.02167531
Validation score: 0.924606
Iteration 11, loss = 0.01884304
Validation score: 0.925099
Iteration 12, loss = 0.01690268
Validation score: 0.924770
Iteration 13, loss = 0.01537346
Validation score: 0.924606
Iteration 14, loss = 0.01423894
Validation score: 0.924606
Iteration 15, loss = 0.01328507
Validation score: 0.924934
Iteration 16, loss = 0.01248132
Validation score: 0.924934
Iteration 17, loss = 0.01183086
Validation score: 0.923784
Iterat

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=100, learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=100,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=True,
              warm_start=False)

In [None]:
print(classification_report(y_test, mlp_clf.predict(X_test_vec)))

              precision    recall  f1-score   support

          ar       0.98      0.97      0.98       256
      arlatn       1.00      1.00      1.00         1
          az       1.00      1.00      1.00        10
          bg       1.00      1.00      1.00         2
          bn       1.00      1.00      1.00         5
          bs       1.00      1.00      1.00        11
          ca       0.00      0.00      0.00         4
          cs       1.00      1.00      1.00         4
          cy       1.00      1.00      1.00         3
          da       1.00      1.00      1.00         4
          de       1.00      0.82      0.90        28
          dv       1.00      1.00      1.00         8
          el       1.00      0.80      0.89         5
          en       0.93      0.96      0.95      2357
          es       0.92      0.95      0.94       728
          et       1.00      1.00      1.00         3
          eu       1.00      1.00      1.00         3
          fa       1.00    

### Configuration 03

In [None]:
mlp_clf = MLPClassifier(early_stopping=True, hidden_layer_sizes=(100), solver='sgd', activation='tanh', max_iter=100, verbose=True)
mlp_clf.fit(X_train_vec, y_train)

Iteration 1, loss = 3.83790675
Validation score: 0.346091
Iteration 2, loss = 2.80005137
Validation score: 0.346091
Iteration 3, loss = 2.43424634
Validation score: 0.346091
Iteration 4, loss = 2.29328226
Validation score: 0.346091
Iteration 5, loss = 2.21992028
Validation score: 0.346091
Iteration 6, loss = 2.16930773
Validation score: 0.366130
Iteration 7, loss = 2.12569797
Validation score: 0.480289
Iteration 8, loss = 2.08346862
Validation score: 0.515769
Iteration 9, loss = 2.04120553
Validation score: 0.526938
Iteration 10, loss = 1.99883010
Validation score: 0.530388
Iteration 11, loss = 1.95691272
Validation score: 0.529074
Iteration 12, loss = 1.91601130
Validation score: 0.528581
Iteration 13, loss = 1.87660695
Validation score: 0.527431
Iteration 14, loss = 1.83893518
Validation score: 0.527102
Iteration 15, loss = 1.80301735
Validation score: 0.527102
Iteration 16, loss = 1.76876771
Validation score: 0.525953
Iteration 17, loss = 1.73596789
Validation score: 0.527267
Iterat

In [None]:
print(classification_report(y_test, mlp_clf.predict(X_test_vec)))

              precision    recall  f1-score   support

          ar       0.97      0.95      0.96       274
      arlatn       0.00      0.00      0.00         5
          az       0.00      0.00      0.00         5
          bg       0.00      0.00      0.00         3
          bn       0.00      0.00      0.00         2
          bs       0.00      0.00      0.00         5
          ca       0.00      0.00      0.00         3
          cs       0.00      0.00      0.00         5
          cy       0.00      0.00      0.00         5
          da       0.00      0.00      0.00         1
          de       0.00      0.00      0.00        19
          dv       0.00      0.00      0.00         2
          el       0.00      0.00      0.00         1
          en       0.84      0.97      0.90      2355
          es       0.67      0.90      0.77       763
          et       0.00      0.00      0.00         4
          eu       0.00      0.00      0.00         5
          fi       0.00    

### Configuration 04

In [None]:
mlp_clf = MLPClassifier(early_stopping=True, hidden_layer_sizes=(100), solver='sgd', activation='relu', max_iter=100, verbose=True)
mlp_clf.fit(X_train_vec, y_train)

Iteration 1, loss = 4.02919301
Validation score: 0.346748
Iteration 2, loss = 3.14377492
Validation score: 0.346748
Iteration 3, loss = 2.56392484
Validation score: 0.346748
Iteration 4, loss = 2.35278089
Validation score: 0.346748
Iteration 5, loss = 2.26528485
Validation score: 0.346748
Iteration 6, loss = 2.21389089
Validation score: 0.346748
Iteration 7, loss = 2.17213758
Validation score: 0.348883
Iteration 8, loss = 2.13207392
Validation score: 0.453515
Iteration 9, loss = 2.09104169
Validation score: 0.498357
Iteration 10, loss = 2.04880234
Validation score: 0.517247
Iteration 11, loss = 2.00590778
Validation score: 0.521189
Iteration 12, loss = 1.96306010
Validation score: 0.520696
Iteration 13, loss = 1.92126695
Validation score: 0.519054
Iteration 14, loss = 1.88095186
Validation score: 0.518397
Iteration 15, loss = 1.84241353
Validation score: 0.518233
Iteration 16, loss = 1.80577731
Validation score: 0.518068
Iteration 17, loss = 1.77081258
Validation score: 0.519218
Iterat

**This code crashed after running for around 5 hours and we were not able to print the classification report. However, since the validation score is just at 78% eprcent after 91 iterations, we will not rerun it since it wont achieve the performance of the models being trained with Adam.**

In [None]:
classification_report(y_test, mlp_clf.predict(X_test_vec))