# Notebook imports

In [15]:
import time
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline

In [4]:
# Read the tab-separated ratings file (no header row, so column names are
# supplied here) and discard the timestamp column, which is not used below.
data = pd.read_csv(
    './Data/ml-100k/u.data',
    sep='\t',
    header=None,
    names=['user_id', 'item_id', 'rating', 'timestamp'],
).drop(columns=['timestamp'])

# One-hot encode both id columns, then turn the rating into a boolean target:
# True when the rating is 4 or higher, False otherwise.
data_processed = pd.get_dummies(data, columns=['user_id', 'item_id'])
data_processed['rating'] = data_processed['rating'].ge(4)

# Target vector and feature matrix (every column except the target).
y = data_processed['rating']
X = data_processed.drop(columns=['rating'])

# Hold out 20% of the rows as the test split; the seed keeps the split fixed.
X_dev, X_test, y_dev, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

data_processed.head()

Unnamed: 0,rating,user_id_1,user_id_2,user_id_3,user_id_4,user_id_5,user_id_6,user_id_7,user_id_8,user_id_9,...,item_id_1673,item_id_1674,item_id_1675,item_id_1676,item_id_1677,item_id_1678,item_id_1679,item_id_1680,item_id_1681,item_id_1682
0,False,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,False,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,False,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,False,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,False,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Baseline run: L-BFGS solver, capped at 1000 iterations, with MLPClassifier's
# default hidden_layer_sizes and activation (neither is overridden here).
start_time = time.time()
mlp = MLPClassifier(solver='lbfgs', random_state=42, max_iter=1000)
mlp.fit(X_dev, y_dev)
elapsed_mins = (time.time() - start_time) / 60
print(f'Training time: {elapsed_mins} mins')

# Score on the development split first, then on the held-out test split.
dev_score = mlp.score(X_dev, y_dev)
print(dev_score)
test_score = mlp.score(X_test, y_test)
print(test_score)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training time: 5.837252024809519 mins
0.8169875
0.6877


In [10]:
# Deeper variant: three hidden layers of 100 units each, L-BFGS solver,
# capped at 1000 iterations; activation is left at the library default.
start_time = time.time()
mlp = MLPClassifier(
    solver='lbfgs',
    random_state=42,
    max_iter=1000,
    hidden_layer_sizes=(100, 100, 100),
).fit(X_dev, y_dev)
elapsed_mins = (time.time() - start_time) / 60
print(f'Training time: {elapsed_mins} mins')

# Score on the development split first, then on the held-out test split.
dev_score = mlp.score(X_dev, y_dev)
print(dev_score)
test_score = mlp.score(X_test, y_test)
print(test_score)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training time: 8.471844387054443 mins
0.7254
0.70545


In [11]:
# tanh-activation variant with verbose solver output enabled. max_iter and
# hidden_layer_sizes are not set here, so the library defaults apply.
start_time = time.time()
mlp = MLPClassifier(
    solver='lbfgs',
    random_state=42,
    verbose=True,
    activation='tanh',
).fit(X_dev, y_dev)
elapsed_mins = (time.time() - start_time) / 60
print(f'Training time: {elapsed_mins} mins')

# Score on the development split first, then on the held-out test split.
dev_score = mlp.score(X_dev, y_dev)
print(dev_score)
test_score = mlp.score(X_test, y_test)
print(test_score)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training time: 6.192696289221446 mins
0.7488375
0.7096


In [13]:
# tanh activation combined with three hidden layers of 100 units each.
# max_iter is not set here, so the library default iteration cap applies.
start_time = time.time()
mlp = MLPClassifier(
    solver='lbfgs',
    random_state=42,
    activation='tanh',
    hidden_layer_sizes=(100, 100, 100),
).fit(X_dev, y_dev)
elapsed_mins = (time.time() - start_time) / 60
print(f'Training time: {elapsed_mins} mins')

# Score on the development split first, then on the held-out test split.
dev_score = mlp.score(X_dev, y_dev)
print(dev_score)
test_score = mlp.score(X_test, y_test)
print(test_score)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training time: 8.721183927853902 mins
0.7470875
0.71265


In [None]:
# Grid search over the hidden-layer architecture of an L-BFGS MLP, using
# 3-fold cross-validation on the development split and keeping train scores.
start_time = time.time()
pipe = make_pipeline(MLPClassifier(solver='lbfgs', random_state=42))
# Parameters of a pipeline step are addressed as '<step name>__<parameter>'
# with a DOUBLE underscore; make_pipeline names the step 'mlpclassifier'.
# A single underscore makes GridSearchCV fail with "Invalid parameter".
param_grid = {'mlpclassifier__hidden_layer_sizes':
              [(100, 100, 100), (500, 500, 500),
               (100, 100, 100, 100), (500, 500, 500, 500)]}
grid = GridSearchCV(pipe, param_grid, cv=3, return_train_score=True)
grid.fit(X_dev, y_dev)
print(f'Training time: {(time.time() - start_time) / 60} mins')