# Neural Networks

In [None]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_iris

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPRegressor
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score

In [None]:
# One seeded generator, passed as `random_state` to the splits and models in this notebook.
# NOTE(review): this is a single stateful instance, so results presumably depend on the
# order in which the cells are executed - confirm before relying on exact numbers.
rng = np.random.RandomState(2)

## Read in datasets and split them

In [None]:
import os

# `get_ipython` is only defined inside IPython/Jupyter. Guard the lookup so that
# this cell falls back to the local layout instead of raising NameError when the
# code is run as a plain Python script.
try:
    in_colab = 'google.colab' in str(get_ipython())
except NameError:
    in_colab = False

if in_colab:
    # On Colab, Google Drive has to be mounted before it can be read.
    from google.colab import drive
    drive.mount('/content/drive')
    # You may need to change this, depending on where your notebooks are on Google Drive
    base_dir = "./drive/My Drive/Colab Notebooks/"
else:
    # Otherwise paths are resolved relative to the current working directory.
    base_dir = "."

# Folder from which the CSV files are read.
dataset_dir = os.path.join(base_dir, "datasets")

In [None]:
# Read housing.csv from the datasets folder into a DataFrame.
df_housing = pd.read_csv(os.path.join(dataset_dir, "housing.csv"))

In [None]:
# Columns used as inputs for the housing regression task
features = [
    "BasementArea",
    "GroundFloorArea",
    "Bedrooms",
    "Condition",
]

# numpy arrays: the selected feature columns and the SalePrice target
X_housing = df_housing[features].values
y_housing = df_housing["SalePrice"].values

In [None]:
# Hold out 20% of the housing examples as the test set
(
    X_train_housing,
    X_test_housing,
    y_train_housing,
    y_test_housing,
) = train_test_split(X_housing, y_housing, test_size=0.2, random_state=rng)

In [None]:
# Read cs1109.csv from the datasets folder into a DataFrame.
df_cs1109 = pd.read_csv(os.path.join(dataset_dir, "cs1109.csv"))

In [None]:
# Input columns for the CS1109 task (this rebinds `features`, replacing the housing list)
features = ["lect", "lab"]

# NOTE(review): kept as a DataFrame here - no `.values`, unlike the housing features
X_cs1109 = df_cs1109[features]

# Encode the values of the "outcome" column as integer class labels
label_encoder = LabelEncoder()
y_cs1109 = label_encoder.fit_transform(df_cs1109["outcome"])

In [None]:
# Hold out 20% of the examples as the test set, stratifying on the "outcome" column
(
    X_train_cs1109,
    X_test_cs1109,
    y_train_cs1109,
    y_test_cs1109,
) = train_test_split(
    X_cs1109,
    y_cs1109,
    test_size=0.2,
    stratify=df_cs1109["outcome"],
    random_state=rng,
)

In [None]:
# Load Iris with pandas containers, then pull out the features DataFrame and the target values
iris = load_iris(as_frame=True)
iris_df, iris_y = iris.data, iris.target

# Stratified split: 20% of the dataset becomes the test set.
(
    train_iris_df,
    test_iris_df,
    train_iris_y,
    test_iris_y,
) = train_test_split(
    iris_df,
    iris_y,
    test_size=0.2,
    stratify=iris_y,
    random_state=rng,
)

# numpy versions of the training and test data
X_train_iris, y_train_iris = train_iris_df.values, train_iris_y.values
X_test_iris, y_test_iris = test_iris_df.values, test_iris_y.values

In [None]:
def check_fit(model, X_train, y_train, cv, metric):
    """Cross-validate `model` and return its mean training and validation scores.

    Args:
        model: Estimator handed to `cross_validate`.
        X_train: Features on which cross-validation is run.
        y_train: Targets on which cross-validation is run.
        cv: Forwarded as the `cv` argument of `cross_validate`.
        metric: Forwarded as the `scoring` argument of `cross_validate`.

    Returns:
        A pair `(mean_train, mean_val)`: the means of the "train_score" and
        "test_score" entries of the cross-validation results.
    """
    cv_results = cross_validate(
        model,
        X_train,
        y_train,
        scoring=metric,
        cv=cv,
        n_jobs=-1,  # let the folds run in parallel
        return_train_score=True,  # needed so that "train_score" is reported
    )
    mean_train = cv_results["train_score"].mean()
    mean_val = cv_results["test_score"].mean()
    return mean_train, mean_val

## Models for Comparison

In [None]:
# Baseline for the regression task
linear_model = LinearRegression()

# Baseline for the classification tasks: scale the features, then logistic regression with no penalty
logistic_model = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("predictor", LogisticRegression(penalty=None, random_state=rng)),
    ]
)

## Neural Networks in scikit-learn

scikit-learn's support for neural networks has improved a lot with the introduction of two classes, `MLPRegressor` and `MLPClassifier`. These classes are great for structured datasets (tabular datasets). We can put the `MLPRegressor` or `MLPClassifier` at the end of a pipeline that exploits scikit-learn's fantastic set of preprocessing classes (scalers, missing value imputers, and so on).

When it comes to unstructured datasets (e.g. images), we would not currently consider using scikit-learn. We will use keras (next lecture).

Both `MLPRegressor` and `MLPClassifier` have lots of arguments that we can set to control the training - but mostly we will stick with their default values. In any case, I'm not trying to get the best performance - I'm just showing you what the code looks like.

### Regression on housing data

In [None]:
# Linear Regression - for comparison
# (the scorer is the negated mean absolute error)

train_err, val_err = check_fit(
    linear_model,
    X_train_housing,
    y_train_housing,
    cv=10,
    metric="neg_mean_absolute_error",
)
train_err, val_err

In [None]:
# We'll try a neural network with one hidden layer with 8 neurons in that layer.

regressor_network = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "predictor",
            MLPRegressor(
                hidden_layer_sizes=[8],
                max_iter=800,
                learning_rate_init=0.1,
                random_state=rng,
            ),
        ),
    ]
)

In [None]:
# Cross-validate the current `regressor_network` with the same settings as the linear baseline
train_err, val_err = check_fit(
    regressor_network,
    X_train_housing,
    y_train_housing,
    cv=10,
    metric="neg_mean_absolute_error",
)
train_err, val_err

In [None]:
# Now we'll try a neural network with two hidden layers with 8 neurons in each layer.

regressor_network = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "predictor",
            MLPRegressor(
                hidden_layer_sizes=[8, 8],
                max_iter=800,
                learning_rate_init=0.1,
                random_state=rng,
            ),
        ),
    ]
)

In [None]:
# Cross-validate the current `regressor_network` with the same settings as the linear baseline
train_err, val_err = check_fit(
    regressor_network,
    X_train_housing,
    y_train_housing,
    cv=10,
    metric="neg_mean_absolute_error",
)
train_err, val_err

### Binary classification on student data

In [None]:
# Logistic Regression - for comparison

train_acc, val_acc = check_fit(
    logistic_model,
    X_train_cs1109,
    y_train_cs1109,
    cv=10,
    metric="accuracy",
)
train_acc, val_acc

In [None]:
# We'll try a neural network with one hidden layer with 8 neurons in that layer.

binary_classifier_network = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "predictor",
            MLPClassifier(
                hidden_layer_sizes=[8],
                max_iter=800,
                learning_rate_init=0.1,
                random_state=rng,
            ),
        ),
    ]
)

In [None]:
# Cross-validate the current `binary_classifier_network` with the same settings as the logistic baseline
train_acc, val_acc = check_fit(
    binary_classifier_network,
    X_train_cs1109,
    y_train_cs1109,
    cv=10,
    metric="accuracy",
)
train_acc, val_acc

In [None]:
# Now we'll try a neural network with two hidden layers with 8 and 4 neurons respectively.

binary_classifier_network = Pipeline([
    ("scaler", StandardScaler()),
    ("predictor", MLPClassifier(hidden_layer_sizes=[8,4], max_iter=800, learning_rate_init=0.1, random_state=rng))
])

In [None]:
# Cross-validate the current `binary_classifier_network` with the same settings as the logistic baseline
train_acc, val_acc = check_fit(
    binary_classifier_network,
    X_train_cs1109,
    y_train_cs1109,
    cv=10,
    metric="accuracy",
)
train_acc, val_acc

### Multi-class classification on Iris data

In [None]:
# Logistic Regression - for comparison

train_acc, val_acc = check_fit(
    logistic_model,
    X_train_iris,
    y_train_iris,
    cv=10,
    metric="accuracy",
)
train_acc, val_acc

In [None]:
# We'll try a neural network with one hidden layer with 8 neurons in that layer.

multiclass_classifier_network = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "predictor",
            MLPClassifier(
                hidden_layer_sizes=[8],
                max_iter=800,
                learning_rate_init=0.1,
                random_state=rng,
            ),
        ),
    ]
)

In [None]:
# Cross-validate the current `multiclass_classifier_network` with the same settings as the logistic baseline
train_acc, val_acc = check_fit(
    multiclass_classifier_network,
    X_train_iris,
    y_train_iris,
    cv=10,
    metric="accuracy",
)
train_acc, val_acc

In [None]:
# Now we'll try a neural network with three hidden layers with 8, 4 and 2 neurons respectively.
# Note I needed a smaller learning rate to get the same level of performance

multiclass_classifier_network = Pipeline([
    ("scaler", StandardScaler()),
    ("predictor", MLPClassifier(hidden_layer_sizes=[8, 4, 2], max_iter=800, learning_rate_init=0.01, random_state=rng))
])

In [None]:
# Cross-validate the current `multiclass_classifier_network` with the same settings as the logistic baseline
train_acc, val_acc = check_fit(
    multiclass_classifier_network,
    X_train_iris,
    y_train_iris,
    cv=10,
    metric="accuracy",
)
train_acc, val_acc