# Supervised neural networks - Modeling

In [1]:
import warnings
from urllib.parse import quote

import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sqlalchemy import create_engine
from sqlalchemy_utils import create_database, database_exists

warnings.filterwarnings('ignore')

from config import usr, pwd, url, port, db, table
import helpers as hp

## Load dataset
Load the dataset from the SQL database.

In [2]:
# Percent-encode the credentials before building the connection URL so that
# characters such as '@', ':', '/' or '%' in the user name or password cannot be
# mistaken for URL delimiters. safe='' encodes every reserved character.
safe_usr = quote(str(usr), safe='')
safe_pwd = quote(str(pwd), safe='')
engine = create_engine(f"postgresql+psycopg2://{safe_usr}:{safe_pwd}@{url}:{port}/{db}")

In [3]:
# Select every column of the table named in config.py. The table name is
# substituted into the template; the surrounding SQL text is left exactly as is.
query = '''
SELECT
    *
FROM
    {};
'''.format(table)

In [4]:
# Execute the query over the SQLAlchemy engine and load the full result set
# into a DataFrame.
df = pd.read_sql_query(sql=query, con=engine)

Copy the raw `DataFrame` to a new `DataFrame` to use for modeling, so the raw data stays unchanged.

In [5]:
# Work on an independent deep copy so the raw query result in `df` is never
# modified by the modeling steps.
df_model = df.copy(deep=True)

## Model
Create the neural network using Scikit-Learn.
### Subset
Use only a subset of the full dataset to speed up modeling.

In [6]:
# Draw the 1% subset from the untouched `df` rather than from `df_model`, so
# re-running this cell does not shrink the sample further each time. The fixed
# seed makes the same rows come out on every run.
# NOTE(review): with such a small sample a rare Department may end up with a
# single row, which the stratified split below cannot handle -- raise `frac`
# if that happens.
df_model = df.sample(frac=0.01, random_state=42)

### Features and target
Split dataset into features and target.

In [7]:
# Every column is a candidate feature except the prediction target.
features = list(df_model.columns)
# list.remove raises ValueError if the 'Department' column is missing.
features.remove('Department')

In [8]:
# Show the feature column names that remain once the target has been removed.
print(features)

['Gender', 'URL', 'ThumbnailURL', 'Height', 'Width', 'Date', 'Nationality', 'YearAcquired']


In [9]:
# Feature matrix: the columns listed in `features`.
X = df_model[features]
# Target vector: the Department label of each row.
y = df_model.loc[:, 'Department']

### Dummies
Convert each categorical feature into its own set of dummy (indicator) columns.

In [10]:
# One-hot encode the object/category-typed feature columns. sparse=True stores
# the resulting indicator columns in pandas' sparse format to save memory.
X = pd.get_dummies(data=X, sparse=True)

In [11]:
# Inspect the encoded feature matrix: row and column counts, dtypes and memory use.
X.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1086 entries, 42421 to 66354
Columns: 485 entries, URL to Nationality_\(multiple_nationalities\)
dtypes: Sparse[uint8, 0](480), bool(2), float64(3)
memory usage: 52.0 KB


### Train/test split
Split features and target into training and test sets. Stratify on `y` so that each class appears in the training and test sets in the same proportions as in the full sample.

In [12]:
# Hold out 10% of the rows for testing. Stratifying on the target keeps the
# class proportions the same in both splits, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    stratify=y,
    random_state=42,
)

### Run models
Try running the model using a few different network configurations.

In [13]:
# Configuration 1. NOTE(review): the tuple is presumably forwarded as
# MLPClassifier's hidden_layer_sizes, i.e. one hidden layer of 1000 units --
# confirm in helpers.run_mlp.
hp.run_mlp(X_train, X_test, y_train, y_test, (1000,))

Training score: 0.655
Test score: 0.642


In [14]:
# Configuration 2. NOTE(review): presumably one hidden layer of 10 units --
# confirm how helpers.run_mlp uses the tuple.
hp.run_mlp(X_train, X_test, y_train, y_test, (10,))

Training score: 0.771
Test score: 0.697


In [15]:
# Configuration 3. NOTE(review): presumably three hidden layers narrowing
# 1000 -> 100 -> 10 -- confirm in helpers.run_mlp. In the recorded run this
# configuration scored far below the others, so it is worth checking whether
# training converged (warnings are silenced at the top of the notebook).
hp.run_mlp(X_train, X_test, y_train, y_test, (1000, 100, 10))

Training score: 0.253
Test score: 0.257


In [16]:
# Configuration 4. NOTE(review): presumably three hidden layers widening
# 10 -> 100 -> 1000 -- confirm in helpers.run_mlp.
hp.run_mlp(X_train, X_test, y_train, y_test, (10, 100, 1000))

Training score: 0.621
Test score: 0.633
