In [None]:
"""
4/28/2023
MLPRegressor (neural network regressor) 

The same dataset as xgBoostRegressor
"""

Collecting smart-open
  Using cached smart_open-6.3.0-py3-none-any.whl (56 kB)
Installing collected packages: smart-open
Successfully installed smart-open-6.3.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [4]:
import smart_open

with smart_open.open('s3://vector-services-ied-np/z-engine-weight-training/balanced_set.csv', 'r') as data_handler:
    data = pd.read_csv(data_handler)

ImportError: You are trying to use the s3 functionality of smart_open
but you do not have the correct s3 dependencies installed. Try:

    pip install smart_open[s3]



In [7]:
# Feature dataset 

columns = [ 'AverageWorkHistoryScore',
 'ComutabilityScore',
 'DescriptionClusterToClusterScore',
 'EliteSkillScore',
 'FeeEligibilityScore',
 'JobSkillsScore',
 'JobSoftwareSkillsScore',
 'LatestWorkHistoryDescriptionVectorScore',
 'LatestWorkHistoryTitleVectorScore',
 'NrmlzdDescClusterScore',
 'NrmlzdSkillClusterScore',
 'NrmlzdTitleClusterScore',
 'RelevantMonthsOfExperience',
 'SkillClusterToClusterScore',
 'TitleClusterToClusterScore',
 'YearsOfExperienceScore']
features = data[columns]
target= data[['placed flag']]

In [8]:
# Split data into the same train and test datasets as in xgBoostRegressor

X=features
y=np.ravel(target)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) 

In [9]:
from sklearn.preprocessing import StandardScaler

# Scale feature values
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test) 

In [13]:
# MLPRegressor architecture, default: 1 hidden layer with 1000 nodes
nn_1 = MLPRegressor(
    activation='relu',
    alpha=0.001,
    random_state=20,
    early_stopping=False
)

# Train the model
nn_1.fit(X_train, y_train)

MLPRegressor(activation='relu', alpha=0.001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(100,), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=20, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [11]:
# Regression metrics
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_error as MAE

def metrics(y_test, y_pred):
    mse = MSE(y_test, y_pred)
    rmse = np.sqrt(MSE(y_test, y_pred))
    mae = MAE(y_test, y_pred)
    print("MSE: % f" %(mse))
    print("RMSE : % f" %(rmse))
    print("MAE : % f" %(mae))

In [14]:
y_pred = nn_1.predict(X_test)
print("with MLPRegressor, one hidden layer")
print("")
metrics(y_test, y_pred)

with MLPRegressor, one hidden layer

MSE:  0.182398
RMSE :  0.427080
MAE :  0.368869


In [15]:
# Ten hidden layers
nn_10 = MLPRegressor(
    activation='relu',
    hidden_layer_sizes=(10, 100),
    alpha=0.001,
    random_state=20,
    early_stopping=False
)

# Train the model
nn_10.fit(X_train, y_train)

MLPRegressor(activation='relu', alpha=0.001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(10, 100), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=20, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [16]:
y_pred_10 = nn_10.predict(X_test)
print("with MLPRegressor, ten hidden layers")
print("")
metrics(y_test, y_pred_10)

with MLPRegressor, ten hidden layers

MSE:  0.181810
RMSE :  0.426392
MAE :  0.364709
