## Importing Packages

In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
%matplotlib inline

## Reading CSV

In [2]:
gastos_cartao = pd.read_csv("base_gastos_cartao.csv")

In [3]:
gastos_cartao

Unnamed: 0,Gastos_Cartao,Idade,Renda,Impostos,Segmento
0,510,35,1120,60,C
1,490,30,1120,60,C
2,470,32,1040,60,C
3,460,31,1200,60,C
4,500,36,1120,60,C
5,540,39,1360,120,C
6,460,34,1120,90,C
7,500,34,1200,60,C
8,440,29,1120,60,C
9,490,31,1200,30,C


In [4]:
gastos_cartao.shape

(150, 5)

In [5]:
gastos_cartao.head()

Unnamed: 0,Gastos_Cartao,Idade,Renda,Impostos,Segmento
0,510,35,1120,60,C
1,490,30,1120,60,C
2,470,32,1040,60,C
3,460,31,1200,60,C
4,500,36,1120,60,C


## Neural Network

#### Import Train Test Split

In [6]:
from sklearn.model_selection import train_test_split

#### Creating matrix of features and target
#### Repair the categorical feature

In [8]:
X = pd.concat([gastos_cartao[['Idade', 'Renda', 'Impostos']], pd.get_dummies(gastos_cartao.Segmento)], axis=1)
y = gastos_cartao.Gastos_Cartao
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [9]:
print(X_train.shape)
print(X_test.shape)

(105, 6)
(45, 6)


#### Import neural network for regression

In [10]:
from sklearn.neural_network import MLPRegressor

#### Import method to put all features in the same scale

In [11]:
from sklearn.preprocessing import StandardScaler

#### Using just the train data to get the parameters to scaler

In [12]:
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

#### Aplying the scaler for the train and test data

In [13]:
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#### Creating the Neural network model with specific parameters

In [14]:
mlp = MLPRegressor(hidden_layer_sizes=(30,30,30))

#### Fitting the model in the training data

In [15]:
mlp.fit(X_train,y_train)



MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(30, 30, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

#### Calculating the predictions in the test data set

In [16]:
predictions = mlp.predict(X_test)

#### Calculating Mean Square Error

In [17]:
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_test, predictions)
print("MSE: %.4f" % mse)

MSE: 11833.1414


## Let's try to improve the model optimizing the hyper-parameters

#### Importing packages

In [18]:
from sklearn import ensemble
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV

#### Creating the grid of hyper-parameters

In [19]:
tuned_parameters = [{'hidden_layer_sizes': [(1,), (5,), (10,), (5,5,)],
                     'activation' : ['identity', 'logistic', 'tanh', 'relu'],
                     'learning_rate': ['constant', 'adaptive'],
                     'alpha': [0.0001, 0.001, 0.01, 0.1, 1]}]

#### Testing all the combinations

In [20]:
clf = GridSearchCV(MLPRegressor(), tuned_parameters, cv=3, scoring='r2')
clf.fit(X_train, y_train)































GridSearchCV(cv=3, error_score='raise',
       estimator=MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid=[{'hidden_layer_sizes': [(1,), (5,), (10,), (5, 5)], 'activation': ['identity', 'logistic', 'tanh', 'relu'], 'learning_rate': ['constant', 'adaptive'], 'alpha': [0.0001, 0.001, 0.01, 0.1, 1]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='r2', verbose=0)

#### Printing the performance of all combinations

In [21]:
print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()
print("Grid scores on development set:")
print()
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
        % (mean, std * 2, params))
print()

print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
mse = mean_squared_error(y_test, clf.predict(X_test))
print("MSE: %.4f" % mse)
print()

Best parameters set found on development set:

{'activation': 'relu', 'alpha': 1, 'hidden_layer_sizes': (5, 5), 'learning_rate': 'constant'}

Grid scores on development set:

-56.053 (+/-32.124) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (1,), 'learning_rate': 'constant'}
-55.763 (+/-31.321) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (1,), 'learning_rate': 'adaptive'}
-55.529 (+/-31.509) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (5,), 'learning_rate': 'constant'}
-55.440 (+/-31.600) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (5,), 'learning_rate': 'adaptive'}
-55.459 (+/-31.614) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (10,), 'learning_rate': 'constant'}
-55.560 (+/-31.621) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (10,), 'learning_rate': 'adaptive'}
-54.991 (+/-30.988) for {'activation': 'identity', 'alpha': 0.0001, 'hidd