In [90]:
import pandas as pd
import sklearn
from sklearn import preprocessing
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler  
from sklearn.neural_network import MLPClassifier 
from sklearn.metrics import classification_report, confusion_matrix 

In [91]:
#Naming the features

df = ('updated_joined_df.csv')
df2= ('Y_data.csv')

In [92]:
# Reading the dataset through the Pandas function and dropping the date
signals = pd.read_csv(df)
signals.drop('Date', axis=1)
positive_outcomes= pd.read_csv(df2)
positive_outcomes.drop('Date', axis=1)
positive_outcomes = positive_outcomes[:-1]

In [93]:
#Assign the 9 features to X
X = signals
# Assign the binary positive return signal to variable "Y"
y = positive_outcomes

In [94]:
X.head()

Unnamed: 0,Date,bollinger_signal,mfi_signal,nvi_signal,rsi_signal,vi_signal,crossover_signal,vol_trend_signal,kst_signal,nlp_signal
0,2015-10-05,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,1
1,2015-10-06,0.0,1.0,0.0,1,1.0,-1.0,0.0,0.0,1
2,2015-10-07,0.0,1.0,0.0,-1,1.0,1.0,1.0,1.0,-1
3,2015-10-08,0.0,1.0,0.0,-1,1.0,1.0,1.0,1.0,0
4,2015-10-09,0.0,1.0,1.0,-1,1.0,1.0,1.0,1.0,0


In [95]:
y.head()

Unnamed: 0,Date,positive_return
0,2015-10-05,0.0
1,2015-10-06,0.0
2,2015-10-07,1.0
3,2015-10-08,1.0
4,2015-10-09,1.0


In [96]:
#Display all categories or classes:
y.positive_return.unique()

array([0., 1.])

In [97]:
#transforming categorial into numerical values
le = preprocessing.LabelEncoder()
y = y.apply(le.fit_transform)
X = X.apply(le.fit_transform)

In [98]:
#Droping the date column
y = y.drop(columns=['Date'])
y.head(1300)

Unnamed: 0,positive_return
0,0
1,0
2,1
3,1
4,1
...,...
1254,1
1255,1
1256,0
1257,1


In [99]:
#dropping the date column
X = X.drop(columns=['Date'])
X.head(1300)

Unnamed: 0,bollinger_signal,mfi_signal,nvi_signal,rsi_signal,vi_signal,crossover_signal,vol_trend_signal,kst_signal,nlp_signal
0,1,1,1,2,1,1,1,0,2
1,1,2,1,2,2,0,1,0,2
2,1,2,1,0,2,2,2,1,0
3,1,2,1,0,2,2,2,1,1
4,1,2,2,0,2,2,2,1,1
...,...,...,...,...,...,...,...,...,...
1254,0,1,0,1,2,2,0,1,1
1255,0,1,0,1,2,2,2,1,1
1256,0,1,0,1,2,2,2,1,0
1257,0,1,0,1,2,2,0,1,1


In [103]:
#Setting the parameters you want 'GridSearchCV' to run through in order to create the optimal MLPClassifier 
parameter_space = {
    'hidden_layer_sizes': [(10,10,10) , (50,50,50), (50,100,50), (100,100,100)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive','invscaling'],
}

In [104]:
#Import GridsearchCV
from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3)
clf.fit(X_train, y_train.values.ravel())

GridSearchCV(cv=3, estimator=MLPClassifier(max_iter=1000), n_jobs=-1,
             param_grid={'activation': ['identity', 'logistic', 'tanh', 'relu'],
                         'alpha': [0.0001, 0.05],
                         'hidden_layer_sizes': [(10, 10, 10), (50, 50, 50),
                                                (50, 100, 50),
                                                (100, 100, 100)],
                         'learning_rate': ['constant', 'adaptive',
                                           'invscaling'],
                         'solver': ['lbfgs', 'sgd', 'adam']})

In [105]:
# Best paramete set
print('Best parameters found:\n', clf.best_params_)

# All results
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

Best parameters found:
 {'activation': 'logistic', 'alpha': 0.05, 'hidden_layer_sizes': (100, 100, 100), 'learning_rate': 'constant', 'solver': 'adam'}
0.665 (+/-0.061) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (10, 10, 10), 'learning_rate': 'constant', 'solver': 'lbfgs'}
0.656 (+/-0.075) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (10, 10, 10), 'learning_rate': 'constant', 'solver': 'sgd'}
0.660 (+/-0.065) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (10, 10, 10), 'learning_rate': 'constant', 'solver': 'adam'}
0.665 (+/-0.061) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (10, 10, 10), 'learning_rate': 'adaptive', 'solver': 'lbfgs'}
0.648 (+/-0.034) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (10, 10, 10), 'learning_rate': 'adaptive', 'solver': 'sgd'}
0.659 (+/-0.054) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (10, 10, 10), 'learni

In [438]:
# train and test split (80% of  dataset into  training set and  other 20% into test data)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)

In [439]:
# Feature scaling
scaler = StandardScaler()  
scaler.fit(X_train)
X_train = scaler.transform(X_train)  
X_test = scaler.transform(X_test)

In [440]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(max_iter=1000)

In [441]:
# the MLP- Multilayer Perceptron
mlp = MLPClassifier(activation= 'identity', alpha= 0.0001, hidden_layer_sizes= (100, 100, 100), learning_rate= 'constant', solver= 'lbfgs')
mlp.fit(X_train, y_train.values.ravel())

MLPClassifier(activation='identity', hidden_layer_sizes=(100, 100, 100),
              solver='lbfgs')

In [442]:
#predictions
predictions = mlp.predict(X_test)

In [443]:
#print predictions
print(predictions)

[1 1 0 0 0 0 1 1 0 1 0 1 1 1 0 0 1 0 1 0 0 0 1 1 0 0 1 1 0 1 0 1 0 0 0 0 1
 0 1 0 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 0 1 1 0 1 1 1 0 1 1 0 1 1 0 1
 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 1 1 1 1 0 1 0 1 0 1 1 0 0 1 1 1 0
 1 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 1 0 1 0 1 1 1 0 1 1 0 1 0 1 1 1 1 0 0 1 0
 1 1 1 0 1 1 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 1 0 0 1 0 0 0 1 0
 0 0 0 0 0 0 0 1 1 0 1 0 1 1 1 0 0 0 0 1 0 1 1 1 1 1 0 1 0 0 1 1 1 0 1 1 0
 1 0 1 0 1 0 0 1 1 0 0 1 0 0 1 1 1 0 0 0 1 1 0 1 1 0 1 1 1 1]


In [308]:
# evaluation of algorithm performance in classifying positive returns
print(confusion_matrix(y_test,predictions))  
print(classification_report(y_test,predictions))

[[92 42]
 [32 86]]
              precision    recall  f1-score   support

           0       0.74      0.69      0.71       134
           1       0.67      0.73      0.70       118

    accuracy                           0.71       252
   macro avg       0.71      0.71      0.71       252
weighted avg       0.71      0.71      0.71       252

