In [25]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
from sklearn.utils.random import sample_without_replacement
from sklearn.model_selection import GridSearchCV

In [6]:
train = pd.read_csv(
    "https://raw.githubusercontent.com/sibirbil/IMO2020/master/UygulamaDersleri/07_Yapay_Sinir_Aglari/ann-train.data", sep= " ", header=None)

In [8]:
train.drop(columns = [22, 23], inplace=True, axis=1)

In [9]:
train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,0.73,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0.00060,0.0150,0.120,0.082,0.146,3
1,0.24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00025,0.0300,0.143,0.133,0.108,3
2,0.47,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00190,0.0240,0.102,0.131,0.078,3
3,0.64,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00090,0.0170,0.077,0.090,0.085,3
4,0.23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00025,0.0260,0.139,0.090,0.153,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3767,0.77,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00006,0.0206,0.125,0.107,0.117,3
3768,0.41,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00130,0.0250,0.125,0.114,0.109,3
3769,0.88,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.01300,0.0174,0.123,0.099,0.124,2
3770,0.64,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00078,0.0206,0.106,0.088,0.121,3


In [10]:
test = pd.read_csv("https://raw.githubusercontent.com/sibirbil/IMO2020/master/UygulamaDersleri/07_Yapay_Sinir_Aglari/ann-test.data", sep= " ", header=None)

In [12]:
test.drop(columns=[22,23], inplace=True, axis=1)

In [13]:
test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,0.29,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00610,0.0280,0.111,0.131,0.0850,2
1,0.32,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00130,0.0190,0.084,0.078,0.1070,3
2,0.35,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00000,0.0310,0.239,0.100,0.2390,3
3,0.21,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00100,0.0180,0.087,0.088,0.0990,3
4,0.22,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0.00040,0.0220,0.134,0.135,0.0990,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3423,0.59,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0.00250,0.0208,0.079,0.099,0.0800,3
3424,0.51,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.10600,0.0060,0.005,0.089,0.0055,1
3425,0.51,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0.00076,0.0201,0.090,0.067,0.1340,3
3426,0.35,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0.00280,0.0201,0.090,0.089,0.1010,3


In [14]:
X_train = train.iloc[:,range(21)]
X_test = test.iloc[:,range(21)]

y_train = train.iloc[:, 21]
y_test = test.iloc[:, 21]

In [16]:
# distribution among classes
print(sum(y_train == 1))
print(sum(y_train == 2))
print(sum(y_train == 3))

93
191
3488


In [17]:
# neural nets 
nn_model = MLPClassifier(hidden_layer_sizes = (5,),
                        activation = "tanh",
                        solver = "sgd",
                        max_iter = 5000,
                        random_state=3)

In [19]:
nn_model.fit(X_train, y_train)
y_pred = nn_model.predict(X_test)

In [20]:
print(nn_model.score(X_test, y_test))
print(classification_report(y_test, y_pred))
# hepsine 3 dedi ve verideki 3 classı ile aynı oranda tahmin etti

0.927071178529755
              precision    recall  f1-score   support

           1       0.00      0.00      0.00        73
           2       0.00      0.00      0.00       177
           3       0.93      1.00      0.96      3178

    accuracy                           0.93      3428
   macro avg       0.31      0.33      0.32      3428
weighted avg       0.86      0.93      0.89      3428



  'precision', 'predicted', average, warn_for)


In [31]:
# gizli katmandaki düğüm sayısını bulmak için cross-validation
neuron_params = range(2, 11)
cv = KFold(n_splits=10, shuffle=True, random_state=3)
param_performance = []

for c in neuron_params:
    cv_performance = []
    for train, valid in cv.split(X_train, y_train):
        cv_nn_model = MLPClassifier(hidden_layer_sizes=(c,),
                                    activation="tanh",
                                    solver="sgd",
                                    max_iter=500,
                                    random_state=5,
                                    learning_rate_init = 0.01)
        
        cv_nn_model.fit(X_train.iloc[train, :],
                        y_train.iloc[train])
        
        r2_score = cv_nn_model.score(X_train.iloc[valid, :],
                                     y_train.iloc[valid])
        
        cv_performance.append(r2_score)
        
    cv_performance = np.array(cv_performance)
    param_performance.append(cv_performance.mean())







In [32]:
num_best_neuron = neuron_params[np.argmax(param_performance)]
num_best_neuron

5

In [34]:
best_nn_model = MLPClassifier(hidden_layer_sizes=(c,),
                                    activation="tanh",
                                    solver="sgd",
                                    max_iter=500,
                                    random_state=5,
                                    learning_rate_init = 0.01)

best_nn_model.fit(X_test, y_test)
r2_score_test = best_nn_model.score(X_test, y_test)
print(r2_score_test)
print(classification_report(y_test, best_nn_model.predict(X_test)))

0.9387397899649942
              precision    recall  f1-score   support

           1       0.85      0.68      0.76        73
           2       0.00      0.00      0.00       177
           3       0.94      1.00      0.97      3178

    accuracy                           0.94      3428
   macro avg       0.60      0.56      0.58      3428
weighted avg       0.89      0.94      0.91      3428





#dengeli datasetle train etme kısmı eksik