# Default parametrelerle denenen modellerin hyper parametre tuningi ile tekrar sonuçlarının kontrol edilmesi

## Kütüphanelerin import edilmesi

In [38]:
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.decomposition import PCA
from sklearn import metrics

from tensorflow.keras.utils import to_categorical

## GridSearch İşlemi Uygulanacak Makine Öğrenimi Modellerinin Tanımlanması

### KNN Grid Search Uygulanması ve optimum parametrelerin bulunması

In [39]:
df = pd.read_csv('data/clustered_data.csv').drop(['id', 'player_name', 'position'], axis=1)
X = df.drop(['classes'], axis = 1).values
y = df['classes']
n_cols = len(np.unique(y))

sc = StandardScaler()
X = sc.fit_transform(X)

pca = PCA(2)
X = pca.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.36, random_state=14)

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

knn = KNeighborsClassifier()
decision_tree = DecisionTreeClassifier()

k_range = list(range(1, 31))
param_grid = dict(n_neighbors=k_range)

grid = GridSearchCV(knn, param_grid, cv=10, scoring='accuracy',
                    return_train_score=False, verbose=1)


grid_search=grid.fit(X_train, y_train)

print(grid_search.best_params_)
n_neighbors = grid_search.best_params_['n_neighbors']


Fitting 10 folds for each of 30 candidates, totalling 300 fits
{'n_neighbors': 3}


In [3]:
accuracy = grid_search.best_score_ *100
print("Parametre tuningi sonucu çıkan optimum başarım : {:.2f}%".format(accuracy) )

Parametre tuningi sonucu çıkan optimum başarım : 98.14%


In [55]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

knn = KNeighborsClassifier(n_neighbors=n_neighbors)

knn.fit(X_train, y_train)

y_pred=knn.predict(X_test) 

test_accuracy=accuracy_score(y_test,y_pred)*100
test_precision = precision_score(y_test, y_pred, pos_label='positive', average='weighted') * 100
test_recall = recall_score(y_test, y_pred, pos_label='positive', average='weighted') * 100
test_f1 = f1_score(y_test, y_pred, pos_label='positive', average='weighted') * 100
acc_knn = np.array([grid_search.best_params_['n_neighbors'], test_accuracy, test_precision, test_recall, test_f1]).reshape(1,-1)
df_acc_knn = pd.DataFrame(acc_knn, columns=['n_neighbors', 'acc', 'precision', 'recall', 'f1'])
df_acc_knn.to_csv('knn_acc_recall_f1_precision.csv')
print("Bu parametrenin test seti üzerindeki başarısı : {:.2f}%".format(test_accuracy) )

Bu parametrenin test seti üzerindeki başarısı : 98.12%




In [56]:
cm = confusion_matrix(
    y_test.argmax(axis=1), y_pred.argmax(axis=1))
plt.figure(figsize=(12, 12))
sns.heatmap(pd.DataFrame(cm),
                        annot=True, cmap="YlGnBu", fmt='g')
plt.title('Confusion matrix', y=1.1)
plt.ylabel('Actual label')
plt.xlabel('Predicted label')

plt.savefig('knn_optimum_parameters_cm.png')
plt.clf()


<Figure size 864x864 with 0 Axes>

### Decision Tree GridSearch Uygulanması ve optimum parametrelerin bulunması

In [57]:
df = pd.read_csv('data/clustered_data.csv').drop(['id', 'player_name', 'position'], axis=1)
X = df.drop(['classes'], axis = 1).values
y = df['classes']
n_cols = len(np.unique(y))

sc = StandardScaler()
X = sc.fit_transform(X)

pca = PCA(2)
X = pca.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.36, random_state=14)

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

decision_tree = DecisionTreeClassifier()

param_grid = {'criterion': ['gini', 'entropy'], 'max_depth': [
    4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 20, 30, 40, 50, 70, 90, 120, 150]}

grid = GridSearchCV(decision_tree, param_grid, cv=10, scoring='accuracy',
                    return_train_score=False, verbose=1)


grid_search = grid.fit(X_train, y_train)

print(grid_search.best_params_)
criterion = grid_search.best_params_['criterion']
max_depth = grid_search.best_params_['max_depth']

Fitting 10 folds for each of 36 candidates, totalling 360 fits
{'criterion': 'gini', 'max_depth': 120}


In [58]:
accuracy = grid_search.best_score_ *100
print("Parametre tuningi sonucu çıkan optimum başarım : {:.2f}%".format(accuracy) )

Parametre tuningi sonucu çıkan optimum başarım : 97.80%


In [59]:
from sklearn.metrics import confusion_matrix, accuracy_score

decision_tree = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth)

decision_tree.fit(X_train, y_train)

y_pred=decision_tree.predict(X_test) 

test_accuracy=accuracy_score(y_test,y_pred)*100
test_precision = precision_score(y_test, y_pred, pos_label='positive', average='weighted') * 100
test_recall = recall_score(y_test, y_pred, pos_label='positive', average='weighted') * 100
test_f1 = f1_score(y_test, y_pred, pos_label='positive', average='weighted') * 100
acc_dt = np.array([grid_search.best_params_['criterion'], grid_search.best_params_['max_depth'], test_accuracy, test_precision, test_recall, test_f1]).reshape(1,-1)
df_acc_dt = pd.DataFrame(acc_dt, columns=['criterion','max_depth','acc', 'precision', 'recall', 'f1'])
df_acc_dt.to_csv('dt_acc_recall_f1_precision.csv')
print("Bu parametrenin test seti üzerindeki başarısı : {:.2f}%".format(test_accuracy) )

Bu parametrenin test seti üzerindeki başarısı : 97.89%




In [60]:
cm = confusion_matrix(
    y_test.argmax(axis=1), y_pred.argmax(axis=1))
plt.figure(figsize=(12, 12))
sns.heatmap(pd.DataFrame(cm),
                        annot=True, cmap="YlGnBu", fmt='g')
plt.title('Confusion matrix', y=1.1)
plt.ylabel('Actual label')
plt.xlabel('Predicted label')

plt.savefig('decision_tree_optimum_parameters_cm.png')
plt.clf()

<Figure size 864x864 with 0 Axes>

### Ann ile çeşitli batch-size değerlerinin denenmesi
[8, 16, 32, 64, 128, 256] değerleriyle denemeler yapılmış ve test seti üzerindeki başarıları kıyaslanmıştır.

In [97]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.decomposition import PCA
from sklearn import metrics

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from keras import callbacks
import tensorflow as tf
import keras

def createModel(n_cols):

    model = keras.Sequential([

        keras.layers.Dense(units=128, activation='relu'),
        keras.layers.Dense(units=128, activation='relu'),
        keras.layers.Dense(units=64, activation='relu'),
        keras.layers.Dropout(0.25),
        keras.layers.Dense(units=32, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(units=8, activation='relu'),
        keras.layers.Dense(units=n_cols, activation='softmax'),
    ]
    )

    optim = Adam(lr=0.001)
    model.compile(optimizer=optim, loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [101]:
def ann_model(X_train, y_train, X_test, y_test, batch_size):
        early_stopping = callbacks.EarlyStopping(
        min_delta=0.00001,
        patience=6,
        restore_best_weights=True)
        model = createModel(n_cols=n_cols)

        history = model.fit(X_train, y_train, batch_size=batch_size, epochs=1000, callbacks=[
                        early_stopping], validation_split=0.2, verbose = 2)
        model.save('model/model_sc_pca_batch_size_'+str(batch_size)+'.h5')
        plt.figure(figsize=(12, 12))
        plt.subplot(211)
        plt.title('Loss')
        plt.plot(history.history['loss'], label='train')
        plt.plot(history.history['val_loss'], label='test')
        plt.legend()
        
        plt.subplot(212)
        plt.title('Accuracy')
        plt.plot(history.history['accuracy'], label='train')
        plt.plot(history.history['val_accuracy'], label='test')
        plt.legend()
        plt.savefig('ann_default_parameters_acc_loss_batch_size_'+str(batch_size)+'.png')
        plt.clf()
        y_pred = model.predict(X_test)
        cnf_matrix = metrics.confusion_matrix(
                y_test.argmax(axis=1), y_pred.argmax(axis=1))
        plt.figure(figsize=(12, 12))
        sns.heatmap(pd.DataFrame(cnf_matrix),
                        annot=True, cmap="YlGnBu", fmt='g')
        plt.title('Confusion matrix', y=1.1)
        plt.ylabel('Actual label')
        plt.xlabel('Predicted label')

        plt.savefig('ann_default_parameters_cm_batch_size_'+str(batch_size)+'.png')
        plt.clf()
        FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)
        FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
        TP = np.diag(cnf_matrix)
        TN = cnf_matrix.sum() - (FP + FN + TP)

        acc = (TP + TN) / (TP + FP + FN + TN)
        sen = TP / (TP + FN)
        pre = TP / (TP + FP)
        spe = TN / (TN + FP)
        f1 = (2 * (pre * sen)) / (pre + sen)

        acc_scores_ann.append([batch_size, np.mean(acc), np.mean(sen), np.mean(pre), np.mean(spe),np.mean(f1)])

        print("batch_size {} -> Bu parametrenin test seti üzerindeki başarısı : {:.2f}%".format(batch_size, test_accuracy))

        

In [102]:
acc_scores_ann = []
df = pd.read_csv('data/clustered_data.csv').drop(['id', 'player_name', 'position'], axis=1)
X = df.drop(['classes'], axis = 1).values
y = df['classes']
n_cols = len(np.unique(y))

sc = StandardScaler()
X = sc.fit_transform(X)

pca = PCA(2)
X = pca.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.36, random_state=14)

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [103]:
batches = [8, 16, 32, 64, 128, 256]
for i in batches:
    ann_model(X_train, y_train, X_test, y_test, i)

acc_scores_ann = np.array(acc_scores_ann)
acc_scores_ann

Epoch 1/1000


  super(Adam, self).__init__(name, **kwargs)


473/473 - 5s - loss: 1.2772 - accuracy: 0.4956 - val_loss: 0.8716 - val_accuracy: 0.6571 - 5s/epoch - 11ms/step
Epoch 2/1000
473/473 - 4s - loss: 0.8175 - accuracy: 0.7056 - val_loss: 0.5311 - val_accuracy: 0.8646 - 4s/epoch - 8ms/step
Epoch 3/1000
473/473 - 4s - loss: 0.5591 - accuracy: 0.8128 - val_loss: 0.3854 - val_accuracy: 0.8624 - 4s/epoch - 8ms/step
Epoch 4/1000
473/473 - 4s - loss: 0.4536 - accuracy: 0.8472 - val_loss: 0.2600 - val_accuracy: 0.9228 - 4s/epoch - 8ms/step
Epoch 5/1000
473/473 - 3s - loss: 0.3932 - accuracy: 0.8605 - val_loss: 0.3250 - val_accuracy: 0.8899 - 3s/epoch - 7ms/step
Epoch 6/1000
473/473 - 3s - loss: 0.3636 - accuracy: 0.8782 - val_loss: 0.2211 - val_accuracy: 0.9090 - 3s/epoch - 7ms/step
Epoch 7/1000
473/473 - 3s - loss: 0.3423 - accuracy: 0.8896 - val_loss: 0.1802 - val_accuracy: 0.9460 - 3s/epoch - 6ms/step
Epoch 8/1000
473/473 - 2s - loss: 0.2908 - accuracy: 0.9110 - val_loss: 0.1692 - val_accuracy: 0.9312 - 2s/epoch - 5ms/step
Epoch 9/1000
473/473

  super(Adam, self).__init__(name, **kwargs)


237/237 - 3s - loss: 1.4547 - accuracy: 0.4355 - val_loss: 1.0479 - val_accuracy: 0.5884 - 3s/epoch - 11ms/step
Epoch 2/1000
237/237 - 2s - loss: 0.9476 - accuracy: 0.6664 - val_loss: 0.5932 - val_accuracy: 0.7947 - 2s/epoch - 7ms/step
Epoch 3/1000
237/237 - 2s - loss: 0.6366 - accuracy: 0.7628 - val_loss: 0.3986 - val_accuracy: 0.8741 - 2s/epoch - 7ms/step
Epoch 4/1000
237/237 - 2s - loss: 0.5322 - accuracy: 0.8242 - val_loss: 0.2914 - val_accuracy: 0.9376 - 2s/epoch - 8ms/step
Epoch 5/1000
237/237 - 2s - loss: 0.4403 - accuracy: 0.8501 - val_loss: 0.2487 - val_accuracy: 0.9450 - 2s/epoch - 9ms/step
Epoch 6/1000
237/237 - 2s - loss: 0.3806 - accuracy: 0.8666 - val_loss: 0.2417 - val_accuracy: 0.9397 - 2s/epoch - 7ms/step
Epoch 7/1000
237/237 - 2s - loss: 0.3587 - accuracy: 0.8848 - val_loss: 0.2213 - val_accuracy: 0.9397 - 2s/epoch - 8ms/step
Epoch 8/1000
237/237 - 1s - loss: 0.3446 - accuracy: 0.8809 - val_loss: 0.1829 - val_accuracy: 0.9450 - 1s/epoch - 5ms/step
Epoch 9/1000
237/237

  super(Adam, self).__init__(name, **kwargs)


119/119 - 2s - loss: 1.6079 - accuracy: 0.4181 - val_loss: 1.1298 - val_accuracy: 0.5259 - 2s/epoch - 14ms/step
Epoch 2/1000
119/119 - 1s - loss: 1.0775 - accuracy: 0.5510 - val_loss: 0.8017 - val_accuracy: 0.7037 - 1s/epoch - 9ms/step
Epoch 3/1000
119/119 - 1s - loss: 0.8297 - accuracy: 0.6894 - val_loss: 0.6039 - val_accuracy: 0.8381 - 1s/epoch - 9ms/step
Epoch 4/1000
119/119 - 1s - loss: 0.6512 - accuracy: 0.7861 - val_loss: 0.4210 - val_accuracy: 0.8836 - 1s/epoch - 9ms/step
Epoch 5/1000
119/119 - 1s - loss: 0.6009 - accuracy: 0.8041 - val_loss: 0.3250 - val_accuracy: 0.9079 - 928ms/epoch - 8ms/step
Epoch 6/1000
119/119 - 1s - loss: 0.4680 - accuracy: 0.8520 - val_loss: 0.2789 - val_accuracy: 0.9090 - 862ms/epoch - 7ms/step
Epoch 7/1000
119/119 - 1s - loss: 0.4457 - accuracy: 0.8491 - val_loss: 0.2490 - val_accuracy: 0.9407 - 926ms/epoch - 8ms/step
Epoch 8/1000
119/119 - 1s - loss: 0.3959 - accuracy: 0.8721 - val_loss: 0.2320 - val_accuracy: 0.9460 - 980ms/epoch - 8ms/step
Epoch 9/

  pre = TP / (TP + FP)
  super(Adam, self).__init__(name, **kwargs)


60/60 - 1s - loss: 1.7316 - accuracy: 0.3471 - val_loss: 1.2123 - val_accuracy: 0.5693 - 1s/epoch - 19ms/step
Epoch 2/1000
60/60 - 1s - loss: 1.1934 - accuracy: 0.5412 - val_loss: 0.9175 - val_accuracy: 0.7175 - 522ms/epoch - 9ms/step
Epoch 3/1000
60/60 - 1s - loss: 0.9772 - accuracy: 0.6373 - val_loss: 0.7746 - val_accuracy: 0.7344 - 515ms/epoch - 9ms/step
Epoch 4/1000
60/60 - 1s - loss: 0.8706 - accuracy: 0.6942 - val_loss: 0.6892 - val_accuracy: 0.8074 - 517ms/epoch - 9ms/step
Epoch 5/1000
60/60 - 1s - loss: 0.7605 - accuracy: 0.7337 - val_loss: 0.5629 - val_accuracy: 0.8466 - 508ms/epoch - 8ms/step
Epoch 6/1000
60/60 - 1s - loss: 0.6577 - accuracy: 0.7813 - val_loss: 0.4573 - val_accuracy: 0.8677 - 515ms/epoch - 9ms/step
Epoch 7/1000
60/60 - 1s - loss: 0.5642 - accuracy: 0.8107 - val_loss: 0.3978 - val_accuracy: 0.8878 - 504ms/epoch - 8ms/step
Epoch 8/1000
60/60 - 1s - loss: 0.5296 - accuracy: 0.8091 - val_loss: 0.3468 - val_accuracy: 0.8931 - 512ms/epoch - 9ms/step
Epoch 9/1000
60

  super(Adam, self).__init__(name, **kwargs)


30/30 - 1s - loss: 1.9167 - accuracy: 0.2886 - val_loss: 1.5116 - val_accuracy: 0.3143 - 877ms/epoch - 29ms/step
Epoch 2/1000
30/30 - 0s - loss: 1.5009 - accuracy: 0.4014 - val_loss: 1.2306 - val_accuracy: 0.5651 - 317ms/epoch - 11ms/step
Epoch 3/1000
30/30 - 0s - loss: 1.2648 - accuracy: 0.5004 - val_loss: 1.0016 - val_accuracy: 0.6212 - 299ms/epoch - 10ms/step
Epoch 4/1000
30/30 - 0s - loss: 1.0890 - accuracy: 0.5690 - val_loss: 0.8634 - val_accuracy: 0.7122 - 287ms/epoch - 10ms/step
Epoch 5/1000
30/30 - 0s - loss: 0.9499 - accuracy: 0.6219 - val_loss: 0.7259 - val_accuracy: 0.7481 - 285ms/epoch - 10ms/step
Epoch 6/1000
30/30 - 0s - loss: 0.8209 - accuracy: 0.6910 - val_loss: 0.5921 - val_accuracy: 0.8222 - 310ms/epoch - 10ms/step
Epoch 7/1000
30/30 - 0s - loss: 0.7115 - accuracy: 0.7408 - val_loss: 0.4565 - val_accuracy: 0.8720 - 287ms/epoch - 10ms/step
Epoch 8/1000
30/30 - 0s - loss: 0.6202 - accuracy: 0.7866 - val_loss: 0.3742 - val_accuracy: 0.9016 - 294ms/epoch - 10ms/step
Epoch

  super(Adam, self).__init__(name, **kwargs)


15/15 - 1s - loss: 2.1251 - accuracy: 0.2182 - val_loss: 1.8202 - val_accuracy: 0.3862 - 746ms/epoch - 50ms/step
Epoch 2/1000
15/15 - 0s - loss: 1.7564 - accuracy: 0.3257 - val_loss: 1.5242 - val_accuracy: 0.3577 - 179ms/epoch - 12ms/step
Epoch 3/1000
15/15 - 0s - loss: 1.5437 - accuracy: 0.3773 - val_loss: 1.4263 - val_accuracy: 0.4254 - 202ms/epoch - 13ms/step
Epoch 4/1000
15/15 - 0s - loss: 1.4516 - accuracy: 0.4403 - val_loss: 1.3338 - val_accuracy: 0.5058 - 180ms/epoch - 12ms/step
Epoch 5/1000
15/15 - 0s - loss: 1.3581 - accuracy: 0.4890 - val_loss: 1.2176 - val_accuracy: 0.6307 - 180ms/epoch - 12ms/step
Epoch 6/1000
15/15 - 0s - loss: 1.2455 - accuracy: 0.5592 - val_loss: 1.0648 - val_accuracy: 0.6825 - 175ms/epoch - 12ms/step
Epoch 7/1000
15/15 - 0s - loss: 1.1018 - accuracy: 0.6018 - val_loss: 0.9086 - val_accuracy: 0.6720 - 167ms/epoch - 11ms/step
Epoch 8/1000
15/15 - 0s - loss: 0.9894 - accuracy: 0.6362 - val_loss: 0.7808 - val_accuracy: 0.7302 - 161ms/epoch - 11ms/step
Epoch

  pre = TP / (TP + FP)


array([[  8.        ,   0.99254799,   0.87028436,   0.92878438,
          0.99580005,   0.88562872],
       [ 16.        ,   0.99345126,   0.93716675,   0.90542786,
          0.99643437,   0.91534213],
       [ 32.        ,   0.99239744,   0.81483251,          nan,
          0.99574461,          nan],
       [ 64.        ,   0.98976289,   0.83761611,   0.88078991,
          0.99433665,   0.84990181],
       [128.        ,   0.99360181,   0.87883621,   0.92001412,
          0.99647786,   0.88823013],
       [256.        ,   0.99337599,   0.85742331,          nan,
          0.99633718,          nan]])

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

<Figure size 864x864 with 0 Axes>

In [111]:
acc_scores_df = pd.DataFrame(acc_scores_ann, columns=['batch_size', 'acc', 'sen', 'pre', 'spe','f1'])
acc_scores_df.dropna().to_csv('ann_test_ac_df.csv')
acc_scores_df.dropna()

Unnamed: 0,batch_size,acc,sen,pre,spe,f1
0,8.0,0.992548,0.870284,0.928784,0.9958,0.885629
1,16.0,0.993451,0.937167,0.905428,0.996434,0.915342
3,64.0,0.989763,0.837616,0.88079,0.994337,0.849902
4,128.0,0.993602,0.878836,0.920014,0.996478,0.88823


Yukarıdaki başarılar göz önünde bulundurulduğunda ann optimum test başarısını %99,36'yla 128 batch_size değerinde vermiştir. </br>
Tüm değerler csv dosyası olarak kaydedilmiştir.