In [1]:
import pandas as pd # untuk manipulasi data
import numpy as np # untuk manipulasi data

from sklearn.model_selection import train_test_split # untuk splitting data kedalam train and test samples
from sklearn.metrics import classification_report # untuk metrik evaluasi model
from sklearn.svm import SVC # for Support Vector Classification model

import plotly.express as px  # untuk visualisasi data
import plotly.graph_objects as go # untuk visualisasi data


In [2]:
# Read the games data from CSV
df = pd.read_csv('games.csv', encoding='utf-8')

# Rating gap between the white and the black player - independent variable
df['rating_difference'] = df['white_rating'] - df['black_rating']

# White-wins flag (1 = win, 0 = not a win) - dependent (target) variable.
# A vectorised comparison replaces the per-row Python lambda.
df['white_win'] = (df['winner'] == 'white').astype('int64')

# Show a snapshot of selected columns. They are picked by name rather than by
# position so the selection does not silently change if the CSV column order does.
df[['id', 'rated', 'victory_status', 'winner', 'white_id', 'white_rating',
    'black_id', 'black_rating', 'opening_eco', 'rating_difference', 'white_win']]

Unnamed: 0,id,rated,victory_status,winner,white_id,white_rating,black_id,black_rating,opening_eco,rating_difference,white_win
0,TZJHLljE,False,outoftime,white,bourgris,1500,a-00,1191,D10,309,1
1,l1NXvwaE,True,resign,black,a-00,1322,skinnerua,1261,B00,61,0
2,mIICvQHh,True,mate,white,ischia,1496,a-00,1500,C20,-4,1
3,kWKvrqYL,True,mate,white,daniamurashov,1439,adivanov2009,1454,D02,-15,1
4,9tXo1AUZ,True,mate,white,nik221107,1523,adivanov2009,1469,C41,54,1
...,...,...,...,...,...,...,...,...,...,...,...
20053,EfqH7VVH,True,resign,white,belcolt,1691,jamboger,1220,A80,471,1
20054,WSJDhbPl,True,mate,black,jamboger,1233,farrukhasomiddinov,1196,A41,37,0
20055,yrAas0Kj,True,mate,white,jamboger,1219,schaaksmurf3,1286,D00,-67,1
20056,b0v4tRyF,True,resign,white,marcodisogno,1360,jamboger,1227,B07,133,1


In [3]:
def _print_evaluation(split_name, y_true, y_pred):
    """Print the accuracy and the classification report for one data split."""
    print('----- Evaluation on ' + split_name + ' -----')
    # Accuracy is derived from the labels that were already predicted;
    # calling model.score() would run the (expensive) SVC prediction a second time.
    accuracy = float(np.mean(np.asarray(y_pred) == np.asarray(y_true)))
    print('Accuracy Score: ', accuracy)

    # Per-class precision / recall / f1 for the same predictions
    print(classification_report(y_true, y_pred))
    print('--------------------------------------------------------')


def fitting(X, y, C, gamma, test_size=0.2, random_state=0):
    """Split the data, fit an RBF-kernel SVC and print evaluation reports.

    Parameters
    ----------
    X : feature matrix passed to ``train_test_split``.
    y : target labels aligned with ``X``.
    C : regularisation parameter forwarded to ``SVC``.
    gamma : kernel coefficient forwarded to ``SVC``.
    test_size : fraction of samples held out for testing (default 0.2).
    random_state : seed used for the train/test split and for the SVC's
        internal probability calibration, so repeated runs give the same result
        (default 0).

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, clf)`` where ``clf`` is the fitted model.
    """
    # Create the training and testing samples
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Fit the model.
    # Available kernels: {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'.
    # probability=True makes predict_proba available on the fitted model; the seed keeps
    # those probability estimates reproducible.
    model = SVC(kernel='rbf', probability=True, C=C, gamma=gamma, random_state=random_state)
    model.fit(X_train, y_train)

    # Predict class labels once per split and reuse them for every metric below
    pred_labels_tr = model.predict(X_train)
    pred_labels_te = model.predict(X_test)

    # Report the held-out data first, then the training data
    _print_evaluation('Test Data', y_test, pred_labels_te)
    _print_evaluation('Training Data', y_train, pred_labels_tr)

    # Return everything needed for plotting
    return X_train, X_test, y_train, y_test, model

In [4]:
def Plot_3D(X, X_test, y_test, clf, mesh_size=5, margin=1):
    """Show test points and the classifier's probability surface in a 3D plot.

    Parameters
    ----------
    X : DataFrame whose first two columns define the extent of the prediction grid.
    X_test : DataFrame with the same two feature columns; plotted as scatter points.
    y_test : target values for ``X_test``; used as the z coordinate of the points.
    clf : fitted classifier exposing ``predict_proba``.
    mesh_size : step between neighbouring grid points on both axes (default 5).
    margin : padding added around the observed feature range (default 1).

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Build the mesh grid on which the model is evaluated.
    # Series.min()/max() skip NaN by default, so no filling is needed here.
    first_feature, second_feature = X.iloc[:, 0], X.iloc[:, 1]
    x_axis = np.arange(first_feature.min() - margin, first_feature.max() + margin, mesh_size)
    y_axis = np.arange(second_feature.min() - margin, second_feature.max() + margin, mesh_size)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # Compute the predicted probability of class 1 at every grid point.
    # If the model recorded feature names during fit, pass the grid with the same
    # names so scikit-learn does not warn about missing feature names.
    grid = np.c_[xx.ravel(), yy.ravel()]
    if hasattr(clf, 'feature_names_in_'):
        grid = pd.DataFrame(grid, columns=clf.feature_names_in_)
    Z = clf.predict_proba(grid)[:, 1].reshape(xx.shape)

    # 3D scatter of the test samples; columns are taken by position so the points
    # use the same two features as the grid above.
    fig = px.scatter_3d(x=X_test.iloc[:, 0], y=X_test.iloc[:, 1], z=y_test,
                        opacity=0.8, color_discrete_sequence=['black'])

    # Figure colours (no title is set)
    horizontal_axis_style = dict(backgroundcolor='white', color='black', gridcolor='#f0f0f0')
    fig.update_layout(paper_bgcolor='white',
                      scene=dict(xaxis=dict(horizontal_axis_style),
                                 yaxis=dict(horizontal_axis_style),
                                 zaxis=dict(backgroundcolor='lightgrey',
                                            color='black',
                                            gridcolor='#f0f0f0')))
    # Shrink the scatter markers
    fig.update_traces(marker=dict(size=1))

    # Add the prediction surface with contour lines on z from 0.2 to 0.8 every 0.05
    fig.add_trace(go.Surface(x=x_axis, y=y_axis, z=Z, name='SVM Prediction',
                             colorscale='RdBu', showscale=False,
                             contours={"z": {"show": True, "start": 0.2, "end": 0.8, "size": 0.05}}))
    fig.show()

In [5]:
# Feature matrix (rating gap and number of turns) and binary target for modelling
X = df.loc[:, ['rating_difference', 'turns']]
y = df['white_win'].to_numpy()

# Fit the model with C=1 and gamma='scale' and display the evaluation results
X_train, X_test, y_train, y_test, clf = fitting(X, y, C=1, gamma='scale')

----- Evaluation on Test Data -----
Accuracy Score:  0.6530408773678963
              precision    recall  f1-score   support

           0       0.64      0.70      0.67      2024
           1       0.66      0.60      0.63      1988

    accuracy                           0.65      4012
   macro avg       0.65      0.65      0.65      4012
weighted avg       0.65      0.65      0.65      4012

--------------------------------------------------------
----- Evaluation on Training Data -----
Accuracy Score:  0.6468901907017325
              precision    recall  f1-score   support

           0       0.64      0.68      0.66      8033
           1       0.66      0.62      0.64      8013

    accuracy                           0.65     16046
   macro avg       0.65      0.65      0.65     16046
weighted avg       0.65      0.65      0.65     16046

--------------------------------------------------------


In [6]:
# Plot the test samples together with the fitted classifier's prediction surface
Plot_3D(X=X, X_test=X_test, y_test=y_test, clf=clf)