In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from google.colab import drive
import joblib
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
import warnings

# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides scikit-learn convergence/shape warnings.
warnings.simplefilter('ignore')

# Mount Google Drive so the dataset under /content/gdrive is readable (Colab only).
drive.mount('/content/gdrive')

In [None]:
# Location of the survey answers on the mounted Drive.
filepath = '/content/gdrive/MyDrive/answerData.csv'
data = pd.read_csv(filepath)

# Hold out 15% of the rows for evaluation; the fixed seed keeps the split reproducible.
train, test = train_test_split(
    data,
    test_size=0.15,
    shuffle=True,
    random_state=34,
)

In [None]:
def svc_param_selection(X, y, nfolds):
  """Grid-search RBF-kernel SVC hyperparameters with cross-validation.

  Args:
    X: Feature matrix (DataFrame or 2-D array-like) to fit on.
    y: Target labels; a 1-D array-like or a single-column frame/array,
      which is flattened before fitting.
    nfolds: Value forwarded to GridSearchCV's ``cv`` argument.

  Returns:
    The fitted GridSearchCV object. The winning combination is available
    as ``best_params_`` and is also printed.
  """
  svm_parameters = [
    {'kernel' : ['rbf'],
      'gamma' : [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
      'C' : [0.01, 0.1, 1, 10, 100, 1000]}
  ]
  # Use scikit-learn's GridSearchCV to find the best parameter combination.
  # `SVC` is the name actually imported by this notebook (there is no `svm` module alias).
  clf = GridSearchCV(SVC(), svm_parameters, cv=nfolds, scoring='accuracy')
  # Fit on the arguments that were passed in, not on notebook globals;
  # np.ravel accepts both an already-flat array and a single-column frame.
  clf.fit(X, np.ravel(y))

  print(clf.best_params_)  # parameters that achieved the highest score
  return clf
  
# Columns used as model inputs; 'Result' is the label column.
feature_cols = ['Face', 'Color', 'Season', 'Word', 'MBTI(E,I/S,N)', 'MBTI(T,F/J,P)']

X_train = train[feature_cols]
# Kept as a single-column DataFrame; it is flattened with .values.ravel() where it is used.
y_train = train[['Result']]

In [None]:
# Tune C and gamma with 10-fold cross-validation; `clf` is the fitted grid search.
clf = svc_param_selection(X_train, y_train.values.ravel(), 10)

# Bracket each tuned value with neighbours two orders of magnitude below and above it.
best_C = clf.best_params_['C']
best_gamma = clf.best_params_['gamma']
C_candidates = [best_C * 0.01, best_C, best_C * 100]
gamma_candidates = [best_gamma * 0.01, best_gamma, best_gamma * 100]

X = train[['Face', 'Color', 'Season', 'Word', 'MBTI(E,I/S,N)', 'MBTI(T,F/J,P)']]
Y = train['Result'].tolist()

# Fit one SVC per (C, gamma) pair for side-by-side comparison.
# The loop uses its own variable so the tuned `clf` above is not overwritten
# by the last (C*100, gamma*100) candidate before evaluation.
classifiers = []
for C in C_candidates:
  for gamma in gamma_candidates:
    candidate_clf = SVC(C=C, gamma=gamma)
    candidate_clf.fit(X, Y)
    classifiers.append((C, gamma, candidate_clf))

In [None]:
# Build the held-out features/labels with the same columns used for training.
X_test = test[['Face', 'Color', 'Season', 'Word', 'MBTI(E,I/S,N)', 'MBTI(T,F/J,P)']]
y_test = test[['Result']]

# The variable is `X_test`; the previous lowercase `x_test` raised NameError.
print(X_test)
y_true = y_test
y_pred = clf.predict(X_test)

# Per-class precision/recall/F1 followed by overall accuracy.
print(classification_report(y_true, y_pred))
print("accuracy :", str(accuracy_score(y_true, y_pred)))

In [None]:
# Line up each prediction with its true label for a row-by-row check.
# y_true is a single-column frame, so flatten it to match y_pred.
ground_truth = y_true.values.ravel()
comparison = pd.DataFrame(dict(prediction=y_pred, ground_truth=ground_truth))

comparison

In [None]:
# Uncomment to save the fitted model to Google Drive (disabled so a re-run does not overwrite it):
# joblib.dump(clf, '/content/gdrive/MyDrive/svc_model.pkl')