### 1、Data preparation

In [3]:
import pandas as pd 
import numpy as np

# Load the training table. Every column except the last one goes into X;
# the last column is converted to a NumPy array and kept in y.
data = pd.read_csv("TrainDataset_AfterMulti.csv")
last_col = data.shape[1] - 1
X = data.iloc[:, :last_col]
y = np.array(data.iloc[:, last_col])

### 2、Train GWO-SVM

In [60]:
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate

# Hold out 30% of the rows for validation. A fixed random_state makes the split
# (and therefore every metric computed from it) reproducible after a kernel
# restart; with random_state=None each run produced a different partition.
# NOTE(review): the next cell replaces X_train/X_test with the contents of
# SVM_X_train.csv / SVM_X_test.csv -- confirm those files hold the same rows,
# in the same order, as the y_train/y_test produced here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [5]:
# Replace the split feature matrices with the ones stored on disk, discarding
# the first column of each file (presumably a saved row index -- TODO confirm).
# NOTE(review): y_train / y_test are not reloaded here; verify that they are
# row-aligned with these CSV files before fitting.
X_train = pd.read_csv("SVM_X_train.csv").iloc[:, 1:]
X_test = pd.read_csv("SVM_X_test.csv").iloc[:, 1:]

In [64]:
# from sklearn.externals import joblib
# clf2 = joblib.load('model/GWO_SVM.pkl')
# clf2.coef_ 

In [6]:
from sklearn import svm
from sklearn.metrics import mean_squared_error

# RBF-kernel support vector classifier. probability=True enables predict_proba,
# which later cells use; random_state fixes the internal shuffling used for the
# probability calibration so those probabilities are reproducible.
# Earlier hyperparameter setting kept for reference: C = 0.563, gamma = 0.037
clf2 = svm.SVC(kernel='rbf', C=0.048, gamma=0.011, probability=True, random_state=0).fit(X_train, y_train)

# Hard class predictions on both splits.
Train_y_pred = clf2.predict(X_train)
Vlid_y_pred = clf2.predict(X_test)

# np.sqrt(MSE) is the root-mean-squared error, so it is named and reported as
# RMSE (it was previously labelled "MSE", and the message referred to a tree model).
rmse_train = np.sqrt(mean_squared_error(y_train, Train_y_pred))
rmse_valid = np.sqrt(mean_squared_error(y_test, Vlid_y_pred))
print("SVM model, Train RMSE: {} Test RMSE: {}".format(rmse_train, rmse_valid))

In [289]:
# Column 1 of predict_proba for the validation split, then for the training split.
Vlid_y_prob, Train_y_prob = (
    clf2.predict_proba(features)[:, 1] for features in (X_test, X_train)
)

### 3、Plot learning curve

In [3]:
from sklearn.model_selection import ShuffleSplit

# Shuffle-split cross-validator: 10 splits, 20% of the samples held out in
# each, seeded so the splits are the same on every run.
split_options = {"n_splits": 10, "test_size": 0.2, "random_state": 0}
cv = ShuffleSplit(**split_options)

In [4]:
from sklearn.model_selection import learning_curve

# Evaluate clf2 at 500 training-set fractions between 1e-7 and 1.0, scoring
# accuracy with the `cv` splitter defined above.
size_grid = np.linspace(0.0000001, 1.0, 500)
train_sizes1, train_scores1, valid_scores1 = learning_curve(
    clf2,
    X_train,
    y_train,
    train_sizes=size_grid,
    cv=cv,
    scoring='accuracy',
)

# Mean and standard deviation along axis 1 of each score array
# (one value per training-set size).
train_mean, train_std = train_scores1.mean(axis=1), train_scores1.std(axis=1)
valid_mean, valid_std = valid_scores1.mean(axis=1), valid_scores1.std(axis=1)

In [8]:
from matplotlib.pyplot import MultipleLocator
import matplotlib.pyplot as plt

# 'seaborn-notebook' was renamed to 'seaborn-v0_8-notebook' in newer Matplotlib
# releases and the old alias no longer exists there, so use whichever name this
# installation provides (and keep the current style if neither is present).
for style_name in ('seaborn-v0_8-notebook', 'seaborn-notebook'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
# Inward-pointing ticks; set before the axes are created so they take effect.
plt.rcParams['xtick.direction']='in'
plt.rcParams['ytick.direction']='in'

fig = plt.figure(figsize=(5,3))

# Training curve with a +/- one-standard-deviation band.
plt.plot(train_sizes1, train_mean, color='blue',  markersize=5, label='Training score Std=±{0:.4f}'.format(train_std[6:].mean()))
plt.fill_between(train_sizes1,train_mean+train_std,train_mean-train_std,alpha=0.15,color='blue')

# Cross-validation curve. Its band must use valid_std; it was previously drawn
# with train_std, which misrepresented the spread of the validation scores.
plt.plot(train_sizes1, valid_mean, color='red',  markersize=5, label='Cross-validation score Std=±{0:.4f}'.format(valid_std[6:].mean()))
plt.fill_between(train_sizes1,valid_mean+valid_std,valid_mean-valid_std,alpha=0.15,color='red')

# plt.grid()
plt.xlabel('Number of training samples')
plt.ylabel('Accuracy')
plt.ylim([0.35,1.02])
# plt.legend(loc='lower right')
# plt.show()
plt.savefig('SVM-leaningCurv600dpi.jpg', dpi=600)

In [68]:
# Recompute the column-1 probabilities for both splits (same computation as the
# earlier probability cell), so the cells below see values from the current clf2.
proba_valid = clf2.predict_proba(X_test)
proba_train = clf2.predict_proba(X_train)
Vlid_y_prob = proba_valid[:, 1]
Train_y_prob = proba_train[:, 1]

### 4、Metrics

In [None]:
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, cohen_kappa_score, mean_squared_error, mean_absolute_error

# --- Classification metrics on the training split -------------------------
# To report the validation split instead, substitute (y_test, Vlid_y_pred)
# for (y_train, Train_y_pred) in the calls of this section.
tn, fp, fn, tp = confusion_matrix(y_train, Train_y_pred, labels=[0,1]).ravel()
print(tn, fp, fn, tp)
print(accuracy_score(y_train, Train_y_pred))
print(classification_report(y_train, Train_y_pred))

# True-positive and false-positive rates derived from the confusion matrix.
tpr = tp/(tp+fn)
fpr = fp/(fp+tn)
print('TPR:%s'%tpr,' FPR:%s'%fpr)

# Kappa is computed once, directly in the print (the earlier stand-alone call
# discarded its result).
print("Kappa:%s"%cohen_kappa_score(y_train, Train_y_pred))

# --- Error of the hard predictions on both splits ---------------------------
rmse_train = np.sqrt(mean_squared_error(y_train, Train_y_pred))
rmse_valid = np.sqrt(mean_squared_error(y_test, Vlid_y_pred))
print("SVM model, Train RMSE: {} Valid RMSE: {}".format(rmse_train, rmse_valid))
mse_train = mean_squared_error(y_train, Train_y_pred)
mse_valid = mean_squared_error(y_test, Vlid_y_pred)
print("SVM model, Train MSE: {} Valid MSE: {}".format(mse_train, mse_valid))


In [9]:
from sklearn.metrics import roc_curve, auc
import matplotlib as mpl
import matplotlib.pyplot as plt

plt.style.use('tableau-colorblind10')

# ROC curve of the validation split; pass (y_train, Train_y_prob) to roc_curve
# to draw the training-split curve instead.
FPR, TPR, thresholds = roc_curve(y_test, Vlid_y_prob)
roc_auc = auc(FPR, TPR)

# Draw through explicit Figure/Axes handles; both rates are scaled to percent.
roc_fig = plt.figure(figsize=(5, 5), dpi=100)
roc_ax = roc_fig.gca()
roc_ax.set_title('ROC')
roc_ax.plot(FPR*100, TPR*100, 'b', label='AUC = %0.4f'%roc_auc)
roc_ax.legend(loc='best')
# Dashed diagonal from (0, 0) to (100, 100) for reference.
roc_ax.plot([0,100], [0,100],'r--')
roc_ax.set_ylabel('TPR')
roc_ax.set_xlabel('FPR')


### 5、Save GWO-SVM model and predict

In [284]:
# Method 1 -- joblib
# `sklearn.externals.joblib` has been removed from scikit-learn; the standalone
# `joblib` package (installed as a scikit-learn dependency) is imported instead.
# Without this import the load call below raised NameError on a fresh kernel.
import joblib

# The model file is written once with:
#     joblib.dump(clf2, 'model/GWO_SVM.pkl')

# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files that come from a trusted source.
joblib.load('model/GWO_SVM.pkl')

SVC(C=0.048, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.011, kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [285]:
# Load the points to be scored; every column of the file is used as a feature.
data_random = pd.read_csv("RandomPoints.csv")
X_random = data_random.iloc[:, :]
# Only the last expression of a cell is displayed, so the former mid-cell
# `data_random.head()` call was a discarded no-op and has been removed.
data_random.shape

(65000, 13)

In [286]:
# Predicted class and per-class probabilities for every row of X_random.
x_pred = clf2.predict(X_random)
x_pred_prob = clf2.predict_proba(X_random)

# Give every output column a distinct, descriptive name. Concatenating two
# unnamed frames produced the header "0, 0, 1", with the label 0 duplicated.
# Column order is unchanged: predicted class first, then one probability
# column per entry of clf2.classes_.
prob_columns = ["prob_class_{}".format(label) for label in clf2.classes_]
predictions = pd.concat(
    [
        pd.DataFrame({"pred_class": x_pred}),
        pd.DataFrame(x_pred_prob, columns=prob_columns),
    ],
    axis=1,
)
predictions.to_csv('65000pred-SVM.csv')