In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
import random
random.seed(1)

data0=pd.read_csv('2330.csv')
data0_train=data0.iloc[2:286,:]
data0_test=data0.iloc[287:306,:]

train_X=data0_train[['LA_ADR','MA10','J','DIF12-26']]
train_Y=data0_train['LA']#上涨下跌

test_X=data0_test[['LA_ADR','MA10','J','DIF12-26']]
test_Y=data0_test['LA']

#KNN
knn = KNeighborsClassifier(n_neighbors=5) 
knn.fit(train_X,train_Y)
pred_Y_KNN = knn.predict(test_X)
pred_acc_KNN = accuracy_score(test_Y,pred_Y_KNN)
loss_Y_KNN = knn.predict(train_X)
loss_acc_KNN = accuracy_score(train_Y,loss_Y_KNN)

#決策樹
tree= DecisionTreeClassifier(criterion='gini',max_depth=5)
tree.fit(train_X, train_Y)
pred_Y_tree = tree.predict(test_X)
pred_acc_tree = accuracy_score(test_Y,pred_Y_tree)
loss_Y_tree = tree.predict(train_X)
loss_acc_tree = accuracy_score(train_Y,loss_Y_tree)

#貝氏分類器
bayes = GaussianNB()                          
bayes.fit(train_X, train_Y)
pred_Y_bayes = bayes.predict(test_X)
pred_acc_bayes = accuracy_score(test_Y,pred_Y_bayes)
loss_Y_bayes = bayes.predict(train_X)
loss_acc_bayes = accuracy_score(train_Y,loss_Y_bayes)

#Logistic Regression
logist = LogisticRegression()
logist.fit(train_X, train_Y)
pred_Y_logist = logist.predict(test_X)
pred_acc_logist = accuracy_score(test_Y,pred_Y_logist)
loss_Y_logist = logist.predict(train_X)
loss_acc_logist = accuracy_score(train_Y,loss_Y_logist)

#隨機森林
forest = RandomForestClassifier(n_estimators=500)
forest.fit(train_X,train_Y)
pred_Y_forest = forest.predict(test_X)
pred_acc_forest = accuracy_score(test_Y,pred_Y_forest)
loss_Y_forest = logist.predict(train_X)
loss_acc_forest = accuracy_score(train_Y,loss_Y_forest)


#SVM(linear)
param_list1 = {'C': [0.1, 0.25, 0.5 ,0.75, 1]}
cv_svm1 = GridSearchCV(SVC(kernel='linear'), param_list1)
cv_svm1.fit(train_X, train_Y)
best_C_svm1=cv_svm1.best_estimator_.C
svm1 = SVC(kernel='linear',C=best_C_svm1)
svm1.fit(train_X,train_Y)
pred_Y_svm1 = svm1.predict(test_X)
pred_acc_svm1 = accuracy_score(test_Y,pred_Y_svm1)
loss_Y_svm1 = svm1.predict(train_X)
loss_acc_svm1 = accuracy_score(train_Y,loss_Y_svm1)

#SVM(nonlinear)
param_list2 = {'C': [0.1, 0.5, 1, 5, 10, 15, 20, 50, 100],'gamma': [0.01, 0.1 ,0.25, 0.5,0.75, 1, 3, 5, 10, 20]}
cv_svm2 = GridSearchCV(SVC(kernel='rbf'), param_list2)
cv_svm2.fit(train_X, train_Y)
best_C_svm2=cv_svm2.best_estimator_.C
best_gamma_svm2=cv_svm2.best_estimator_.gamma
svm2 = SVC(kernel='rbf',C=best_C_svm2, gamma=best_gamma_svm2)
svm2.fit(train_X,train_Y)
pred_Y_svm2 = svm2.predict(test_X)
pred_acc_svm2 = accuracy_score(test_Y,pred_Y_svm2)
loss_Y_svm2 = svm2.predict(train_X)
loss_acc_svm2 = accuracy_score(train_Y,loss_Y_svm2)

pred_res=[pred_acc_tree,pred_acc_bayes,pred_acc_svm1,pred_acc_svm2,pred_acc_logist,pred_acc_forest,pred_acc_KNN]
loss_res=[loss_acc_tree,loss_acc_bayes,loss_acc_svm1,loss_acc_svm2,loss_acc_logist,loss_acc_forest,loss_acc_KNN]
print(pred_res)
print(loss_res)
'''
[0.5789473684210527, 0.5789473684210527, 0.5789473684210527, 0.5789473684210527, 0.5789473684210527, 0.631578947368421, 0.7368421052631579]
[0.7464788732394366, 0.6514084507042254, 0.6514084507042254, 0.6619718309859155, 0.6514084507042254, 0.6514084507042254, 0.7288732394366197]

'''


[0.5789473684210527, 0.5789473684210527, 0.5789473684210527, 0.5789473684210527, 0.5789473684210527, 0.631578947368421, 0.7368421052631579]
[0.7464788732394366, 0.6514084507042254, 0.6514084507042254, 0.6619718309859155, 0.6514084507042254, 0.6514084507042254, 0.7288732394366197]
