## 데이터 불러오기와 데이터셋 나누기

In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE

In [3]:
# Load the CSV with header=None, so columns get integer labels (0, 1, 2, ...);
# skiprows=1 drops the file's first line (presumably the original header row
# -- TODO confirm against the file).
data=pd.read_csv('상장비상장(2008_2013).csv',header=None,skiprows=1)

In [4]:
# Select columns by start~end position.
# X: columns 2 through 36 (the slice end, 37, is exclusive) as a 2-D array.
X = data.iloc[:,2:37].values
# y: column 37. It is selected with a list ([37]), so the result is a 2-D
# column of shape (n_rows, 1) rather than a flat 1-D vector.
y = data.iloc[:,[37]].values 

In [5]:
# Split rows into train/test by the value in column 1 (presumably the year,
# given the 2008-2013 file name -- confirm): rows equal to 2013 are held out
# as the test set, every other row goes to the training set.
#
# A single vectorized comparison replaces the row-by-row iterrows() loop.
# np.flatnonzero returns integer *positions*, which is what the NumPy arrays
# X and y must be indexed with.
is_test = (data.iloc[:, 1] == 2013).values

test_indices = np.flatnonzero(is_test)
train_indices = np.flatnonzero(~is_test)

X_train = X[train_indices]
X_test = X[test_indices]
y_train = y[train_indices]
y_test = y[test_indices]

In [6]:
# Model setup: SMOTE oversampler.
# The `ratio` and `kind` arguments were removed from SMOTE in newer
# imbalanced-learn releases; `sampling_strategy` replaces `ratio`, and the
# plain SMOTE class corresponds to the old kind='regular'.
# NOTE: no random_state is passed, so the synthetic samples can differ
# from run to run.
sm = SMOTE(sampling_strategy='auto')

# Oversample the training data only (the test split is left untouched).
# fit_resample is the current name of the former fit_sample; np.ravel hands
# the labels over as a flat 1-D vector instead of a list of 1-element rows.
X_train, y_train = sm.fit_resample(X_train, np.ravel(y_train))

## SVM

In [7]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

In [8]:
# Search space for the randomized SVC hyper-parameter search.
# C and gamma are continuous, strictly positive parameters.
# scipy.stats.randint is a discrete distribution over integers, so it cannot
# represent a range that starts at 0.001. loguniform samples continuously
# over [0.001, 100] and spreads the draws evenly across orders of magnitude.
from scipy.stats import loguniform

param_distribs = {
    'kernel': ['poly'],
    'C': loguniform(0.001, 100),
    'gamma': loguniform(0.001, 100),
}

In [9]:
from sklearn.svm import SVC

In [10]:
# Randomized hyper-parameter search over a default SVC: candidates are drawn
# from param_distribs and each one is evaluated with 5-fold cross-validation.
random_search = RandomizedSearchCV(
    estimator=SVC(),
    param_distributions=param_distribs,
    cv=5,
)

In [11]:
#random_search.fit(X_train, y_train)

In [12]:
#print("Best Parameter: {}".format(random_search.best_params_))

In [13]:
#print("Best Cross-validity Score: {:.3f}".format(random_search.best_score_))

In [14]:
#print("Test set Score: {:.3f}".format(random_search.score(X_test, y_test)))

## 모델평가

In [15]:
# Classifier used for the evaluation below, with hand-fixed hyper-parameters.
# class_weight gives class 1 a weight of 1.5; probability=True is required
# for the later predict_proba call.
# NOTE(review): the kernel is 'linear'; per the scikit-learn docs gamma only
# applies to the 'rbf', 'poly' and 'sigmoid' kernels -- confirm that
# gamma=0.01 is intentionally left in.
svc = SVC(
    C=100,
    gamma=0.01,
    kernel='linear',
    class_weight={1: 1.5},
    probability=True,
)

In [None]:
# Fit the classifier on the training split.
svc.fit(X_train, y_train)

In [None]:
# Predicted class labels for the training samples.
pred_train=svc.predict(X_train)

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix on the training split (true labels first, predictions second).
confusion_train=confusion_matrix(y_train, pred_train)

In [None]:
# Print the training-data confusion matrix.
print("훈련데이터 오차행렬:\n", confusion_train)

In [None]:
# Predicted class labels for the held-out test samples.
pred_test=svc.predict(X_test)

In [None]:
# Per-class probability estimates for the test samples (available because
# the SVC was constructed with probability=True).
pred_test_prob=svc.predict_proba(X_test)

In [None]:
# Continuous decision-function scores for the test samples.
decision=svc.decision_function(X_test)

In [None]:
# Confusion matrix on the test split (true labels first, predictions second).
confusion_test=confusion_matrix(y_test, pred_test)

In [None]:
# Print the test-data confusion matrix.
print("테스트데이터 오차행렬:\n", confusion_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Text classification report comparing the test labels with the predictions.
cfreport=classification_report(y_test, pred_test)

In [None]:
# Print the classification report for the test split.
print("분류예측 레포트:\n", cfreport)

In [None]:
from sklearn import metrics
import matplotlib.pyplot as plt 

In [None]:
# ROC curve and AUC on the test split.
# roc_curve sweeps a threshold over a continuous score. The hard class labels
# in pred_test offer only one operating point, which collapses the curve and
# distorts the AUC, so the decision_function scores computed earlier
# (`decision`) are used instead.
# NOTE(review): this assumes larger decision values mean "more likely class 1",
# i.e. that label 1 is svc.classes_[1] -- confirm against the label coding.
fpr, tpr, thresholds = metrics.roc_curve(y_test, decision, pos_label=1)
roc_auc = metrics.auc(fpr, tpr)

In [None]:
# Draw the ROC curve with its AUC in the legend.
plt.plot(fpr, tpr, label='ROC curve (area = %0.3f)' % roc_auc)
# Diagonal reference line: the expected curve of a random classifier.
plt.plot([0, 1], [0, 1], 'k--')
# Both rates are proportions, so clamp each axis to [0, 1].
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
# Axis label spelling fixed: "Specifity" -> "Specificity".
plt.xlabel('False Positive Rate or (1 - Specificity)')
plt.ylabel('True Positive Rate or (Sensitivity)')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")