In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# DB 연결
import pymysql

# 실시간 주식가격 데이터
import FinanceDataReader as fdr

# 텍스트 분석
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from konlpy.tag import Okt
from gensim import models

from wordcloud import WordCloud

# 모델 학습 및 평가
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

# 기타
import datetime
from collections import Counter
import os
import sys
import warnings
warnings.filterwarnings('ignore')

In [5]:
# Stock price data

# Requires the DB server to be reachable.
# Connection settings can be overridden with the PROJ_DB_USER / PROJ_DB_PASSWORD /
# PROJ_DB_HOST environment variables; the fallbacks are the values this notebook
# previously hard-coded, so it still runs unchanged when they are unset.
# NOTE(review): a root password should not live in a notebook — rotate it and
# drop the literal fallbacks once the environment variables are in place.
db = pymysql.connect(user=os.environ.get('PROJ_DB_USER', 'root'),
                     passwd=os.environ.get('PROJ_DB_PASSWORD', '1234'),
                     host=os.environ.get('PROJ_DB_HOST', '3.35.70.166'),
                     db='proj',
                     charset='utf8')

try:
    # DictCursor returns each row as a dict, so the column names carry over
    # into the DataFrame below.
    cursor = db.cursor(pymysql.cursors.DictCursor)

    sql = "select * from financedata"
    cursor.execute(sql)
    result = cursor.fetchall()
finally:
    # Close the connection even when the query fails; otherwise it stays open
    # for the lifetime of the kernel.
    db.close()

# Convert the fetched rows to a DataFrame
finance = pd.DataFrame(result)

In [7]:
# KOSPI index data

# Requires the DB server to be reachable.
# Connection settings can be overridden with the PROJ_DB_USER / PROJ_DB_PASSWORD /
# PROJ_DB_HOST environment variables; the fallbacks are the values this notebook
# previously hard-coded, so it still runs unchanged when they are unset.
# NOTE(review): a root password should not live in a notebook — rotate it and
# drop the literal fallbacks once the environment variables are in place.
db = pymysql.connect(user=os.environ.get('PROJ_DB_USER', 'root'),
                     passwd=os.environ.get('PROJ_DB_PASSWORD', '1234'),
                     host=os.environ.get('PROJ_DB_HOST', '3.35.70.166'),
                     db='proj',
                     charset='utf8')

try:
    # DictCursor returns each row as a dict, so the column names carry over
    # into the DataFrame below.
    cursor = db.cursor(pymysql.cursors.DictCursor)

    sql = "select * from kospi"
    cursor.execute(sql)
    result = cursor.fetchall()
finally:
    # Close the connection even when the query fails; otherwise it stays open
    # for the lifetime of the kernel.
    db.close()

# Convert the fetched rows to a DataFrame
kospi = pd.DataFrame(result)

In [6]:
# Show the stock price table built from the `financedata` query.
finance

Unnamed: 0,Date,Open,High,Low,Close,Volume,Change,st_cd
0,2018-01-02,150500,153000,149000,149500,731763,-0.041667,005380
1,2018-01-03,149000,151000,149000,150500,450874,0.006689,005380
2,2018-01-04,150500,151000,146000,146500,931558,-0.026578,005380
3,2018-01-05,146000,149000,145500,149000,463695,0.017065,005380
4,2018-01-08,148500,151000,147500,151000,482619,0.013423,005380
...,...,...,...,...,...,...,...,...
4590,2021-09-24,209500,210500,206000,207000,409939,-0.007194,005380
4591,2021-09-24,77600,77700,77100,77300,11846941,-0.001292,005930
4592,2021-09-24,106000,106500,104000,104000,2184456,-0.014218,000660
4593,2021-09-24,770000,770000,746000,761000,398670,0.001316,051910


In [9]:
# Split the combined price table into one frame per stock code (`st_cd`).
# The codes are compared as strings, including their leading zeros.
stock_code = finance['st_cd']
skhynix = finance[stock_code == '000660']
hyundai = finance[stock_code == '005380']
samsung = finance[stock_code == '005930']
lgchem = finance[stock_code == '051910']
celltrion = finance[stock_code == '068270']

In [12]:
# Row count of the index frame followed by each per-stock frame; the later
# positional concat only lines up if these all agree.
tuple(len(frame) for frame in (kospi, skhynix, hyundai, samsung, lgchem, celltrion))

(919, 919, 919, 919, 919, 919)

In [35]:
# Last ten rows of the 000660 frame, in stored order.
skhynix.iloc[-10:]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Change,st_cd
2731,2021-09-08,104000,107500,103500,106000,2891267,0.014354,660
2732,2021-09-09,105000,106000,103000,103000,3233011,-0.028302,660
3650,2021-09-10,103500,105500,103000,105000,1999369,0.019418,660
3651,2021-09-13,105000,107000,104000,106500,1923344,0.014286,660
3652,2021-09-14,107500,108500,106500,107500,2463248,0.00939,660
4572,2021-09-15,107500,107500,105500,107500,2797024,0.0,660
4577,2021-09-16,107000,107500,104000,104000,3584461,-0.032558,660
4582,2021-09-17,104500,107500,104000,107000,3282623,0.028846,660
4587,2021-09-23,105500,107000,105000,105500,2474131,-0.014019,660
4592,2021-09-24,106000,106500,104000,104000,2184456,-0.014218,660


In [14]:
# Show the KOSPI table built from the `kospi` query.
kospi

Unnamed: 0,Date,Close,Open,High,Low,Volume,Change
0,2018-01-02,2480,2475,2481,2466,262200000,0.0049
1,2018-01-03,2486,2485,2493,2482,331100000,0.0027
2,2018-01-04,2466,2502,2502,2466,333840000,-0.0080
3,2018-01-05,2498,2477,2498,2476,308770000,0.0126
4,2018-01-08,2513,2511,2515,2494,311430000,0.0063
...,...,...,...,...,...,...,...
914,2021-09-15,3153,3147,3164,3139,605120,0.0015
915,2021-09-16,3130,3165,3168,3127,604460,-0.0074
916,2021-09-17,3141,3128,3142,3113,575320,0.0033
917,2021-09-23,3128,3124,3132,3108,612360,-0.0041


In [30]:
# Gather the daily change rate of the index and of each stock into one table.
# NOTE(review): rows are paired by position (every Change column is given a
# fresh 0..n-1 index), not matched on Date. This relies on all frames holding
# the same trading days in the same order, and on `kospi` keeping its default
# index for the Date column — confirm before trusting individual row pairs.
change_by_name = {
    'kospi': kospi.Change,
    'skhynix': skhynix.Change,
    'hyundai': hyundai.Change,
    'samsung': samsung.Change,
    'lgchem': lgchem.Change,
    'celltrion': celltrion.Change,
}
change_parts = [kospi.Date]
change_parts += [series.reset_index(drop=True) for series in change_by_name.values()]
change = pd.concat(change_parts, axis=1)
change.columns = ['Date', *change_by_name]

In [31]:
# Show the combined change table (Date plus one change column per series).
change

Unnamed: 0,Date,kospi,skhynix,hyundai,samsung,lgchem,celltrion
0,2018-01-02,0.0049,0.001307,-0.041667,0.001177,0.014815,0.021709
1,2018-01-03,0.0027,0.014360,0.006689,0.011760,-0.003650,0.092075
2,2018-01-04,-0.0080,-0.007722,-0.026578,-0.010461,-0.034188,0.013381
3,2018-01-05,0.0126,0.028534,0.017065,0.020360,0.024020,0.067598
4,2018-01-08,0.0063,-0.013871,0.013423,-0.001919,0.048148,0.133381
...,...,...,...,...,...,...,...
914,2021-09-15,0.0015,0.000000,0.002392,0.005222,-0.021798,0.036122
915,2021-09-16,-0.0074,-0.032558,-0.007160,-0.011688,-0.002786,-0.020184
916,2021-09-17,0.0033,0.028846,0.004808,0.014455,-0.020950,0.031835
917,2021-09-23,-0.0041,-0.014019,-0.002392,0.002591,0.084166,0.032668


In [48]:
# Number of rows where the KOSPI change is larger in absolute value than the
# skhynix change, and that count as a percentage of all rows in `change`.
# The denominator is taken from the table itself rather than a fixed 919, so
# the percentage stays correct when more trading days are loaded.
no_sk = int((abs(change.kospi) > abs(change.skhynix)).sum())
no_sk, no_sk / len(change) * 100

(228, 24.809575625680086)

In [49]:
# Number of rows where the KOSPI change is larger in absolute value than the
# hyundai change, and that count as a percentage of all rows in `change`.
# The denominator is taken from the table itself rather than a fixed 919, so
# the percentage stays correct when more trading days are loaded.
no_hd = int((abs(change.kospi) > abs(change.hyundai)).sum())
no_hd, no_hd / len(change) * 100

(268, 29.16213275299238)

In [50]:
# Number of rows where the KOSPI change is larger in absolute value than the
# samsung change, and that count as a percentage of all rows in `change`.
# The denominator is taken from the table itself rather than a fixed 919, so
# the percentage stays correct when more trading days are loaded.
no_ss = int((abs(change.kospi) > abs(change.samsung)).sum())
no_ss, no_ss / len(change) * 100

(277, 30.141458106637646)

In [51]:
# Number of rows where the KOSPI change is larger in absolute value than the
# lgchem change, and that count as a percentage of all rows in `change`.
# The denominator is taken from the table itself rather than a fixed 919, so
# the percentage stays correct when more trading days are loaded.
no_lg = int((abs(change.kospi) > abs(change.lgchem)).sum())
no_lg, no_lg / len(change) * 100

(244, 26.550598476605003)

In [52]:
# Number of rows where the KOSPI change is larger in absolute value than the
# celltrion change, and that count as a percentage of all rows in `change`.
# The denominator is taken from the table itself rather than a fixed 919, so
# the percentage stays correct when more trading days are loaded.
no_ce = int((abs(change.kospi) > abs(change.celltrion)).sum())
no_ce, no_ce / len(change) * 100

(233, 25.35364526659412)