# **Stock-Year**

In [1]:
import numpy as np
import pandas as pd

### Visualization
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

### DB 연결
# !pip install pymysql
import pymysql

### DB에 저장
import sqlalchemy
from sqlalchemy import create_engine

### 실시간 주식가격 데이터
# !pip install finance-datareader
import FinanceDataReader as fdr

### 텍스트 분석
## KoNLPy
# 1) JAVA 설치, 2) Python 버전과 맞는 JPype1-py3 설치, 3) !pip install konlpy, 4) 설치 경로에서 jvm.py 파일 코드 67번 줄 주석 처리 
from konlpy.tag import Okt
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import re
## FastText
# !pip install gensim
# !pip install fasttext
import fasttext
import fasttext.util
# Download Facebook's pre-trained Korean ('ko') fastText embedding model.
# The download only needs to happen once; if_exists='ignore' presumably skips it
# when the model file is already present — confirm against the fasttext.util docs.
fasttext.util.download_model('ko', if_exists='ignore')
# 유사도 계산
from gensim import models

### 모델 학습 및 평가
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.metrics import precision_score, recall_score, f1_score

### 모델 저장 및 로드
import joblib

### 기타
import datetime
from collections import Counter
import sys
import warnings
warnings.filterwarnings('ignore')

## **Read Data**

### **① Samsung**

In [2]:
# Merge the Samsung article CSVs from the two sources ("maeil" and "asia") for
# each year, and save one combined CSV per year next to the inputs.
STOCK_YEAR_DIR = '../../../../Code/Data/Test/Stock-Year'
STOCK_YEARS = (2018, 2019, 2020, 2021)

samsung_frames = {}
for year in STOCK_YEARS:
    maeil = pd.read_csv('{}/{}_maeil_samsung.csv'.format(STOCK_YEAR_DIR, year))
    asia = pd.read_csv('{}/{}_asia_samsung.csv'.format(STOCK_YEAR_DIR, year))
    # ignore_index=True gives the merged frame a fresh 0..n-1 index. Without it
    # the result keeps each input's own row labels, so labels repeat and any
    # label-based access on the merged frame becomes ambiguous. The saved CSV is
    # unaffected because it is written with index=False.
    merged = pd.concat([maeil, asia], axis=0, ignore_index=True)
    merged.to_csv('{}/{}_samsung.csv'.format(STOCK_YEAR_DIR, year), index=False)
    samsung_frames[year] = (maeil, asia, merged)

# Keep the original per-year variable names bound in case later cells use them.
samsung_maeil_2018, samsung_asia_2018, samsung_2018 = samsung_frames[2018]
samsung_maeil_2019, samsung_asia_2019, samsung_2019 = samsung_frames[2019]
samsung_maeil_2020, samsung_asia_2020, samsung_2020 = samsung_frames[2020]
samsung_maeil_2021, samsung_asia_2021, samsung_2021 = samsung_frames[2021]

### **② Hyundai**

In [3]:
# Merge the Hyundai article CSVs from the two sources ("maeil" and "asia") for
# each year, and save one combined CSV per year next to the inputs.
STOCK_YEAR_DIR = '../../../../Code/Data/Test/Stock-Year'
STOCK_YEARS = (2018, 2019, 2020, 2021)

hyundai_frames = {}
for year in STOCK_YEARS:
    maeil = pd.read_csv('{}/{}_maeil_hyundai.csv'.format(STOCK_YEAR_DIR, year))
    asia = pd.read_csv('{}/{}_asia_hyundai.csv'.format(STOCK_YEAR_DIR, year))
    # ignore_index=True gives the merged frame a fresh 0..n-1 index. Without it
    # the result keeps each input's own row labels, so labels repeat and any
    # label-based access on the merged frame becomes ambiguous. The saved CSV is
    # unaffected because it is written with index=False.
    merged = pd.concat([maeil, asia], axis=0, ignore_index=True)
    merged.to_csv('{}/{}_hyundai.csv'.format(STOCK_YEAR_DIR, year), index=False)
    hyundai_frames[year] = (maeil, asia, merged)

# Keep the original per-year variable names bound in case later cells use them.
hyundai_maeil_2018, hyundai_asia_2018, hyundai_2018 = hyundai_frames[2018]
hyundai_maeil_2019, hyundai_asia_2019, hyundai_2019 = hyundai_frames[2019]
hyundai_maeil_2020, hyundai_asia_2020, hyundai_2020 = hyundai_frames[2020]
hyundai_maeil_2021, hyundai_asia_2021, hyundai_2021 = hyundai_frames[2021]

### **③ LG**

In [4]:
# Merge the LG article CSVs from the two sources ("maeil" and "asia") for each
# year, and save one combined CSV per year next to the inputs.
STOCK_YEAR_DIR = '../../../../Code/Data/Test/Stock-Year'
STOCK_YEARS = (2018, 2019, 2020, 2021)

lg_frames = {}
for year in STOCK_YEARS:
    maeil = pd.read_csv('{}/{}_maeil_lg.csv'.format(STOCK_YEAR_DIR, year))
    asia = pd.read_csv('{}/{}_asia_lg.csv'.format(STOCK_YEAR_DIR, year))
    # ignore_index=True gives the merged frame a fresh 0..n-1 index. Without it
    # the result keeps each input's own row labels, so labels repeat and any
    # label-based access on the merged frame becomes ambiguous. The saved CSV is
    # unaffected because it is written with index=False.
    merged = pd.concat([maeil, asia], axis=0, ignore_index=True)
    merged.to_csv('{}/{}_lg.csv'.format(STOCK_YEAR_DIR, year), index=False)
    lg_frames[year] = (maeil, asia, merged)

# Keep the original per-year variable names bound in case later cells use them.
lg_maeil_2018, lg_asia_2018, lg_2018 = lg_frames[2018]
lg_maeil_2019, lg_asia_2019, lg_2019 = lg_frames[2019]
lg_maeil_2020, lg_asia_2020, lg_2020 = lg_frames[2020]
lg_maeil_2021, lg_asia_2021, lg_2021 = lg_frames[2021]

### **④ SK**

In [5]:
# Merge the SK article CSVs from the two sources ("maeil" and "asia") for each
# year, and save one combined CSV per year next to the inputs.
STOCK_YEAR_DIR = '../../../../Code/Data/Test/Stock-Year'
STOCK_YEARS = (2018, 2019, 2020, 2021)

sk_frames = {}
for year in STOCK_YEARS:
    maeil = pd.read_csv('{}/{}_maeil_sk.csv'.format(STOCK_YEAR_DIR, year))
    asia = pd.read_csv('{}/{}_asia_sk.csv'.format(STOCK_YEAR_DIR, year))
    # ignore_index=True gives the merged frame a fresh 0..n-1 index. Without it
    # the result keeps each input's own row labels, so labels repeat and any
    # label-based access on the merged frame becomes ambiguous. The saved CSV is
    # unaffected because it is written with index=False.
    merged = pd.concat([maeil, asia], axis=0, ignore_index=True)
    merged.to_csv('{}/{}_sk.csv'.format(STOCK_YEAR_DIR, year), index=False)
    sk_frames[year] = (maeil, asia, merged)

# Keep the original per-year variable names bound in case later cells use them.
sk_maeil_2018, sk_asia_2018, sk_2018 = sk_frames[2018]
sk_maeil_2019, sk_asia_2019, sk_2019 = sk_frames[2019]
sk_maeil_2020, sk_asia_2020, sk_2020 = sk_frames[2020]
sk_maeil_2021, sk_asia_2021, sk_2021 = sk_frames[2021]

### **⑤ Celltrion**

In [6]:
# Merge the Celltrion article CSVs from the two sources ("maeil" and "asia") for
# each year, and save one combined CSV per year next to the inputs.
STOCK_YEAR_DIR = '../../../../Code/Data/Test/Stock-Year'
STOCK_YEARS = (2018, 2019, 2020, 2021)

celltrion_frames = {}
for year in STOCK_YEARS:
    maeil = pd.read_csv('{}/{}_maeil_celltrion.csv'.format(STOCK_YEAR_DIR, year))
    asia = pd.read_csv('{}/{}_asia_celltrion.csv'.format(STOCK_YEAR_DIR, year))
    # ignore_index=True gives the merged frame a fresh 0..n-1 index. Without it
    # the result keeps each input's own row labels, so labels repeat and any
    # label-based access on the merged frame becomes ambiguous. The saved CSV is
    # unaffected because it is written with index=False.
    merged = pd.concat([maeil, asia], axis=0, ignore_index=True)
    merged.to_csv('{}/{}_celltrion.csv'.format(STOCK_YEAR_DIR, year), index=False)
    celltrion_frames[year] = (maeil, asia, merged)

# Keep the original per-year variable names bound in case later cells use them.
celltrion_maeil_2018, celltrion_asia_2018, celltrion_2018 = celltrion_frames[2018]
celltrion_maeil_2019, celltrion_asia_2019, celltrion_2019 = celltrion_frames[2019]
celltrion_maeil_2020, celltrion_asia_2020, celltrion_2020 = celltrion_frames[2020]
celltrion_maeil_2021, celltrion_asia_2021, celltrion_2021 = celltrion_frames[2021]