In [14]:
# 2022/04/21 ~
# 2022/

import FinanceDataReader as fdr
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score

#### 뉴스 : 우크라이나 전쟁 + α (발칸반도, 9·11, 중동전쟁)

#### .array.reshape(-1, 1)

In [3]:
# Load crude-oil (WTI) data.
def _load_wti_close(csv_path, close_label, date_format):
    """Read a WTI price CSV and normalise its date and closing-price columns.

    The file is expected to contain a '날짜' (date) column and a '종가'
    (closing price) column. Dates are presumably written like
    '2022년 04월 21일' (inferred from the cleaning steps below -- confirm
    against the CSV).

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    close_label : str
        New name for the '종가' column.
    date_format : str
        ``strftime`` pattern used to render the final 'Date' strings.

    Returns
    -------
    pandas.DataFrame
        Rows in chronological order, with '날짜' renamed to 'Date' (strings
        formatted with ``date_format``) and '종가' renamed to ``close_label``.
    """
    frame = pd.read_csv(csv_path)

    # Turn 'YYYY년 MM월 DD일' into 'YYYY-MM-DD'. regex=False makes every
    # pattern a literal match regardless of the pandas version's default.
    iso_dates = (
        frame['날짜']
        .str.replace(" ", "", regex=False)
        .str.replace("년", "-", regex=False)
        .str.replace("월", "-", regex=False)
        .str.replace("일", "", regex=False)
    )
    frame['날짜'] = pd.to_datetime(iso_dates)

    # Sort on the parsed datetimes, not on the raw text: a string sort is only
    # chronological when month and day happen to be zero-padded.
    frame = frame.sort_values('날짜', ascending=True)

    frame['날짜'] = frame['날짜'].dt.strftime(date_format)
    return frame.rename(columns={'날짜': 'Date', '종가': close_label})


# Dates are reduced to 'MM/DD' so the two periods can be aligned on the same
# calendar day of different years.
WI_WTI = _load_wti_close('../csv/WTI_WI.csv', 'oil(2021-2022)', '%m/%d')
NWI_WTI = _load_wti_close('../csv/WTI_noWI.csv', 'oil(2020-2021)', '%m/%d')

# Merge: keep only the calendar days present in both periods.
war_oil = pd.merge(NWI_WTI, WI_WTI, how='inner', on='Date')

# Visualise both closing-price series on a shared month/day axis.
fig = px.line(war_oil, x='Date', y=["oil(2020-2021)", "oil(2021-2022)"])
fig.show()


![war_oil](https://user-images.githubusercontent.com/96412661/164569910-ed548971-365e-435b-947c-ad5bbdc06e8b.png)

In [4]:
# Load agricultural-commodity ETF prices.
# 271060: KODEX 3 Major Agricultural Futures (H) - corn, soybeans, wheat.
# To gauge the war's influence, prices from the period when Russia's invasion
# of Ukraine was being reported are compared with the same period a year earlier.
beforewar = pd.read_csv('../csv/beforewar.csv')
afterwar = pd.read_csv('../csv/afterwar.csv')

# Parse each 'Date' column and collapse it to an 'MM/DD' string so that the two
# periods can be joined on the calendar day.
beforewar['Date'] = pd.to_datetime(beforewar['Date']).dt.strftime('%m/%d')
afterwar['Date'] = pd.to_datetime(afterwar['Date']).dt.strftime('%m/%d')

# Label each closing-price column with the period it covers.
beforewar = beforewar.rename(columns={'Close': 'produce(2020-2021)'})
afterwar = afterwar.rename(columns={'Close': 'produce(2021-2022)'})

# Merge: keep only the calendar days present in both periods.
war_produce = beforewar.merge(afterwar, how='inner', on='Date')

# This selection is not the cell's last expression, so its result is discarded;
# it still raises KeyError if any of the three columns is missing.
war_produce[['Date', 'produce(2020-2021)', 'produce(2021-2022)']]

# Visualise both closing-price series on a shared month/day axis.
fig = px.line(
    war_produce,
    x='Date',
    y=["produce(2020-2021)", "produce(2021-2022)"],
)
fig.show()

![war_produce](https://user-images.githubusercontent.com/96412661/164569913-c9c0ae7d-50fa-4cea-9990-e64f60c76f92.png)

In [5]:
# Merge the crude-oil and agricultural-ETF frames on 'Date'.
# Value columns used from the merged frame:
#   "oil(2020-2021)", "oil(2021-2022)", "produce(2020-2021)", "produce(2021-2022)"
# This cell covers the period before the war shock (2020-2021 columns).
war_oil_produce = pd.merge(war_produce, war_oil, how='inner', on='Date')

# Split into training and test sets (80/20, fixed seed for reproducibility).
ex_oil_train, ex_oil_test, ex_produce_train, ex_produce_test = train_test_split(
    war_oil_produce['oil(2020-2021)'],
    war_oil_produce['produce(2020-2021)'],
    test_size=0.2,
    random_state=107,
)

# Fit a linear regression of produce price on oil price.
# Series.to_numpy() yields a plain ndarray, whose reshape to a column vector
# is documented NumPy behaviour; Series.array returns a pandas extension-array
# wrapper that is only specified as one-dimensional.
lr = LinearRegression()
lr.fit(
    ex_oil_train.to_numpy().reshape(-1, 1),
    ex_produce_train.to_numpy().reshape(-1, 1),
)

# Predict on the held-out test data.
ex_produce_pred = lr.predict(ex_oil_test.to_numpy().reshape(-1, 1))

# MSE, RMSE and R2 on the test split.
mse = mean_squared_error(ex_produce_test, ex_produce_pred)
rmse = np.sqrt(mse)

print('MSE :{0:.3f}, RMSE :{1:.3f}'.format(mse, rmse))
print('Variance score:{0:.3f}'.format(r2_score(ex_produce_test, ex_produce_pred)))

# The labels print "intercept" and "regression coefficient"; note that the
# coefficient is rounded to the nearest integer before printing.
print('절편 값 :',lr.intercept_)
print('회귀 계수값 :', np.round(lr.coef_))

MSE :68160.279, RMSE :261.075
Variance score:0.847
절편 값 : [4815.60979138]
회귀 계수값 : [[88.]]


In [6]:
# After the war shock (2021-2022 columns).
war_oil_produce = pd.merge(war_produce, war_oil, how='inner', on='Date')

# Split into training and test sets (80/20, fixed seed for reproducibility).
oil_train, oil_test, produce_train, produce_test = train_test_split(
    war_oil_produce['oil(2021-2022)'],
    war_oil_produce['produce(2021-2022)'],
    test_size=0.2,
    random_state=107,
)

# Fit a linear regression of produce price on oil price.
# Series.to_numpy() yields a plain ndarray, whose reshape to a column vector
# is documented NumPy behaviour; Series.array returns a pandas extension-array
# wrapper that is only specified as one-dimensional.
lr = LinearRegression()
lr.fit(
    oil_train.to_numpy().reshape(-1, 1),
    produce_train.to_numpy().reshape(-1, 1),
)

# Predict on the held-out test data.
produce_pred = lr.predict(oil_test.to_numpy().reshape(-1, 1))

# MSE, RMSE and R2 on the test split.
mse = mean_squared_error(produce_test, produce_pred)
rmse = np.sqrt(mse)

print('MSE :{0:.3f}, RMSE :{1:.3f}'.format(mse, rmse))
print('Variance score:{0:.3f}'.format(r2_score(produce_test, produce_pred)))

# The labels print "intercept" and "regression coefficient"; note that the
# coefficient is rounded to the nearest integer before printing.
print('절편 값 :',lr.intercept_)
print('회귀 계수값 :', np.round(lr.coef_))

MSE :359320.521, RMSE :599.434
Variance score:0.867
절편 값 : [4567.52579607]
회귀 계수값 : [[88.]]


Linear Regression with scikit-learn
---
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression

# Reference example: regress tip on total bill using plotly's bundled tips data.
df = px.data.tips()

# scikit-learn expects a 2-D feature matrix; a one-column frame converts to
# an (n_samples, 1) array.
X = df[['total_bill']].to_numpy()

model = LinearRegression()
model.fit(X, df['tip'])

# Evaluate the fitted line on 100 evenly spaced points across the observed range.
x_range = np.linspace(X.min(), X.max(), 100)
y_range = model.predict(x_range[:, np.newaxis])

# Scatter of the raw observations with the regression line drawn on top.
fig = px.scatter(df, x='total_bill', y='tip', opacity=0.65)
fig.add_traces(go.Scatter(x=x_range, y=y_range, name='Regression Fit'))
fig.show()

In [7]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression

# Regress the 2020-2021 produce price on the 2020-2021 oil price over every
# row of the merged frame, then draw the fitted line over the scatter.
# scikit-learn expects a 2-D feature matrix; a one-column frame converts to
# an (n_samples, 1) array.
X = war_oil_produce[['oil(2020-2021)']].to_numpy()

model = LinearRegression()
model.fit(X, war_oil_produce['produce(2020-2021)'])

# Evaluate the fitted line on 100 evenly spaced points across the observed range.
x_range = np.linspace(X.min(), X.max(), 100)
y_range = model.predict(x_range[:, np.newaxis])

fig = px.scatter(
    war_oil_produce,
    x='oil(2020-2021)',
    y='produce(2020-2021)',
    opacity=0.65,
)
fig.add_traces(go.Scatter(x=x_range, y=y_range, name='Regression Fit'))
fig.show()

![nwi_oil_produce](https://user-images.githubusercontent.com/96412661/164569904-eef771f5-9f0a-42d1-b0f0-d049429b003f.png)

In [8]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression

# Regress the 2021-2022 produce price on the 2021-2022 oil price over every
# row of the merged frame, then draw the fitted line over the scatter.
# scikit-learn expects a 2-D feature matrix; a one-column frame converts to
# an (n_samples, 1) array.
X = war_oil_produce[['oil(2021-2022)']].to_numpy()

model = LinearRegression()
model.fit(X, war_oil_produce['produce(2021-2022)'])

# Evaluate the fitted line on 100 evenly spaced points across the observed range.
x_range = np.linspace(X.min(), X.max(), 100)
y_range = model.predict(x_range[:, np.newaxis])

fig = px.scatter(
    war_oil_produce,
    x='oil(2021-2022)',
    y='produce(2021-2022)',
    opacity=0.65,
)
fig.add_traces(go.Scatter(x=x_range, y=y_range, name='Regression Fit'))
fig.show()

![wi_oil_produce](https://user-images.githubusercontent.com/96412661/164569914-8933efb5-ca7b-4e94-a4f5-671712a4eaff.png)

In [9]:
# Combined view: reload all four series and plot oil and produce together on
# a full-date axis.
#
# NOTE: this cell rebinds WI_WTI, NWI_WTI, beforewar, afterwar, war_oil,
# war_produce and war_oil_produce. The oil columns are rescaled here and the
# outer merges introduce missing values, so the regression cells should not be
# re-run after this cell without first re-running their own loading cells.
def _load_wti_close(csv_path, close_label, date_format):
    """Read a WTI price CSV and normalise its date and closing-price columns.

    The file is expected to contain a '날짜' (date) column and a '종가'
    (closing price) column. Dates are presumably written like
    '2022년 04월 21일' (inferred from the cleaning steps below -- confirm
    against the CSV).

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    close_label : str
        New name for the '종가' column.
    date_format : str
        ``strftime`` pattern used to render the final 'Date' strings.

    Returns
    -------
    pandas.DataFrame
        Rows in chronological order, with '날짜' renamed to 'Date' (strings
        formatted with ``date_format``) and '종가' renamed to ``close_label``.
    """
    frame = pd.read_csv(csv_path)

    # Turn 'YYYY년 MM월 DD일' into 'YYYY-MM-DD'. regex=False makes every
    # pattern a literal match regardless of the pandas version's default.
    iso_dates = (
        frame['날짜']
        .str.replace(" ", "", regex=False)
        .str.replace("년", "-", regex=False)
        .str.replace("월", "-", regex=False)
        .str.replace("일", "", regex=False)
    )
    frame['날짜'] = pd.to_datetime(iso_dates)

    # Sort on the parsed datetimes, not on the raw text: a string sort is only
    # chronological when month and day happen to be zero-padded.
    frame = frame.sort_values('날짜', ascending=True)

    frame['날짜'] = frame['날짜'].dt.strftime(date_format)
    return frame.rename(columns={'날짜': 'Date', '종가': close_label})


# Multipliers applied to the oil closing prices before plotting. Presumably
# chosen by eye to bring oil onto a scale comparable with the ETF price so
# both fit one y-axis -- TODO confirm the rationale.
OIL_SCALE_2020_2021 = 200
OIL_SCALE_2021_2022 = 150

# Dates keep the two-digit year ('YY/MM/DD') so the periods stay distinct.
WI_WTI = _load_wti_close('../csv/WTI_WI.csv', 'oil(2021-2022)', '%y/%m/%d')
NWI_WTI = _load_wti_close('../csv/WTI_noWI.csv', 'oil(2020-2021)', '%y/%m/%d')
beforewar = pd.read_csv('../csv/beforewar.csv')
afterwar = pd.read_csv('../csv/afterwar.csv')

# Merge oil: the outer join keeps every date from either period.
war_oil = pd.merge(NWI_WTI, WI_WTI, how='outer', on='Date')

# Weight the oil series.
war_oil['oil(2020-2021)'] = war_oil['oil(2020-2021)'] * OIL_SCALE_2020_2021
war_oil['oil(2021-2022)'] = war_oil['oil(2021-2022)'] * OIL_SCALE_2021_2022

# Convert the produce dates to the same 'YY/MM/DD' string form.
beforewar['Date'] = pd.to_datetime(beforewar['Date']).dt.strftime('%y/%m/%d')
afterwar['Date'] = pd.to_datetime(afterwar['Date']).dt.strftime('%y/%m/%d')

# Label each closing-price column with the period it covers.
beforewar = beforewar.rename(columns={'Close':'produce(2020-2021)'})
afterwar = afterwar.rename(columns={'Close':'produce(2021-2022)'})

# Merge produce: the outer join keeps every date from either period.
war_produce = pd.merge(beforewar, afterwar, how='outer', on='Date')

# Merge 2: keep the dates present in both the produce and the oil frames.
war_oil_produce = pd.merge(war_produce, war_oil, how='inner', on='Date')

# Visualise all four series on one date axis.
fig = px.line(war_oil_produce, x='Date', y=["oil(2020-2021)","oil(2021-2022)","produce(2020-2021)","produce(2021-2022)"])
fig.show()

![war_oil_produce](https://user-images.githubusercontent.com/96412661/164569906-31e29fe9-4105-49c9-b86b-672b25791d0a.png)