In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import xgboost as xgb
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier, Pool, cv
import catboost
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.metrics import log_loss



Dataset Info.

1. train.csv

train 데이터 : 2019년 01월 01일부터 2023년 03월 03일까지의 유통된 품목의 가격 데이터
item: 품목 코드
TG : 감귤
BC : 브로콜리
RD : 무
CR : 당근
CB : 양배추
corporation : 유통 법인 코드
법인 A부터 F 존재
location : 지역 코드
J : 제주도 제주시
S : 제주도 서귀포시
supply(kg) : 유통된 물량, kg 단위
price(원/kg) : 유통된 품목들의 kg 마다의 가격, 원 단위


2. international_trade.csv

관련 품목 수출입 정보
중량 단위 kg
금액 단위 천 달러


3. test.csv

test 데이터 : 2023년 03월 04일부터 2023년 03월 31일까지의 데이터


4. sample_submission.csv

제출을 위한 양식
2023년 03월 04일부터 2023년 03월 31일까지의 price(원/kg)을 예측
ID는 품목, 유통 법인, 지역 코드로 구성된 식별자
해당 ID에 맞춰 price(원/kg) 예측값을 answer 컬럼에 기입해야 함

In [2]:
directory = 'C:/Users\eunse/Downloads/open (1)/open'
os.chdir(directory)

In [3]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
trade = pd.read_csv('international_trade.csv')

In [25]:
trade = pd.read_csv('international_trade.csv')

In [7]:
test

Unnamed: 0,ID,timestamp,item,corporation,location
0,TG_A_J_20230304,2023-03-04,TG,A,J
1,TG_A_J_20230305,2023-03-05,TG,A,J
2,TG_A_J_20230306,2023-03-06,TG,A,J
3,TG_A_J_20230307,2023-03-07,TG,A,J
4,TG_A_J_20230308,2023-03-08,TG,A,J
...,...,...,...,...,...
1087,RD_F_J_20230327,2023-03-27,RD,F,J
1088,RD_F_J_20230328,2023-03-28,RD,F,J
1089,RD_F_J_20230329,2023-03-29,RD,F,J
1090,RD_F_J_20230330,2023-03-30,RD,F,J


In [20]:
train


Unnamed: 0,ID,timestamp,item,corporation,location,supply(kg),price(원/kg),holiday,y-m
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0,1,2019-01
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0,0,2019-01
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0,0,2019-01
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0,0,2019-01
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0,1,2019-01
...,...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,F,J,452440.0,468.0,0,2023-02
59393,RD_F_J_20230228,2023-02-28,RD,F,J,421980.0,531.0,0,2023-02
59394,RD_F_J_20230301,2023-03-01,RD,F,J,382980.0,574.0,1,2023-03
59395,RD_F_J_20230302,2023-03-02,RD,F,J,477220.0,523.0,0,2023-03


In [4]:
# 'date'를 제외한 컬럼 저장하기
# 'timestamp' 컬럼을 datetime 타입으로 변경
train['timestamp'] = pd.to_datetime(train['timestamp'])
test['timestamp'] = pd.to_datetime(test['timestamp'])

train.head()

Unnamed: 0,ID,timestamp,item,corporation,location,supply(kg),price(원/kg)
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0


In [6]:
# 날짜 관련 feature 추가 하기

train['year'] = train['timestamp'].dt.year
train['month'] = train['timestamp'].dt.month
train['week'] = train['timestamp'].dt.isocalendar().week.astype(np.int32)
train['weekday']  = train['timestamp'].dt.weekday

# 날짜 관련 피처를 저장해 둔다.
features_date = ['month', 'week', 'weekday']
# 컬럼명 변경
train = train.rename(columns = {'price(원/kg)' : 'price', 'supply(kg)' : 'supply'})
train

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,year,month,week,weekday
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0,2019,1,1,1
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0,2019,1,1,2
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0,2019,1,1,3
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0,2019,1,1,4
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0,2019,1,1,5
...,...,...,...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,F,J,452440.0,468.0,2023,2,9,0
59393,RD_F_J_20230228,2023-02-28,RD,F,J,421980.0,531.0,2023,2,9,1
59394,RD_F_J_20230301,2023-03-01,RD,F,J,382980.0,574.0,2023,3,9,2
59395,RD_F_J_20230302,2023-03-02,RD,F,J,477220.0,523.0,2023,3,9,3


In [8]:
import seaborn as sns
import matplotlib.pyplot as plt
from pytimekr import pytimekr

In [9]:
#주말 또는 공휴일이면 1반환 코드

year_2019 = pytimekr.holidays(year=2019)
year_2020 = pytimekr.holidays(year=2020)
year_2021 = pytimekr.holidays(year=2021)
year_2022 = pytimekr.holidays(year=2022)
year_2023 = pytimekr.holidays(year=2023)



def holidays(x):
    if x.weekday() in range(5,8):
        return 1
    if x.year == 2019  and x in year_2019 :
        return 1 
    elif x.year == 2020 and x in year_2020:
        return 1 
    elif x.year == 2021 and x in year_2021 :
        return 1 
    elif x.year == 2022 and x in year_2022 :
        return 1
    elif x.year == 2023 and x in year_2023:
        return 1
    else:
        return 0

In [10]:
import warnings
warnings.filterwarnings('ignore')
train['holiday'] = train['timestamp'].apply(holidays)
train

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,year,month,week,weekday,holiday
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0,2019,1,1,1,1
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0,2019,1,1,2,0
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0,2019,1,1,3,0
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0,2019,1,1,4,0
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0,2019,1,1,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,F,J,452440.0,468.0,2023,2,9,0,0
59393,RD_F_J_20230228,2023-02-28,RD,F,J,421980.0,531.0,2023,2,9,1,0
59394,RD_F_J_20230301,2023-03-01,RD,F,J,382980.0,574.0,2023,3,9,2,1
59395,RD_F_J_20230302,2023-03-02,RD,F,J,477220.0,523.0,2023,3,9,3,0


In [11]:
train['y-m']=train['timestamp'].dt.strftime('%Y-%m')
train['y-m']

0        2019-01
1        2019-01
2        2019-01
3        2019-01
4        2019-01
          ...   
59392    2023-02
59393    2023-02
59394    2023-03
59395    2023-03
59396    2023-03
Name: y-m, Length: 59397, dtype: object

In [26]:
trade = trade[trade.품목명.str.contains('감귤|브로콜리|무|당근|양배추')]
trade

Unnamed: 0,기간,품목명,수출 중량,수출 금액,수입 중량,수입 금액,무역수지
3,2019-01,꽃양배추와 브로콜리(broccoli),160,1,638913,563,-562
4,2019-01,방울다다기 양배추,0,0,7580,38,-38
5,2019-01,양배추,184650,94,395802,90,4
8,2019-01,당근,23150,22,7466150,2955,-2934
12,2019-01,무화과,2627,23,94529,464,-441
...,...,...,...,...,...,...,...
1250,2023-02,양배추,13188,13,377456,104,-91
1253,2023-02,당근,22510,20,9260020,3758,-3737
1254,2023-02,순무,4000,4,2,0,4
1258,2023-02,무화과,1319,14,104566,454,-440


In [27]:
trade = trade[trade['품목명']!='무화과']
trade = trade[trade['품목명']!='방울다다기 양배추']
trade = trade[trade['품목명']!='순무']
trade.loc[trade['품목명']=='꽃양배추와 브로콜리(broccoli)','품목명'] = '브로콜리'



In [30]:
# 컬럼명 변경
trade = trade.rename(columns = {'품목명' : 'item'})
trade

Unnamed: 0,기간,item,수출 중량,수출 금액,수입 중량,수입 금액,무역수지
3,2019-01,브로콜리,160,1,638913,563,-562
5,2019-01,양배추,184650,94,395802,90,4
8,2019-01,당근,23150,22,7466150,2955,-2934
17,2019-01,감귤,58368,172,0,0,172
28,2019-02,브로콜리,780,1,396870,399,-398
...,...,...,...,...,...,...,...
1236,2023-01,감귤,81509,269,0,0,269
1248,2023-02,브로콜리,24,0,332640,352,-352
1250,2023-02,양배추,13188,13,377456,104,-91
1253,2023-02,당근,22510,20,9260020,3758,-3737


In [31]:
trade

Unnamed: 0,기간,item,수출 중량,수출 금액,수입 중량,수입 금액,무역수지
3,2019-01,브로콜리,160,1,638913,563,-562
5,2019-01,양배추,184650,94,395802,90,4
8,2019-01,당근,23150,22,7466150,2955,-2934
17,2019-01,감귤,58368,172,0,0,172
28,2019-02,브로콜리,780,1,396870,399,-398
...,...,...,...,...,...,...,...
1236,2023-01,감귤,81509,269,0,0,269
1248,2023-02,브로콜리,24,0,332640,352,-352
1250,2023-02,양배추,13188,13,377456,104,-91
1253,2023-02,당근,22510,20,9260020,3758,-3737


In [32]:
fruits_dict = {'감귤':'TG' ,'브로콜리':'BC' ,'당근':'CR' ,'양배추':'CB'}
fruits_dict


{'감귤': 'TG', '브로콜리': 'BC', '당근': 'CR', '양배추': 'CB'}

In [33]:
trade['item'] = trade['item'].map(fruits_dict)
trade

Unnamed: 0,기간,item,수출 중량,수출 금액,수입 중량,수입 금액,무역수지
3,2019-01,BC,160,1,638913,563,-562
5,2019-01,CB,184650,94,395802,90,4
8,2019-01,CR,23150,22,7466150,2955,-2934
17,2019-01,TG,58368,172,0,0,172
28,2019-02,BC,780,1,396870,399,-398
...,...,...,...,...,...,...,...
1236,2023-01,TG,81509,269,0,0,269
1248,2023-02,BC,24,0,332640,352,-352
1250,2023-02,CB,13188,13,377456,104,-91
1253,2023-02,CR,22510,20,9260020,3758,-3737


In [34]:
train_not_RD = train[train['item']!='RD']
train_not_RD

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,year,month,week,weekday,holiday,y-m
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0,2019,1,1,1,1,2019-01
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0,2019,1,1,2,0,2019-01
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0,2019,1,1,3,0,2019-01
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0,2019,1,1,4,0,2019-01
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0,2019,1,1,5,1,2019-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57869,CB_F_J_20230227,2023-02-27,CB,F,J,232312.0,652.0,2023,2,9,0,0,2023-02
57870,CB_F_J_20230228,2023-02-28,CB,F,J,224072.0,672.0,2023,2,9,1,0,2023-02
57871,CB_F_J_20230301,2023-03-01,CB,F,J,273800.0,621.0,2023,3,9,2,1,2023-03
57872,CB_F_J_20230302,2023-03-02,CB,F,J,238992.0,653.0,2023,3,9,3,0,2023-03


In [35]:
df_merged_not_RD = pd.merge(train_not_RD, trade, left_on=['item', 'y-m'], right_on=['item', '기간'])
df_merged_not_RD


Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,year,month,week,weekday,holiday,y-m,기간,수출 중량,수출 금액,수입 중량,수입 금액,무역수지
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0,2019,1,1,1,1,2019-01,2019-01,58368,172,0,0,172
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0,2019,1,1,2,0,2019-01,2019-01,58368,172,0,0,172
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0,2019,1,1,3,0,2019-01,2019-01,58368,172,0,0,172
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0,2019,1,1,4,0,2019-01,2019-01,58368,172,0,0,172
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0,2019,1,1,5,1,2019-01,2019-01,58368,172,0,0,172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47115,BC_E_S_20230224,2023-02-24,BC,E,S,2104.0,2025.0,2023,2,8,4,0,2023-02,2023-02,24,0,332640,352,-352
47116,BC_E_S_20230225,2023-02-25,BC,E,S,1032.0,2353.0,2023,2,8,5,1,2023-02,2023-02,24,0,332640,352,-352
47117,BC_E_S_20230226,2023-02-26,BC,E,S,0.0,0.0,2023,2,8,6,1,2023-02,2023-02,24,0,332640,352,-352
47118,BC_E_S_20230227,2023-02-27,BC,E,S,2200.0,2488.0,2023,2,9,0,0,2023-02,2023-02,24,0,332640,352,-352


## LGBM 모델링

In [42]:
#원-핫 인코딩
df_merged_not_RD = pd.get_dummies(df_merged_not_RD, columns=['item', 'corporation', 'location'])


KeyError: "None of [Index(['item', 'corporation', 'location'], dtype='object')] are in the [columns]"

In [43]:
df_merged_not_RD

Unnamed: 0,ID,timestamp,supply,price,year,month,week,weekday,holiday,y-m,...,item_CR,item_TG,corporation_A,corporation_B,corporation_C,corporation_D,corporation_E,corporation_F,location_J,location_S
0,TG_A_J_20190101,2019-01-01,0.0,0.0,2019,1,1,1,1,2019-01,...,0,1,1,0,0,0,0,0,1,0
1,TG_A_J_20190102,2019-01-02,0.0,0.0,2019,1,1,2,0,2019-01,...,0,1,1,0,0,0,0,0,1,0
2,TG_A_J_20190103,2019-01-03,60601.0,1728.0,2019,1,1,3,0,2019-01,...,0,1,1,0,0,0,0,0,1,0
3,TG_A_J_20190104,2019-01-04,25000.0,1408.0,2019,1,1,4,0,2019-01,...,0,1,1,0,0,0,0,0,1,0
4,TG_A_J_20190105,2019-01-05,32352.0,1250.0,2019,1,1,5,1,2019-01,...,0,1,1,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47115,BC_E_S_20230224,2023-02-24,2104.0,2025.0,2023,2,8,4,0,2023-02,...,0,0,0,0,0,0,1,0,0,1
47116,BC_E_S_20230225,2023-02-25,1032.0,2353.0,2023,2,8,5,1,2023-02,...,0,0,0,0,0,0,1,0,0,1
47117,BC_E_S_20230226,2023-02-26,0.0,0.0,2023,2,8,6,1,2023-02,...,0,0,0,0,0,0,1,0,0,1
47118,BC_E_S_20230227,2023-02-27,2200.0,2488.0,2023,2,9,0,0,2023-02,...,0,0,0,0,0,0,1,0,0,1


In [44]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# 데이터 전처리 및 특성 선택
# (데이터프레임 df에서 필요한 열만 선택하여 X로, 예측하고자 하는 'target' 열을 y로 설정)
#X = df[['supply', 'year','month', '수출 중량', '수입 중량', '수입 금액', '무역수지']]
#X= dr_merged[
#y = dr_merged['price']


# 데이터프레임에서 필요한 열만 선택
#selected_columns = ['supply','year', 'month', 'week' , 'weekday', 'holiday', 'price']
#df = dr_merged[selected_columns]

# 특성과 타겟 데이터 분리
columns_to_drop = ['price', 'ID', 'timestamp', '기간', '수출 중량', '수출 금액', '수입 중량', '수입 금액','y-m']
X = df_merged_not_RD.drop(columns=columns_to_drop)
#X = dr_merged['supply',
y = df_merged_not_RD['price']

# 학습 데이터와 테스트 데이터로 나누기
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# LightGBM 데이터셋 생성
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# LightGBM 모델 설정 및 학습
params = {
    'objective': 'regression',  # 회귀 문제 설정
    'metric': 'rmse',  # 평가 지표 설정 (Root Mean Squared Error)
    'boosting_type': 'gbdt',  # 부스팅 알고리즘 설정
    'num_leaves': 31,  # 트리의 최대 잎 수
    'learning_rate': 0.05,  # 학습 속도
    'feature_fraction': 0.9,  # 각 트리마다 사용되는 특성의 비율
}

num_round = 100  # 학습 라운드 수 (트리의 개수)

# LightGBM 모델 학습
model = lgb.train(params, train_data, num_round, valid_sets=[train_data, test_data])

# 예측
y_pred = model.predict(test_x, num_iteration=model.best_iteration)

# 평가 지표 계산
rmse = mean_squared_error(y_test, y_pred, squared=False)
print(f'Root Mean Squared Error: {rmse}')

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 37696, number of used features: 19
[LightGBM] [Info] Start training from score 1376.659274
[1]	training's rmse: 2109.62	valid_1's rmse: 2083.29
[2]	training's rmse: 2013.8	valid_1's rmse: 1988.48
[3]	training's rmse: 1923.35	valid_1's rmse: 1898.8
[4]	training's rmse: 1837.62	valid_1's rmse: 1813.87
[5]	training's rmse: 1756.45	valid_1's rmse: 1734.05
[6]	training's rmse: 1679.82	valid_1's rmse: 1659.06
[7]	training's rmse: 1607.17	valid_1's rmse: 1587.47
[8]	training's rmse: 1538.79	valid_1's rmse: 1520.45
[9]	training's rmse: 1487.27	valid_1's rmse: 1469.66
[10]	training's rmse: 1425.68	valid_1's rmse: 1408.84
[11]	training's rmse: 1367.25	valid_1's rmse: 1351.36
[12]	training's rmse: 1324.44	valid_1's rmse: 1309.04
[13]	training's rmse: 1284.53	valid_1's rmse: 1269.67
[14]	

### test 데이터

In [49]:
test['year'] = test['timestamp'].dt.year
test['month'] = test['timestamp'].dt.month
test['week'] = test['timestamp'].dt.isocalendar().week.astype(np.int32)
test['weekday']  = test['timestamp'].dt.weekday
test_x = test.drop(columns='timestamp')

# 날짜 관련 피처를 저장해 둔다.
test_features_date = ['month', 'week', 'weekday']
test_x

Unnamed: 0,ID,item,corporation,location,year,month,week,weekday
0,TG_A_J_20230304,TG,A,J,2023,3,9,5
1,TG_A_J_20230305,TG,A,J,2023,3,9,6
2,TG_A_J_20230306,TG,A,J,2023,3,10,0
3,TG_A_J_20230307,TG,A,J,2023,3,10,1
4,TG_A_J_20230308,TG,A,J,2023,3,10,2
...,...,...,...,...,...,...,...,...
1087,RD_F_J_20230327,RD,F,J,2023,3,13,0
1088,RD_F_J_20230328,RD,F,J,2023,3,13,1
1089,RD_F_J_20230329,RD,F,J,2023,3,13,2
1090,RD_F_J_20230330,RD,F,J,2023,3,13,3


In [50]:
# 예측
y_pred = model.predict(test_x, num_iteration=model.best_iteration)

# 평가 지표 계산
rmse = mean_squared_error(y_test, y_pred, squared=False)
print(f'Root Mean Squared Error: {rmse}')

ValueError: DataFrame.dtypes for data must be int, float or bool.
Did not expect the data types in the following fields: ID, item, corporation, location

## Seq2Seq 모델링

In [49]:
pip install --upgrade pip


Collecting pip
  Downloading pip-23.3.1-py3-none-any.whl (2.1 MB)
     ---------------------------------------- 2.1/2.1 MB 4.5 MB/s eta 0:00:00
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 22.2.2
    Uninstalling pip-22.2.2:
      Successfully uninstalled pip-22.2.2
Successfully installed pip-23.3.1


In [50]:
pip install tensorflow

Collecting tensorflow==2.5.0
  Downloading tensorflow-2.5.0-cp39-cp39-win_amd64.whl (422.6 MB)
     -------------------------------------- 422.6/422.6 MB 1.5 MB/s eta 0:00:00
Collecting typing-extensions~=3.7.4
  Downloading typing_extensions-3.7.4.3-py3-none-any.whl (22 kB)
Collecting six~=1.15.0
  Downloading six-1.15.0-py2.py3-none-any.whl (10 kB)
Collecting keras-nightly~=2.5.0.dev
  Downloading keras_nightly-2.5.0.dev2021032900-py2.py3-none-any.whl (1.2 MB)
     ---------------------------------------- 1.2/1.2 MB 3.8 MB/s eta 0:00:00
Collecting flatbuffers~=1.12.0
  Downloading flatbuffers-1.12-py2.py3-none-any.whl (15 kB)
Collecting gast==0.4.0
  Downloading gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting absl-py~=0.10
  Downloading absl_py-0.15.0-py3-none-any.whl (132 kB)
     -------------------------------------- 132.0/132.0 kB 7.6 MB/s eta 0:00:00
Collecting astunparse~=1.6.3
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting h5py~=3.1.0
  Downloading h5py

ERROR: Could not install packages due to an OSError: [WinError 5] 액세스가 거부되었습니다: 'C:\\Users\\eunse\\anaconda3\\Lib\\site-packages\\~umpy\\.libs\\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll'
Consider using the `--user` option or check the permissions.



^C
Note: you may need to restart the kernel to use updated packages.


In [55]:
pip install --upgrade numpy

Note: you may need to restart the kernel to use updated packages.


In [53]:
pip install --user tensorflow




In [58]:
pip install --user tensorflow

Collecting flatbuffers>=23.5.26 (from tensorflow-intel==2.15.0->tensorflow)
  Using cached flatbuffers-23.5.26-py2.py3-none-any.whl.metadata (850 bytes)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->tensorflow)
  Using cached tensorboard_data_server-0.7.2-py3-none-any.whl.metadata (1.1 kB)
Collecting urllib3<1.27,>=1.21.1 (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->tensorflow)
  Downloading urllib3-1.26.18-py2.py3-none-any.whl.metadata (48 kB)
     ---------------------------------------- 48.9/48.9 kB 1.2 MB/s eta 0:00:00
Collecting pyasn1<0.5.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->tensorflow)
  Downloading pyasn1-0.4.8-py2.py3-none-any.whl (77 kB)
     ---------------------------------------- 77.1/77.1 kB 4.5 MB/s eta 0:00:00
Using cached flatbuffers-23.5.26-py2.py3-none-any.whl (26 kB)
Using cached tensorboard_data_ser

RuntimeError: module compiled against API version 0x10 but this version of numpy is 0xe

SystemError: initialization of _pywrap_checkpoint_reader raised unreported exception

In [60]:
pip install --user --upgrade numpy




In [61]:
pip install --user --upgrade --force-reinstall tensorflow

Collecting tensorflow
  Using cached tensorflow-2.15.0-cp39-cp39-win_amd64.whl.metadata (3.6 kB)
Collecting tensorflow-intel==2.15.0 (from tensorflow)
  Using cached tensorflow_intel-2.15.0-cp39-cp39-win_amd64.whl.metadata (5.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.15.0->tensorflow)
  Using cached absl_py-2.0.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.15.0->tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow-intel==2.15.0->tensorflow)
  Using cached flatbuffers-23.5.26-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.15.0->tensorflow)
  Using cached gast-0.5.4-py3-none-any.whl (19 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.15.0->tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl (57 kB)
Collecting h5py>=2.9.0 (from tensorflow-intel==2.15.0->tensorflow)
  Us

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
daal4py 2021.6.0 requires daal==2021.4.0, which is not installed.
spyder 5.2.2 requires pyqt5<5.13, which is not installed.
spyder 5.2.2 requires pyqtwebengine<5.13, which is not installed.
botocore 1.27.28 requires urllib3<1.27,>=1.25.4, but you have urllib3 2.1.0 which is incompatible.
conda-repo-cli 1.0.20 requires clyent==1.2.1, but you have clyent 1.2.2 which is incompatible.
conda-repo-cli 1.0.20 requires nbformat==5.4.0, but you have nbformat 5.5.0 which is incompatible.
conda-repo-cli 1.0.20 requires requests==2.28.1, but you have requests 2.31.0 which is incompatible.
numba 0.55.1 requires numpy<1.22,>=1.18, but you have numpy 1.26.2 which is incompatible.
openxlab 0.0.28 requires requests~=2.28.2, but you have requests 2.31.0 which is incompatible.
openxlab 0.0.28 requires setuptools~=60.2.0, but you hav

Collecting requests-oauthlib>=0.7.0 (from google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->tensorflow)
  Using cached requests_oauthlib-1.3.1-py2.py3-none-any.whl (23 kB)
Collecting importlib-metadata>=4.4 (from markdown>=2.6.8->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->tensorflow)
  Using cached importlib_metadata-6.8.0-py3-none-any.whl.metadata (5.1 kB)
Collecting charset-normalizer<4,>=2 (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->tensorflow)
  Using cached charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl.metadata (34 kB)
Collecting idna<4,>=2.5 (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->tensorflow)
  Using cached idna-3.4-py3-none-any.whl (61 kB)
Collecting urllib3<3,>=1.21.1 (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->tensorflow)
  Using cached urllib3-2.1.0-py3-none-any.whl.metadata (6.4 kB)
Collecting certifi>=2017.4.17 (from requests<3,

In [1]:
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense






In [3]:
price_data = df_merged['price']

NameError: name 'df_merged' is not defined

In [None]:


# 데이터 정규화
scaler = MinMaxScaler(feature_range=(0, 1))
price_data_scaled = scaler.fit_transform(price_data)

# 데이터 전처리
def create_sequences(data, seq_length):
    sequences = []
    target = []
    for i in range(len(data) - seq_length):
        seq = data[i:i+seq_length]
        label = data[i+seq_length]
        sequences.append(seq)
        target.append(label)
    return np.array(sequences), np.array(target)

seq_length = 10  # 시퀀스 길이 설정
X, y = create_sequences(price_data_scaled, seq_length)

# Seq2Seq 모델 생성
def seq2seq_model(input_shape, hidden_units):
    inputs = Input(shape=input_shape)
    encoder = LSTM(hidden_units, return_state=True)
    _, state_h, state_c = encoder(inputs)
    encoder_states = [state_h, state_c]

    decoder_inputs = Input(shape=(1, 1))
    decoder_lstm = LSTM(hidden_units, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
    decoder_dense = Dense(1, activation='linear')
    decoder_outputs = decoder_dense(decoder_outputs)

    model = Model([inputs, decoder_inputs], decoder_outputs)
    return model

model = seq2seq_model((seq_length, 1), hidden_units=50)
model.compile(optimizer='adam', loss='mean_squared_error')

# 모델 학습
model.fit([X, X[:, -1:, :]], y, epochs=50, batch_size=16, verbose=2)

# 미래 예측을 위한 시퀀스 생성
future_seq = price_data_scaled[-seq_length:].reshape((1, seq_length, 1))

# 다음 시점의 가격 예측
future_pred = model.predict([future_seq, np.zeros((1, 1, 1))])

# 역정규화하여 실제 가격으로 변환
future_pred_price = scaler.inverse_transform(future_pred.reshape(-1, 1))

print("다음 시점의 예측 가격:", future_pred_price)