In [1]:
import matplotlib.pyplot as plt
plt.rc('font', family='NanumBarunGothic') 
import tensorflow as tf
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from tensorflow.keras import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping
from statsmodels.tsa.seasonal import STL
from datetime import timedelta

In [23]:
def get_vegetable_trade_dict(trade_path_args):
    return get_dict_from_csv(trade_path_args, '거래물량(톤)')

def get_vegetable_price_dict(price_path_args):
    return get_dict_from_csv(price_path_args, '평균가격(원/kg)')

def get_dict_from_csv(path_args, col_name):
    path = ''.join(path_args)

    target = pd.read_csv(path, encoding='utf-8').replace(np.NaN, 0)
    target['일자'] = pd.to_datetime(target['일자'], format='%Y%m%d')

    target_dict = {}
    # 오늘 날짜 얻기
    today = datetime.today().date()
    # 행을 반복하며 '일자' 확인
    for index, row in target.iterrows():
        date = row['일자'].date()
        target_dict[date] = row[col_name] if col_name == '평균가격(원/kg)' else row[col_name] * 1000

        # '일자'가 오늘 날짜와 같으면 반복 중지
        if row['일자'].date() == today:
            break
    return target_dict

## 크롤링한 데이터 가져오기

In [24]:
original_path = './vegetable_price/original_vegetable_csv/'
test_path = './vegetable_price/test.csv'
train_path = './vegetable_price/train.csv'

vegetable_names = ['청양', '시금치(일반)', '토마토(일반)', '파프리카(일반)', '깻잎(일반)', '미나리(일반)', '양파(일반)', '풋고추(전체)']

vegetable_price_dict = {x: None for x in vegetable_names}
vegetable_trade_dict = {x: None for x in vegetable_names}

for vegetable in vegetable_names:
    trade_path_args = [original_path, vegetable, " 거래물량 정보.csv"]
    price_path_args = [original_path, vegetable, " 평균가격 정보.csv"]

    vegetable_price_dict[vegetable] = get_vegetable_price_dict(price_path_args)
    vegetable_trade_dict[vegetable] = get_vegetable_trade_dict(trade_path_args)

## test 파일 전처리

In [25]:
test = pd.read_csv(test_path, encoding='ANSI').replace(np.NaN, 0)

# 유효한 날짜 형식을 가지는 행만 선택
test['date'] = pd.to_datetime(test['date'], errors='coerce')
test = test.dropna(subset=['date'])

weekdays = ["월요일", "화요일", "수요일", "목요일", "금요일", "토요일", "일요일"]

# 오늘의 날짜를 얻습니다.
today = datetime.today()


# DataFrame의 마지막 날짜를 얻습니다.
last_date = test['date'].iloc[-1] + timedelta(days=1)


while True:
    if last_date.date() > today.date():
        break
    li = [last_date, weekdays[last_date.weekday()]]
    for col in vegetable_trade_dict.keys():
        li.append(vegetable_trade_dict[col][last_date.date()])
        li.append(vegetable_price_dict[col][last_date.date()])
    test.loc[len(test)] = li
    last_date += timedelta(days=1)
test['date'] = pd.to_datetime(test['date']).dt.strftime('%Y-%m-%d')
test.to_csv(test_path, encoding='ANSI', index=False)


## train 파일 전처리

In [26]:
train_csv = pd.read_csv(train_path, encoding='ANSI')
train = pd.concat([train_csv, test], axis=0)

train.date = pd.to_datetime(train.date)
train = pd.concat([train, pd.get_dummies(train['요일'])], axis=1)
feature = train.columns[2:]
train = train.reset_index(drop=True)
train = train[1:].replace(0, np.NaN).fillna(method='ffill').fillna(0)

vegetable_data_frame = train.drop(columns=['금요일', '목요일', '수요일', '월요일', '일요일', '토요일', '화요일'])

df2 = vegetable_data_frame[0:-2]
vegetable_today = vegetable_data_frame[-1:]

## 머신러닝 실행!

In [27]:
from matplotlib import pyplot as plt
from tqdm import tqdm
class Nong1:
    def __init__(self, df, test):
        self.test = test
        self.df = pd.concat([df, self.test], axis=0)[1:].replace(0, np.NaN).fillna(method='ffill').fillna(0)

        self.df.date = pd.to_datetime(self.df.date)
        self.df = pd.concat([self.df, pd.get_dummies(self.df['요일'])], axis=1)
        self.feature = self.df.columns[2:]
        self.df = self.df.reset_index(drop=True)

    def set_feature(self,name):
        self.name = name
        self.name1 = name+"_가격(원/kg)"
        self.name2 = name+"_거래량(kg)"
        self.feature = [self.name1, self.name2, '금요일',   '목요일',   '수요일',   '월요일',   '일요일',   '토요일',   '화요일', 'resid']

    def set_target(self,day):
        if day == 1:
            self.df['target'] = self.df[self.name1].shift(-2)
        elif day == 2:
            self.df['target'] = self.df[self.name1].shift(-3)
        elif day == 3:
            self.df['target'] = self.df[self.name1].shift(-4)
        elif day == 4:
            self.df['target'] = self.df[self.name1].shift(-5)
        elif day == 5:
            self.df['target'] = self.df[self.name1].shift(-6)
        elif day == 6:
            self.df['target'] = self.df[self.name1].shift(-7)
        elif day == 7:
            self.df['target'] = self.df[self.name1].shift(-8)
        self.df['resid'] = 0
        stl = STL(self.df[['date', self.name1]].set_index('date'), period=12)
        res = stl.fit()
        self.df['resid'] = res.resid.values

    def set_model(self):
        self.scaler = MinMaxScaler()
        self.df[self.feature] = self.scaler.fit_transform(self.df[self.feature])
        self.df_learn = self.df[self.df['target'].notnull()]
        self.X = self.df_learn[self.feature].values.reshape(-1,1,len(self.feature))
        self.y = self.df_learn['target'].values.reshape(-1,1,1)

        with tf.device('/device:GPU:0'):
            self.model = Sequential()
            self.model.add(layers.Activation('relu'))
            self.model.add(tf.compat.v1.keras.layers.LSTM(100, input_shape=(21,len(self.feature)), return_sequences=True))
            self.model.add(Dropout(0.1))
            self.model.add(layers.Dense(30))
            self.model.add(Dropout(0.1))
            self.model.add(layers.Dense(1))
            self.model.compile(optimizer='adam', loss='mse')
            self.early_stopping = EarlyStopping(patience=30)

            # tqdm을 사용하여 진행 상태 표시
            # tqdm을 사용하여 진행 상태 표시
            with tqdm(total=1000, desc="Epochs", bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]") as pbar:
                self.model.fit(
                    self.X, self.y, epochs=1000, batch_size=32, validation_split=0.1,
                    callbacks=[
                        self.early_stopping,
                        tf.keras.callbacks.LambdaCallback(on_epoch_end=lambda batch, logs: pbar.update(1))
                    ],
                    verbose=0
                )
    def get_plot(self):
        self.y_pred = self.model.predict(self.X)
        self.MAE = mean_absolute_error(self.y.reshape(-1,1), self.y_pred.reshape(-1,1))
        plt.figure(figsize=(20, 10), dpi=300)
        plt.title(self.name + ' 가격 예측 결과'+   '   MAE : ' + str(self.MAE)[:7])
        plt.ylabel(self.name + ' 가격')
        plt.plot(np.array(self.y.reshape(-1,1)), alpha = 0.9, label = 'Real')
        plt.plot(self.model.predict(self.X).reshape(-1,1), alpha = 0.6, linestyle = "--", label = 'Predict')
        plt.legend()
        plt.show()

    def get_price(self):
        self.price = self.model.predict(self.df[self.feature].iloc[len(self.df)-1].values.reshape(-1,1,len(self.feature)))
        return self.price[0][0][0]

In [7]:
features = ['청양고추']
days = [1,2]
models = []
predictions = []
for day in days:
    for feature in features:
        my_nong1 = Nong1(df2, vegetable_today)
        my_nong1.set_feature(feature)
        my_nong1.set_target(day)
        my_nong1.set_model()
        predictions.append(my_nong1.get_price())
        models.append(my_nong1)

Epochs:  15%|█▌        | 152/1000 [02:08<11:58]




Epochs:  11%|█         | 109/1000 [01:32<12:33]








In [8]:
day_1_model = models[0]
day_2_model = models[1]
predict_1 = day_1_model.model.predict(day_1_model.df[day_1_model.feature].iloc[len(day_1_model.df)-1].values.reshape(-1,1,len(day_1_model.feature)))
print(predict_1, predictions[0])

[[[7452.083]]] 7452.083


In [22]:
day_2_model.df[day_2_model.feature].iloc[len(day_2_model.df)-1]

청양고추_가격(원/kg)    0.590047
청양고추_거래량(kg)     0.558527
금요일              0.000000
목요일              0.000000
수요일              0.000000
월요일              0.000000
일요일              0.000000
토요일              1.000000
화요일              0.000000
resid            0.440599
Name: 3795, dtype: float64

In [9]:
day_1_model.df

Unnamed: 0,date,요일,청양고추_거래량(kg),청양고추_가격(원/kg),시금치_거래량(kg),시금치_가격(원/kg),토마토_거래량(kg),토마토_가격(원/kg),파프리카_거래량(kg),파프리카_가격(원/kg),...,풋고추_가격(원/kg),금요일,목요일,수요일,월요일,일요일,토요일,화요일,target,resid
0,2013-01-03,목요일,0.371428,0.719194,82050.0,2820.0,30970.0,2350.0,4760.0,3840.0,...,5870.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,8500.0,0.457482
1,2013-01-04,금요일,0.371895,0.713270,97580.0,3020.0,48250.0,2260.0,4540.0,4290.0,...,6260.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,8500.0,0.446439
2,2013-01-05,토요일,0.358415,0.708531,103760.0,3190.0,50650.0,2230.0,4100.0,4510.0,...,6440.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,8550.0,0.438597
3,2013-01-06,일요일,0.358415,0.708531,103760.0,3190.0,50650.0,2230.0,4100.0,4510.0,...,6440.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,8550.0,0.432596
4,2013-01-07,월요일,0.356734,0.714455,117690.0,3280.0,58180.0,2210.0,4420.0,4510.0,...,6560.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,8590.0,0.435854
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3791,2023-05-22,월요일,0.554044,0.604265,158930.0,2240.0,208530.0,2810.0,5990.0,5670.0,...,7050.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,7560.0,0.446557
3792,2023-05-23,화요일,0.554822,0.600711,158300.0,2240.0,208800.0,2800.0,5980.0,5670.0,...,7020.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,7530.0,0.446807
3793,2023-05-24,수요일,0.557562,0.597156,158000.0,2250.0,210640.0,2790.0,5980.0,5670.0,...,6990.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,7500.0,0.444306
3794,2023-05-25,목요일,0.558122,0.593602,157390.0,2250.0,211230.0,2790.0,5980.0,5660.0,...,6960.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,0.444758


In [9]:
days = [1,2,3,4,5,6,7]
features = ['청양고추', '시금치', '토마토', '파프리카', '깻잎', '미나리', '양파', '풋고추']

day1=[]
day2=[]
day3=[]
day4=[]
day5=[]
day6=[]
day7=[]

for day in days:
  print(day)
  for feature in features:
    my_nong1 = Nong1(df2, vegetable_today)
    my_nong1.set_feature(feature)
    my_nong1.set_target(day)
    print(feature)
    my_nong1.set_model()
    if day == 1:
      day1.append(my_nong1.get_price())
    if day == 2:
      day2.append(my_nong1.get_price())
    if day == 3:
      day3.append(my_nong1.get_price())
    if day == 4:
      day4.append(my_nong1.get_price())
    if day == 5:
      day5.append(my_nong1.get_price())
    if day == 6:
      day6.append(my_nong1.get_price())
    if day == 7:
      day7.append(my_nong1.get_price())

1
청양고추


Epochs:   1%|          | 8/1000 [00:09<19:42]


KeyboardInterrupt: 

In [214]:
day1, day2, day3, day4, day5, day6, day7

([4926.3555,
  2644.0574,
  2128.835,
  5032.6763,
  7036.887,
  3138.13,
  1010.06616,
  4731.6045],
 [5250.878,
  2574.186,
  2195.8887,
  5154.391,
  7239.4243,
  3274.9475,
  1027.6329,
  4976.937],
 [5425.44,
  2529.1123,
  2274.462,
  5189.2275,
  7345.473,
  3328.8271,
  1026.2704,
  5165.7744],
 [5341.48,
  2516.0156,
  2297.6765,
  5290.6025,
  7510.421,
  3376.2532,
  1034.9738,
  5282.3623],
 [5440.4814,
  2509.268,
  2301.5515,
  5319.11,
  7633.9487,
  3379.9626,
  1049.2926,
  5304.7446],
 [5436.737,
  2536.3132,
  2329.5776,
  5399.0415,
  7653.7764,
  3351.7886,
  1034.4648,
  5287.4946],
 [5720.405,
  2483.521,
  2341.559,
  5382.0063,
  7603.546,
  3384.8367,
  1045.0146,
  5369.2446])

In [215]:
c = [day1[0], day2[0], day3[0], day4[0], day5[0], day6[0], day7[0]]
s = [day1[1], day2[1], day3[1], day4[1], day5[1], day6[1], day7[1]]
t = [day1[2], day2[2], day3[2], day4[2], day5[2], day6[2], day7[2]]
p = [day1[3], day2[3], day3[3], day4[3], day5[3], day6[3], day7[3]]
g = [day1[4], day2[4], day3[4], day4[4], day5[4], day6[4], day7[4]]
m = [day1[5], day2[5], day3[5], day4[5], day5[5], day6[5], day7[5]]
y = [day1[6], day2[6], day3[6], day4[6], day5[6], day6[6], day7[6]]
u = [day1[7], day2[7], day3[7], day4[7], day5[7], day6[7], day7[7]]
models = dict(zip(['청양고추', '시금치', '토마토', '파프리카', '깻잎', '미나리', '양파', '풋고추'], [c, s, t, p, g, m, y, u]))
def method(model):
    if model in models.keys():
        return models[model]
print(models)
print(method('청양고추'))
print(method('시금치'))
print(method('토마토'))
print(method('파프리카'))
print(method('깻잎'))
print(method('미나리'))
print(method('양파'))
print(method('풋고추'))

{'청양고추': [4926.3555, 5250.878, 5425.44, 5341.48, 5440.4814, 5436.737, 5720.405], '시금치': [2644.0574, 2574.186, 2529.1123, 2516.0156, 2509.268, 2536.3132, 2483.521], '토마토': [2128.835, 2195.8887, 2274.462, 2297.6765, 2301.5515, 2329.5776, 2341.559], '파프리카': [5032.6763, 5154.391, 5189.2275, 5290.6025, 5319.11, 5399.0415, 5382.0063], '깻잎': [7036.887, 7239.4243, 7345.473, 7510.421, 7633.9487, 7653.7764, 7603.546], '미나리': [3138.13, 3274.9475, 3328.8271, 3376.2532, 3379.9626, 3351.7886, 3384.8367], '양파': [1010.06616, 1027.6329, 1026.2704, 1034.9738, 1049.2926, 1034.4648, 1045.0146], '풋고추': [4731.6045, 4976.937, 5165.7744, 5282.3623, 5304.7446, 5287.4946, 5369.2446]}
[4926.3555, 5250.878, 5425.44, 5341.48, 5440.4814, 5436.737, 5720.405]
[2644.0574, 2574.186, 2529.1123, 2516.0156, 2509.268, 2536.3132, 2483.521]
[2128.835, 2195.8887, 2274.462, 2297.6765, 2301.5515, 2329.5776, 2341.559]
[5032.6763, 5154.391, 5189.2275, 5290.6025, 5319.11, 5399.0415, 5382.0063]
[7036.887, 7239.4243, 7345.473, 7510.