In [1]:
# conda install -c conda-forge tensorflow=1.15

In [2]:
import tensorflow as tf
import numpy as np
from pandas.io.parsers import read_csv
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [3]:
data = read_csv('price_data.csv', sep=',')

In [4]:
data = data[data.avgPrice < 6500]
data

Unnamed: 0,year,avgTemp,minTemp,maxTemp,rainFall,avgPrice
0,20100101,-4.9,-11.0,0.9,0.0,2123
1,20100102,-3.1,-5.5,5.5,0.8,2123
2,20100103,-2.9,-6.9,1.4,0.0,2123
3,20100104,-1.8,-5.1,2.2,5.9,2020
4,20100105,-5.2,-8.7,-1.8,0.7,2060
...,...,...,...,...,...,...
2917,20171227,-3.9,-8.0,0.7,0.0,2865
2918,20171228,-1.5,-6.9,3.7,0.0,2884
2919,20171229,2.9,-2.1,8.0,0.0,2901
2920,20171230,2.9,-1.6,7.1,0.6,2901


In [5]:
data['year_1'] = data['year'].apply(lambda x: str(x)[:4])
data['month'] = data['year'].apply(lambda x: str(x)[4:6])
data.head()

Unnamed: 0,year,avgTemp,minTemp,maxTemp,rainFall,avgPrice,year_1,month
0,20100101,-4.9,-11.0,0.9,0.0,2123,2010,1
1,20100102,-3.1,-5.5,5.5,0.8,2123,2010,1
2,20100103,-2.9,-6.9,1.4,0.0,2123,2010,1
3,20100104,-1.8,-5.1,2.2,5.9,2020,2010,1
4,20100105,-5.2,-8.7,-1.8,0.7,2060,2010,1


In [6]:
data.tail()

Unnamed: 0,year,avgTemp,minTemp,maxTemp,rainFall,avgPrice,year_1,month
2917,20171227,-3.9,-8.0,0.7,0.0,2865,2017,12
2918,20171228,-1.5,-6.9,3.7,0.0,2884,2017,12
2919,20171229,2.9,-2.1,8.0,0.0,2901,2017,12
2920,20171230,2.9,-1.6,7.1,0.6,2901,2017,12
2921,20171231,2.1,-2.0,5.8,0.4,2901,2017,12


In [9]:
means = data.groupby([data['year_1'],data['month']],as_index=False).mean()
means

Unnamed: 0,year_1,month,year,avgTemp,minTemp,maxTemp,rainFall,avgPrice
0,2010,01,20100116.0,-1.851613,-6.635484,3.577419,0.967742,2169.612903
1,2010,02,20100214.5,2.353571,-1.985714,7.203571,2.932143,3082.857143
2,2010,03,20100316.0,5.458065,1.125806,9.880645,2.874194,4414.193548
3,2010,04,20100415.5,9.863333,4.493333,15.456667,2.970000,5850.000000
4,2010,05,20100516.0,17.087097,11.538710,23.041935,4.003226,4329.000000
...,...,...,...,...,...,...,...,...
90,2017,08,20170810.5,26.045000,22.845000,30.280000,8.465000,5549.300000
91,2017,09,20170922.5,19.956250,14.893750,25.843750,1.687500,6241.437500
92,2017,10,20171016.0,15.312903,10.803226,20.967742,2.170968,4387.774194
93,2017,11,20171115.5,6.803333,1.260000,12.923333,0.413333,2580.566667


In [10]:
month_price = means[['year_1','month','avgTemp','minTemp','maxTemp','rainFall','year','avgPrice']]
month_price = month_price[month_price.columns[6:8]]
month_price

Unnamed: 0,year,avgPrice
0,20100116.0,2169.612903
1,20100214.5,3082.857143
2,20100316.0,4414.193548
3,20100415.5,5850.000000
4,20100516.0,4329.000000
...,...,...
90,20170810.5,5549.300000
91,20170922.5,6241.437500
92,20171016.0,4387.774194
93,20171115.5,2580.566667


In [11]:
month_price['year_2'] = month_price['year'].apply(lambda x: str(x)[0:6])
month_price['avgPrice_2'] = month_price['avgPrice']
month_price = month_price[month_price.columns[2:4]]
month_price

Unnamed: 0,year_2,avgPrice_2
0,201001,2169.612903
1,201002,3082.857143
2,201003,4414.193548
3,201004,5850.000000
4,201005,4329.000000
...,...,...
90,201708,5549.300000
91,201709,6241.437500
92,201710,4387.774194
93,201711,2580.566667


In [12]:
data['year_2'] = data['year'].apply(lambda x: str(x)[0:6])
data.head()

Unnamed: 0,year,avgTemp,minTemp,maxTemp,rainFall,avgPrice,year_1,month,year_2
0,20100101,-4.9,-11.0,0.9,0.0,2123,2010,1,201001
1,20100102,-3.1,-5.5,5.5,0.8,2123,2010,1,201001
2,20100103,-2.9,-6.9,1.4,0.0,2123,2010,1,201001
3,20100104,-1.8,-5.1,2.2,5.9,2020,2010,1,201001
4,20100105,-5.2,-8.7,-1.8,0.7,2060,2010,1,201001


In [13]:
import pandas as pd

In [14]:
last = pd.merge(data,month_price,how='outer')

In [15]:
last.shape

(2825, 10)

In [16]:
last_2 = last.drop(['avgPrice','year_1','month','year_2'],axis=1)
last_2.head()

Unnamed: 0,year,avgTemp,minTemp,maxTemp,rainFall,avgPrice_2
0,20100101,-4.9,-11.0,0.9,0.0,2169.612903
1,20100102,-3.1,-5.5,5.5,0.8,2169.612903
2,20100103,-2.9,-6.9,1.4,0.0,2169.612903
3,20100104,-1.8,-5.1,2.2,5.9,2169.612903
4,20100105,-5.2,-8.7,-1.8,0.7,2169.612903


In [17]:
minmax = MinMaxScaler()

In [18]:
xy = np.array(last_2, dtype=np.float32)
yx = np.array(last_2.iloc[:, [-1]], dtype=np.float32 )

In [19]:
xy = minmax.fit_transform(xy)

In [20]:
model = tf.global_variables_initializer();

In [21]:
# 4개의 변인을 입력을 받습니다.
x_data = xy[:, 1:-1]

In [22]:
# 가격 값을 입력 받습니다.
y_data = yx

In [23]:
X_train, X_test, y_train, y_test = train_test_split(x_data,y_data,random_state=0)

In [24]:
# 플레이스 홀더를 설정합니다.
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])

In [25]:
# y = wX+b
W = tf.Variable(tf.random_normal([4, 1]), name="weight")
b = tf.Variable(tf.random_normal([1]), name="bias")

In [26]:
# 가설을 설정합니다. y = wX+b
# tensorflow 행렬곱 합수 matmul
hypothesis = tf.matmul(X, W) + b
# hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)

In [27]:
# 비용 함수를 설정합니다. (예측값 - 실제값)제곱의 평균
# reduce_mean은 특정 차원을 제거하고 평균을 구한다.
cost = tf.reduce_mean(tf.square(hypothesis - Y))
correct_prediction = tf.equal(tf.argmax(hypothesis,1),tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

In [33]:
mae = tf.reduce_mean(tf.abs(hypothesis - Y))

In [34]:
# 최적화 함수를 설정합니다.
optimizer = tf.train.MomentumOptimizer(learning_rate=0.0005,momentum=0.9)
train = optimizer.minimize(cost)

In [35]:
# 세션을 생성합니다.
sess = tf.Session()

In [36]:
# 글로벌 변수를 초기화합니다.
sess.run(tf.global_variables_initializer())

In [None]:
# 학습을 수행합니다.
for step in range(100001):
    mae_, hypo_, _ = sess.run([mae, hypothesis, train], feed_dict={X: X_train, Y: y_train})
    if step % 1000 == 0:
        print("#", step, " 손실 비용: ", mae_)
        print("- 배추 가격: ", hypo_[0])
        
# 손실비용 대신 MAE(Mean absolute error) 사용해보기      

# 0  손실 비용:  3112.0505
- 배추 가격:  [0.6897178]
# 1000  손실 비용:  3102.1248
- 배추 가격:  [12.997787]
# 2000  손실 비용:  3092.1118
- 배추 가격:  [25.417404]
# 3000  손실 비용:  3082.0972
- 배추 가격:  [37.836937]
# 4000  손실 비용:  3072.0835
- 배추 가격:  [50.25524]
# 5000  손실 비용:  3062.071
- 배추 가격:  [62.67344]
# 6000  손실 비용:  3052.0574
- 배추 가격:  [75.09211]
# 7000  손실 비용:  3042.0437
- 배추 가격:  [87.51262]
# 8000  손실 비용:  3032.0273
- 배추 가격:  [99.93358]
# 9000  손실 비용:  3022.0132
- 배추 가격:  [112.354546]
# 10000  손실 비용:  3011.9973
- 배추 가격:  [124.77551]
# 11000  손실 비용:  3001.9822
- 배추 가격:  [137.19647]
# 12000  손실 비용:  2991.9678
- 배추 가격:  [149.616]
# 13000  손실 비용:  2981.9546
- 배추 가격:  [162.03384]
# 14000  손실 비용:  2971.944
- 배추 가격:  [174.44928]
# 15000  손실 비용:  2961.934
- 배추 가격:  [186.86469]
# 16000  손실 비용:  2951.9236
- 배추 가격:  [199.28012]
# 17000  손실 비용:  2941.9126
- 배추 가격:  [211.69554]
# 18000  손실 비용:  2931.903
- 배추 가격:  [224.11096]
# 19000  손실 비용:  2921.8918
- 배추 가격:  [236.5264]
# 20000  손실 비용:  2911.883
- 배추 가격:  [248.941

In [32]:
# 글로벌 변수를 초기화합니다.
sess.run(tf.global_variables_initializer())

In [33]:
correct_prediction = tf.equal(tf.argmax(hypothesis,1),tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

In [41]:
for step in range(100001):
    mae_, hypo_,accuracy_, _ = sess.run([mae, hypothesis, accuracy, train], feed_dict={X: X_test, Y: y_test})
    if step % 1000 == 0:
        print("#", step, " 손실 비용: ", mae_)        
        print("- 배추 가격: ", hypo_[0])
        if step == 1000:
            print("@", '정확도', accuracy_)       

# 0  손실 비용:  3281.197
- 배추 가격:  [1.0603071]
# 1000  손실 비용:  1026.4772
- 배추 가격:  [2472.4446]
@ 정확도 1.0
# 2000  손실 비용:  1011.7071
- 배추 가격:  [2682.448]
# 3000  손실 비용:  1006.7701
- 배추 가격:  [2778.0671]
# 4000  손실 비용:  1004.34644
- 배추 가격:  [2821.3896]
# 5000  손실 비용:  1003.0683
- 배추 가격:  [2840.7886]
# 6000  손실 비용:  1002.2327
- 배추 가격:  [2849.229]
# 7000  손실 비용:  1001.7071
- 배추 가격:  [2852.6382]
# 8000  손실 비용:  1001.2931
- 배추 가격:  [2853.726]
# 9000  손실 비용:  1000.9403
- 배추 가격:  [2853.739]
# 10000  손실 비용:  1000.6297
- 배추 가격:  [2853.2458]
# 11000  손실 비용:  1000.3414
- 배추 가격:  [2852.5132]
# 12000  손실 비용:  1000.06714
- 배추 가격:  [2851.637]
# 13000  손실 비용:  999.80316
- 배추 가격:  [2850.7412]
# 14000  손실 비용:  999.54913
- 배추 가격:  [2849.769]
# 15000  손실 비용:  999.30457
- 배추 가격:  [2848.7676]
# 16000  손실 비용:  999.06866
- 배추 가격:  [2847.7468]
# 17000  손실 비용:  998.83966
- 배추 가격:  [2846.7285]
# 18000  손실 비용:  998.6188
- 배추 가격:  [2845.7148]
# 19000  손실 비용:  998.4038
- 배추 가격:  [2844.7068]
# 20000  손실 비용:  998.19684
- 배

In [36]:
# 학습된 모델을 저장합니다.
saver = tf.train.Saver()
save_path = saver.save(sess, "./saved.cpkt")
print('학습된 모델을 저장했습니다.')

학습된 모델을 저장했습니다.
