In [18]:
import os, glob
import numpy as np
import pandas as pd
from scipy import stats
from datetime import date, time, datetime
from datetime import timedelta
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
import keras
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.utils import np_utils
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix,  accuracy_score


def data_select(fn_weather):
    """Load one daily weather CSV and return z-scored features plus the dates.

    The CSV must contain a ``Date`` column and the numeric columns listed in
    ``col_fix``. Values that cannot be parsed as floats (the original comment
    names "/" as the placeholder) are replaced by the last successfully
    parsed value of the same column.

    Parameters
    ----------
    fn_weather : str
        Path of the weather CSV (read as UTF-8).

    Returns
    -------
    r_data : pandas.DataFrame
        All columns except ``Date``, ``T.Max`` and ``T.Min``, plus the derived
        ``d_tmp`` (= T.Max - T.Min), standardised column by column.
    recordz : tuple of (pandas.Series, pandas.Series)
        Column means and sample standard deviations used for the scaling.
    d_data : pandas.Series
        The untouched ``Date`` column, index-aligned with ``r_data``.
    """

    def to_float_carry_forward(values):
        """Parse values as floats; unparseable text repeats the last parsed value."""
        cleaned = []
        # NaN (instead of an unbound variable) when a column *starts* with a
        # placeholder and there is nothing to carry forward yet.
        last_ok = float("nan")
        for raw in values:
            try:
                last_ok = float(raw)
            except ValueError:
                pass  # keep the previous good value
            cleaned.append(last_ok)
        return cleaned

    def to_zscore2(df):
        """Standardise every column and return the frame with its (mean, std)."""
        mean = df.mean(axis=0)
        std = df.std(axis=0)
        # Label-aligned arithmetic: no positional lookups into mean/std.
        return (df - mean) / std, (mean, std)

    r_data = pd.read_csv(fn_weather, encoding="utf-8")
    col_fix = ['T.Max', 'T.Min', 'Precp', 'Temperature', 'RH', 'StnPres', 'WS', 'WSGust']
    for col in col_fix:
        r_data[col] = to_float_carry_forward(r_data[col])

    # Daily temperature range replaces the two raw extremes.
    r_data["d_tmp"] = r_data['T.Max'] - r_data['T.Min']

    # Keep the dates aside; they are not a model feature.
    d_data = r_data["Date"]
    r_data = r_data.drop(columns=["Date", 'T.Max', 'T.Min'])

    r_data, recordz = to_zscore2(r_data)

    return r_data, recordz, d_data




def _date_to_year_week(d_date):
    """Map a "YYYY-MM-DD" string to a "<year>_<week>" key, or None.

    Days are counted from 1 January of the same year (day 0). Only the last
    day of each 7-day block (day 6, 13, ...) gets a key; every other day
    returns None so that exactly one row per week survives the later dropna.
    """
    year_s, month_s, day_s = d_date.split("-")[:3]
    year = int(year_s)
    day_of_year = (date(year, int(month_s), int(day_s)) - date(year, 1, 1)).days
    if day_of_year % 7 != 6:
        return None
    week = min(1 + day_of_year // 7, 52)  # a 53rd week is folded into week 52
    return str(year) + "_" + str(week)


def _price_diff_to_class(values):
    """Bin price differences into classes 0..11 using cut points -0.5 .. 0.5.

    The class is the number of cut points that are <= the value.
    """
    thresholds = np.array([round(q * 0.1 - 0.6, 4) for q in range(1, 12)])
    values = np.asarray(values, dtype=float)
    return (values[:, None] >= thresholds).sum(axis=1).tolist()


def data_shift(r_data, d_data, fn_price, shift1, shift2=0):
    """Build the model inputs: lagged weather rows joined to weekly prices.

    Each output row holds ``shift1 + 1`` consecutive weather rows placed side
    by side (the base row and its 1..shift1 shifted copies), joined on the
    ``y_w`` key to the price table, followed by one-hot market columns.

    Parameters
    ----------
    r_data : pandas.DataFrame
        Scaled daily weather features, one row per day.
    d_data : sequence of str
        "YYYY-MM-DD" dates, positionally aligned with ``r_data``.
    fn_price : str
        Weekly price CSV. Must contain the columns ``year``, ``week``,
        ``market``, ``w_avg``, ``w_sale``, ``y_w`` and ``price_diff``.
    shift1 : int
        Number of lagged copies to append (>= 0).
    shift2 : int, optional
        Extra shift applied to the base frame before lagging (>= 0).

    Returns
    -------
    x : numpy.ndarray
        Feature matrix (weather lags, then market dummies).
    y : list of int
        Class label (0..11) derived from ``price_diff``.
    input_d : int
        Number of feature columns, ``x.shape[1]``.

    Raises
    ------
    ValueError
        If ``shift1`` or ``shift2`` is negative.
    """
    if shift1 < 0 or shift2 < 0:
        raise ValueError("shift1 and shift2 must be non-negative")

    base = r_data.shift(periods=shift2) if shift2 else r_data.copy()

    # One concatenate instead of growing the array inside the loop.
    blocks = [base.to_numpy()]
    blocks.extend(base.shift(periods=lag).to_numpy() for lag in range(1, shift1 + 1))

    # The first shift1 + shift2 rows lack a full history (NaN from shifting).
    offset = shift1 + shift2
    lagged = pd.DataFrame(np.concatenate(blocks, axis=1)[offset:])

    # Row k of `lagged` is labelled with the k-th date. Attaching the key
    # *before* dropna keeps dates and features together even if some weather
    # rows contain NaN, and also works for shift1 == 0.
    # NOTE(review): dates are taken from the head while rows are trimmed by
    # shift1 + shift2, so shift2 only changes how many rows survive, not which
    # weather rows are paired with a date - confirm this is intended.
    dates = list(d_data)[:len(lagged)]
    lagged["y_w"] = [_date_to_year_week(d) for d in dates]
    lagged = lagged.dropna()

    prices = pd.read_csv(fn_price, encoding="utf-8").drop(columns=["year", "week"])
    joined = prices.join(lagged.set_index("y_w"), on="y_w").dropna()

    # dtype=float keeps x numeric; newer pandas would otherwise emit bool
    # dummies and turn the mixed frame into an object array.
    market_dummies = pd.get_dummies(joined["market"], dtype=float)
    joined = pd.concat([joined, market_dummies], axis=1)

    y = _price_diff_to_class(joined["price_diff"])
    features = joined.drop(columns=["market", "w_avg", "w_sale", "y_w", "price_diff"])

    x = np.array(features)
    input_d = x.shape[1]

    return x, y, input_d


def model_run(x, y, input_d, shifts, epochs=60, random_state=None):
    """Train a small dense classifier and score it on a 10 % hold-out split.

    Architecture: Dense(input_d // 4, relu) -> Dropout(0.25) ->
    Dense(shifts, relu) -> Dropout(0.25) -> Dense(n_classes, softmax),
    trained with Adam on categorical cross-entropy.

    Parameters
    ----------
    x : array-like
        Feature matrix with ``input_d`` columns.
    y : sequence of int
        Non-negative class labels; the class count is ``max(y) + 1``.
    input_d : int
        Number of input features.
    shifts : int
        Width of the second hidden layer.
    epochs : int, optional
        Training epochs.
    random_state : int or None, optional
        Seed forwarded to ``train_test_split`` so the hold-out split can be
        reproduced. It does not seed the network weights.

    Returns
    -------
    acc : float
        Hold-out accuracy in percent, rounded to two decimals.
    confusion : pandas.DataFrame
        Confusion matrix of the hold-out predictions.
    model : keras.models.Sequential
        The fitted model.
    """
    out_class_count = len(np.bincount(y))

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.1, random_state=random_state
    )

    model = Sequential()
    model.add(Dense(units=input_d // 4, input_dim=input_d,
                    kernel_initializer="random_normal", activation="relu"))
    model.add(Dropout(0.25))
    model.add(Dense(units=shifts, kernel_initializer="random_normal", activation="relu"))
    model.add(Dropout(0.25))
    model.add(Dense(units=out_class_count, kernel_initializer="random_normal",
                    activation="softmax"))
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])

    # Fix the one-hot width to the output layer size; otherwise a training
    # split that happens to miss the highest class produces a narrower target
    # matrix and fit() fails with a shape mismatch.
    y_train_onehot = keras.utils.to_categorical(y_train, num_classes=out_class_count)
    model.fit(x=x_train, y=y_train_onehot,
              validation_split=0.1, epochs=epochs, verbose=0)

    # Sequential.predict_classes() no longer exists in current Keras; argmax
    # over the softmax output gives the same class indices.
    pre = np.argmax(model.predict(x_test, verbose=0), axis=1)
    acc = round(accuracy_score(y_test, pre) * 100, 2)

    return acc, pd.DataFrame(confusion_matrix(y_test, pre)), model
    

In [28]:
# Candidate input files. glob returns entries in filesystem order, which is
# not guaranteed to be stable; sort so the positional indices in `select_t`
# always refer to the same files.
w_list = sorted(glob.glob("../result/merge_weather/*.csv"))
p_list = sorted(glob.glob("../result/flower_price_byweek/*"))
# (index into w_list, index into p_list) pairs to train on.
select_t = [(1, 0), (3, 2), (2, 1), (2, 3), (2, 4)]


def run_model_build(fn_weather, fn_price, shift2=0):
    """Train one model for a (weather station, flower price) file pair.

    Parameters
    ----------
    fn_weather : str
        Weather CSV path; the file name up to the first "." is the station id.
    fn_price : str
        Weekly price CSV path; the file name up to the first "_" is the
        flower name.
    shift2 : int, optional
        Forwarded to ``data_shift``.

    Returns
    -------
    list
        ``[station, flower, recordz, [acc, (shift1, shift2, epochs)], model]``
        where ``recordz`` is the (mean, std) pair from ``data_select``.
    """
    # os.path.basename understands the native separator, so this also works
    # where glob returns "/"-separated paths (splitting on "\\" did not).
    test_w = os.path.basename(fn_weather).split(".")[0]
    test_p = os.path.basename(fn_price).split("_")[0]
    rs_data = [test_w, test_p]
    print(rs_data)

    # NOTE(review): the original called `data_selectv2`, which is not defined
    # in the visible notebook; `data_select` has the matching signature and
    # return triple - confirm no other definition was intended.
    r_data, recordz, d_data = data_select(fn_weather)
    rs_data.append(recordz)

    # Single fixed configuration; the former grid-search scaffolding is gone.
    shift1 = 28
    epochs = 80
    print("data_shift_require=", shift1)

    x, y, input_d = data_shift(r_data, d_data, fn_price, shift1, shift2)
    acc, df_conf, model = model_run(x, y, input_d, shift1, epochs)

    # Always record the run, even when accuracy is 0 (previously that left
    # the result variables unbound).
    set_info = (shift1, shift2, epochs)
    rs_data.append([acc, set_info])
    rs_data.append(model)
    return rs_data

# Train one (station, flower) pair and persist the model and scaling record.
shift2 = 0
# for st, flower in select_t:
st, flower = select_t[0]
fn_weather = w_list[st]
fn_price = p_list[flower]

result = run_model_build(fn_weather, fn_price, shift2)

station, flower, recordz, bestmodel_info, bestmodel = result
best_acc, (best_shift1, best_shift2, best_epochs) = bestmodel_info

# Make sure the output folder exists before writing into it.
model_dir = "./model_test0519/"
os.makedirs(model_dir, exist_ok=True)

# File name layout: <station>_<flower>_shift<shift2>_<shift1>_epo<epochs>.h5
fn_model = model_dir + station + "_" + flower + "_shift" + str(best_shift2) + \
           "_" + str(best_shift1) + "_epo" + str(best_epochs) + ".h5"
fn_rec = model_dir + station + ".csv"

print(best_acc)
bestmodel.save(fn_model)

# Two rows (mean, std) so the same scaling can be re-applied later.
df_rec = pd.DataFrame(recordz)
df_rec["info"] = ["mean", "std"]
df_rec.to_csv(fn_rec, index=False, encoding="utf-8")
print("datasaved")

['C0F9L0_后里', 'Anthurium']
data_shift_require= 28
93.98
datasaved


95.11
datasaved


0

In [15]:
def pred_weather_input(fn_weather, station_info):
    """Prepare weather features and locate the saved model for a prediction.

    The weather file is cleaned and z-scored with the same steps as
    ``data_select``. The model file is then looked up in ``./model_test0519/``
    by flower name and by the shift encoded in its file name, which follows
    ``<station>_<flower>_shift<shift2>_<shift1>_epo<epochs>.h5``.

    Parameters
    ----------
    fn_weather : str
        Weather CSV path (read as UTF-8).
    station_info : tuple
        ``(pre_week, pre_flower, pre_market)``; ``pre_week`` selects
        ``shift2 = 7 * (pre_week - 1)``. ``pre_market`` is currently unused.

    Returns
    -------
    tuple
        ``(r_data, recordz, d_data, model_fn, shift1, shift2)``. The original
        function fell off the end and returned nothing.

    Raises
    ------
    FileNotFoundError
        If no saved model matches the flower and ``shift2``.
    """
    import re  # local import: `re` is not part of the notebook's import cell

    def to_float_carry_forward(values):
        """Parse values as floats; unparseable text repeats the last parsed value."""
        cleaned = []
        last_ok = float("nan")  # nothing to carry forward yet
        for raw in values:
            try:
                last_ok = float(raw)
            except ValueError:
                pass
            cleaned.append(last_ok)
        return cleaned

    def to_zscore2(df):
        """Standardise every column and return the frame with its (mean, std)."""
        mean = df.mean(axis=0)
        std = df.std(axis=0)
        return (df - mean) / std, (mean, std)

    r_data = pd.read_csv(fn_weather, encoding="utf-8")
    col_fix = ['T.Max', 'T.Min', 'Precp', 'Temperature', 'RH', 'StnPres', 'WS', 'WSGust']
    for col in col_fix:
        r_data[col] = to_float_carry_forward(r_data[col])
    r_data["d_tmp"] = r_data['T.Max'] - r_data['T.Min']

    # Keep the dates aside; they are not a model feature.
    d_data = r_data["Date"]
    r_data = r_data.drop(columns=["Date", 'T.Max', 'T.Min'])

    # NOTE(review): this re-derives mean/std from the prediction data; the
    # statistics saved at training time are presumably what should be applied
    # here - confirm.
    r_data, recordz = to_zscore2(r_data)

    pre_week, pre_flower, pre_market = station_info
    shift2 = 7 * (pre_week - 1)

    # Parse the shifts from the file name instead of counting "_" tokens
    # (station ids may themselves contain "_") and compare shift2 exactly so
    # that "shift1" cannot match "shift14".
    name_pattern = re.compile(r"_shift(\d+)_(\d+)_epo(\d+)")
    model_fn = None
    shift1 = None
    # NOTE(review): candidates are filtered by flower only, not by station.
    for candidate in sorted(glob.glob("./model_test0519//*" + pre_flower + "*")):
        found = name_pattern.search(os.path.basename(candidate))
        if found and int(found.group(1)) == shift2:
            model_fn = candidate  # as before, the last match wins
            shift1 = int(found.group(2))

    if model_fn is None:
        raise FileNotFoundError(
            "no saved model for flower %r with shift%d in ./model_test0519/"
            % (pre_flower, shift2)
        )

    return r_data, recordz, d_data, model_fn, shift1, shift2

def _date_to_year_week(d_date):
    """Map a "YYYY-MM-DD" string to a "<year>_<week>" key, or None.

    Days are counted from 1 January of the same year (day 0). Only the last
    day of each 7-day block (day 6, 13, ...) gets a key; every other day
    returns None so that exactly one row per week survives the later dropna.
    """
    year_s, month_s, day_s = d_date.split("-")[:3]
    year = int(year_s)
    day_of_year = (date(year, int(month_s), int(day_s)) - date(year, 1, 1)).days
    if day_of_year % 7 != 6:
        return None
    week = min(1 + day_of_year // 7, 52)  # a 53rd week is folded into week 52
    return str(year) + "_" + str(week)


def _price_diff_to_class(values):
    """Bin price differences into classes 0..11 using cut points -0.5 .. 0.5.

    The class is the number of cut points that are <= the value.
    """
    thresholds = np.array([round(q * 0.1 - 0.6, 4) for q in range(1, 12)])
    values = np.asarray(values, dtype=float)
    return (values[:, None] >= thresholds).sum(axis=1).tolist()


# The signature in this cell was missing the comma between `fn_price` and
# `shift1`, so the cell could not be parsed.
def data_shift(r_data, d_data, fn_price, shift1, shift2=0):
    """Build the model inputs: lagged weather rows joined to weekly prices.

    Each output row holds ``shift1 + 1`` consecutive weather rows placed side
    by side (the base row and its 1..shift1 shifted copies), joined on the
    ``y_w`` key to the price table, followed by one-hot market columns.

    Parameters
    ----------
    r_data : pandas.DataFrame
        Scaled daily weather features, one row per day.
    d_data : sequence of str
        "YYYY-MM-DD" dates, positionally aligned with ``r_data``.
    fn_price : str
        Weekly price CSV. Must contain the columns ``year``, ``week``,
        ``market``, ``w_avg``, ``w_sale``, ``y_w`` and ``price_diff``.
    shift1 : int
        Number of lagged copies to append (>= 0).
    shift2 : int, optional
        Extra shift applied to the base frame before lagging (>= 0).

    Returns
    -------
    x : numpy.ndarray
        Feature matrix (weather lags, then market dummies).
    y : list of int
        Class label (0..11) derived from ``price_diff``.
    input_d : int
        Number of feature columns, ``x.shape[1]``.

    Raises
    ------
    ValueError
        If ``shift1`` or ``shift2`` is negative.
    """
    if shift1 < 0 or shift2 < 0:
        raise ValueError("shift1 and shift2 must be non-negative")

    base = r_data.shift(periods=shift2) if shift2 else r_data.copy()

    # One concatenate instead of growing the array inside the loop.
    blocks = [base.to_numpy()]
    blocks.extend(base.shift(periods=lag).to_numpy() for lag in range(1, shift1 + 1))

    # The first shift1 + shift2 rows lack a full history (NaN from shifting).
    offset = shift1 + shift2
    lagged = pd.DataFrame(np.concatenate(blocks, axis=1)[offset:])

    # Row k of `lagged` is labelled with the k-th date. Attaching the key
    # *before* dropna keeps dates and features together even if some weather
    # rows contain NaN, and also works for shift1 == 0.
    # NOTE(review): dates are taken from the head while rows are trimmed by
    # shift1 + shift2, so shift2 only changes how many rows survive, not which
    # weather rows are paired with a date - confirm this is intended.
    dates = list(d_data)[:len(lagged)]
    lagged["y_w"] = [_date_to_year_week(d) for d in dates]
    lagged = lagged.dropna()

    prices = pd.read_csv(fn_price, encoding="utf-8").drop(columns=["year", "week"])
    joined = prices.join(lagged.set_index("y_w"), on="y_w").dropna()

    # dtype=float keeps x numeric; newer pandas would otherwise emit bool
    # dummies and turn the mixed frame into an object array.
    market_dummies = pd.get_dummies(joined["market"], dtype=float)
    joined = pd.concat([joined, market_dummies], axis=1)

    y = _price_diff_to_class(joined["price_diff"])
    features = joined.drop(columns=["market", "w_avg", "w_sale", "y_w", "price_diff"])

    x = np.array(features)
    input_d = x.shape[1]

    return x, y, input_d


(Temperature      24.226559
 Precp             3.687634
 RH               78.808065
 StnPres        1009.065968
 WS                0.967742
 WSGust            6.073172
 d_tmp             9.274247
 dtype: float64, Temperature     4.999712
 Precp          14.968040
 RH              8.197747
 StnPres         5.919220
 WS              0.368651
 WSGust          2.198037
 d_tmp           2.548539
 dtype: float64)

In [12]:
# Prediction scaffold (intended to become `model_pred()`).
opt_week = [1, 2]
opt_flower = ['Anthurium', 'Chrysanthemum', 'OrientalLily', 'Eustoma', 'Rose']
opt_market = ["台北", "台中", "台南", "高雄", "彰化"]
# (weeks ahead, flower, market) selecting which saved model to use.
model_select_info = (opt_week[0], opt_flower[0], opt_market[0])

# Import weather data.
# NOTE(review): the original called `data_selectv2(fn_weather, fn_price)`,
# which is not defined in the visible notebook; `data_select` takes only the
# weather file and returns the same triple - confirm.
r_data, recordz, d_data = data_select(fn_weather)

# TODO: shift and merge the weather rows to the model's input length.
# TODO: run the model prediction.


96.35


(1, 'Anthurium', '台北')