In [2]:
import os
import logging
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
#os.environ["CUDA_VISIBLE_DEVICES"]="0,2,5,7"

from tensorflow.keras.models import load_model
from tensorflow.keras.utils import get_custom_objects
import pandas as pd
import numpy as np
from tensorflow.keras.backend import sigmoid
import tensorflow as tf
import tensorflow.keras.backend as K
from multiprocessing.dummy import Pool as ThreadPool

def swish(x, beta = 1):
    # https://www.geeksforgeeks.org/ml-swish-function-by-google-in-keras/
    return (x * sigmoid(beta * x))
def leaky_relu(x):
    return tf.nn.leaky_relu(x, alpha=0.25)
def get_weighted_loss(weights):
    def weighted_loss(y_true, y_pred):
        return K.mean((weights[:,0]**(1-y_true))*(weights[:,1]**(y_true))*K.binary_crossentropy(y_true, y_pred), axis=-1)
    return weighted_loss
def weighted_loss(y_true, y_pred):
    return K.mean((weights[:,0]**(1-y_true))*(weights[:,1]**(y_true))*K.binary_crossentropy(y_true, y_pred), axis=-1)
def image_convert(a):
    new = np.zeros((30, 30))
    for i in range(30):
        if int(a[i]*100)==0:
            d = 29
        elif int(a[i]*100)==100:
            d = 0
        else:
            d = 30-int(30-(1-a[i])*30)
            if d==30:
                d = 29
        new[d, i] = 1
    return new
def parsing_groupby(data_train_, ind):
    df = data_train_.groupby([ind,ind+'-P']).size().rename('num').reset_index(drop = False)
    df.columns = ['before', 'after', 'num']
    df['col'] = ind
    return df

get_custom_objects().update({'swish': swish})
get_custom_objects().update({'leaky_relu': leaky_relu})
get_custom_objects().update({"get_weighted_loss": get_weighted_loss})
get_custom_objects().update({"weighted_loss": weighted_loss})

In [2]:
banana_X_train = pd.read_pickle("banana_X_train.pkl")
banana_X_val = pd.read_pickle("banana_X_val.pkl")
banana_Y_train = pd.read_pickle("banana_Y_train.pkl")
banana_Y_val = pd.read_pickle("banana_Y_val.pkl")
cols_name = [[y+'-'+str(x).rjust(2,"0") for x in range(1,31)] for y in ['RSV','上價中位數','下價中位數','中價中位數','交易量','平均價','雨量']]
cols_name_2 = [x for x in banana_X_train.columns.tolist() if '-' not in x]

train_X_num = banana_X_train[cols_name_2[1:]].values
val_X_num = banana_X_val[cols_name_2[1:]].values

train_Y_1 = banana_Y_train[[x for x in banana_Y_train.columns if '成本價格' in x]].values
val_Y_1 = banana_Y_val[[x for x in banana_Y_val.columns if '成本價格' in x]].values

In [3]:
train_X_lstm_1 = []
for i in range(len(cols_name)):
    tmp = banana_X_train[cols_name[i]].iloc[:,-7:]
    train_X_lstm_1.append(tmp)
train_X_lstm_1 = np.stack(train_X_lstm_1,axis=2)
val_X_lstm_1 = []
for i in range(len(cols_name)):
    tmp = banana_X_val[cols_name[i]].iloc[:,-7:]
    val_X_lstm_1.append(tmp)
val_X_lstm_1 = np.stack(val_X_lstm_1,axis=2)

train_X_lstm_2 = []
for i in range(len(cols_name)):
    tmp_all = []
    tmp = banana_X_train[cols_name[i]].iloc[:,-13:]
    for j in range(7):
        tmp_1 = tmp.iloc[:,j:j+7]
        tmp_all.append(tmp_1)
    tmp_all = np.stack(tmp_all,axis=2)
    train_X_lstm_2.append(tmp_all)
train_X_lstm_2 = np.stack(train_X_lstm_2,axis=3)

val_X_lstm_2 = []
for i in range(len(cols_name)):
    tmp_all = []
    tmp = banana_X_val[cols_name[i]].iloc[:,-13:]
    for j in range(7):
        tmp_1 = tmp.iloc[:,j:j+7]
        tmp_all.append(tmp_1)
    tmp_all = np.stack(tmp_all,axis=2)
    val_X_lstm_2.append(tmp_all)
val_X_lstm_2 = np.stack(val_X_lstm_2,axis=3)

train_X_lstm_4 = np.stack([np.stack([image_convert(x) for x in banana_X_train[cols_name[y]].values]) for y in range(len(cols_name))], axis=3)
val_X_lstm_4 = np.stack([np.stack([image_convert(x) for x in banana_X_val[cols_name[y]].values]) for y in range(len(cols_name))], axis=3)

data_X = {'Conv1D':[train_X_lstm_1, val_X_lstm_1],
          'Vanilla':[train_X_lstm_1, val_X_lstm_1],
          'Stacked':[train_X_lstm_1, val_X_lstm_1],
          'Bidirectional':[train_X_lstm_1, val_X_lstm_1],
          'Conv1D_LSTM':[train_X_lstm_2, val_X_lstm_2],
          'Conv2D_1':[train_X_lstm_2, val_X_lstm_2],
          'Conv2D_2':[train_X_lstm_4, val_X_lstm_4]}

In [4]:
def model_predict(source, x1, model_, x2, data_all):
    data_train = model_.predict([x1, x2], verbose = 0)
    data_train[data_train>=0.5] = 1
    data_train[data_train<0.5] = 0
    data_train = pd.DataFrame(data_train)
    data_train.columns = ['成本價格-02-P', '成本價格-03-P', '成本價格-04-P', '成本價格-05-P', '成本價格-06-P']
    data_train = pd.concat([data_all.reset_index(drop = True), data_train], axis=1)
    data_train['source'] = source

    df = pd.concat([parsing_groupby(data_train, x) for x in ['成本價格-02','成本價格-03','成本價格-04','成本價格-05','成本價格-06']])
    df = pd.merge(df,
                  df.groupby(['col','before'])['num'].sum().reset_index(drop = False).rename(columns={'num':'num_total'}),
                  on = ['col','before'],
                  how = 'left')
    df['per'] = df['num']/df['num_total']
    df['source'] = source
    return data_train, df
def predict_all(list_route_, dataset, data_X_ = data_X):
    if dataset == "train":
        X = data_X_[list_route_.split("/")[1]][0]
        Y = train_X_num
        al = banana_Y_train
    else:
        X = data_X_[list_route_.split("/")[1]][1]
        Y = val_X_num
        al = banana_Y_val
        
    final_model_ = list_route_.replace("history.png", "final.h5")
    final_model = load_model(final_model_)
    data_all1, data_groupby1 = model_predict(source = final_model_, 
                                             x1 = X, x2 = Y, data_all = al,
                                             model_ = final_model)

    acc_model_ = list_route_.replace("history.png", "weights_accuracy.hdf5")
    acc_model = load_model(acc_model_)
    data_all2, data_groupby2 = model_predict(source = acc_model_, 
                                             x1 = X, x2 = Y, data_all = al,
                                             model_ = final_model)

    loss_model_ = list_route_.replace("history.png", "weights_loss.hdf5")
    loss_model = load_model(loss_model_)
    data_all3, data_groupby3 = model_predict(source = loss_model_, 
                                             x1 = X, x2 = Y, data_all = al,
                                             model_ = final_model)

    data_all = pd.concat([data_all1,data_all2,data_all3]).reset_index(drop = True)
    data_groupby = pd.concat([data_groupby1,data_groupby2,data_groupby3]).reset_index(drop = True)
    data_all['dataset'] = dataset
    data_groupby['dataset'] = dataset
    return data_all, data_groupby

In [5]:
def cal_all_acc(list_route_):
    data_all = []
    data_groupby = []
    data_error = []
    for i in range(len(list_route_)):
        print(i, len(list_route_))
        try:
            data_all_train, data_groupby_train = predict_all(list_route_[i], dataset = 'train')
            data_all_val, data_groupby_val = predict_all(list_route_[i], dataset = 'val')
            result = "OK"
        except:
            print("error", list_route_[i])
            data_error.append(list_route_[i])
            result = "fail"
        if result == "OK":
            data_all_ = pd.concat([data_all_train, data_all_val]).reset_index(drop = True)
            data_groupby_ = pd.concat([data_groupby_train, data_groupby_val]).reset_index(drop = True)
            data_all.append(data_all_)
            data_groupby.append(data_groupby_)
            del data_all_train, data_groupby_train, data_all_val, data_groupby_val, result, data_all_, data_groupby_
    data_all = pd.concat(data_all).reset_index(drop = True)
    data_groupby = pd.concat(data_groupby).reset_index(drop = True)
    return data_all, data_groupby, data_error

list_route = [os.path.join(path, name) for path, subdirs, files in os.walk('model') for name in files]
list_route = [x for x in list_route if 'history.png' in x]
list_route = [list_route[x:x+125] for x in range(0, len(list_route), 125)]
print(len(list_route))
#list_route = [x for x in list_route if 'history.png' in x][50:100]
#cal_all_acc(list_route)
pool = ThreadPool(len(list_route))
data = pool.map(cal_all_acc, list_route)

17
0 125
0 125
0 125
0 125
0 125
0 125
0 125
0 125
0 125
0 125
0 125
0 125
0 125
0 125
0 125
0 125
0 100
1 125
1 125
1 125
1 125
1 125
1 100
1 125
1 125
1 125
1 125
1 125
1 125
1 125
1 125
1 125
1 125
1 125
2 125
2 125
2 125
2 125
2 125
2 125
2 125
2 125
2 100
2 125
2 125
2 125
2 125
3 125
3 125
2 125
3 125
2 125
2 125
3 125
2 125
3 125
3 125
3 125
3 125
4 125
4 125
3 100
3 125
4 125
3 125
3 125
3 125
4 125
3 125
4 125
4 125
3 125
4 125
3 125
5 125
3 125
5 125
5 125
4 125
5 125
4 100
4 125
4 125
4 125
5 125
6 125
4 125
5 125
4 125
6 125
5 125
4 125
6 125
6 125
4 125
4 125
5 125
5 125
6 125
5 125
7 125
5 100
7 125
5 125
6 125
5 125
6 125
7 125
5 125
7 125
5 125
7 125
8 125
8 125
6 125
5 125
5 125
7 125
6 125
6 125
6 125
8 125
7 125
6 100
6 125
8 125
9 125
6 125
8 125
9 125
6 125
8 125
9 125
7 125
8 125
7 125
7 125
7 125
7 100
10 125
6 125
7 125
6 125
9 125
10 125
9 125
7 125
10 125
9 125
9 125
8 125
8 125
8 125
8 125
8 100
11 125
7 125
10 125
11 125
10 125
10 125
8 125
11 125
7 125
7 12

63 100
77 125
94 125
81 125
72 125
63 125
64 125
90 125
80 125
76 125
62 125
63 125
78 125
56 125
63 125
56 125
95 125
91 125
73 125
78 125
64 100
57 125
77 125
82 125
81 125
64 125
65 125
79 125
74 125
92 125
96 125
63 125
64 125
64 125
57 125
57 125
83 125
78 125
65 100
79 125
82 125
58 125
80 125
93 125
97 125
65 125
66 125
64 125
75 125
65 125
58 125
65 125
79 125
84 125
98 125
58 125
94 125
83 125
80 125
66 100
67 125
81 125
65 125
76 125
66 125
59 125
85 125
99 125
95 125
66 125
66 125
80 125
59 125
81 125
84 125
59 125
82 125
77 125
66 125
67 100
68 125
96 125
67 125
100 125
60 125
86 125
67 125
67 125
81 125
85 125
60 125
83 125
78 125
101 125
97 125
82 125
60 125
67 125
68 100
87 125
69 125
68 125
102 125
82 125
61 125
86 125
68 125
68 125
84 125
98 125
61 125
79 125
83 125
61 125
69 100
88 125
103 125
83 125
68 125
69 125
87 125
70 125
62 125
99 125
85 125
69 125
69 125
80 125
89 125
104 125
62 125
62 125
84 125
84 125
70 100
88 125
100 125
69 125
81 125
70 125
86 125
105 125

In [14]:
data_all = pd.concat([x[0] for x in data]).reset_index(drop = True)
data_groupby = pd.concat([x[1] for x in data]).reset_index(drop = True)
error_list = sum([x[2] for x in data], [])
data_all.to_csv("inference_all_banana_classification.csv", index = False)
data_groupby.to_csv("inference_groupby_banana_classification.csv", index = False)
data_error = cal_all_acc(error_list)
data_error[0].to_csv("inference_all_banana_classification_2.csv", index = False)
data_error[1].to_csv("inference_groupby_banana_classification_2.csv", index = False)
data_error[2]

In [4]:
data_groupby = pd.concat([pd.read_csv("inference_groupby_banana_classification.csv"),
                          pd.read_csv("inference_groupby_banana_classification_2.csv")]).reset_index(drop = True)

In [5]:
per = 0.965
a = data_groupby[data_groupby['before']==data_groupby['after']]
a = a.pivot(index=['source','dataset'],columns=['before','col'],values='per').reset_index(drop = False)
a.columns = [str(x[0])+str(x[1]) for x in a.columns.tolist()]
a_list = a[(a['dataset']=="val")&(a['0.0成本價格-02']>=per)&(a['1.0成本價格-02']>=per)&(a['0.0成本價格-03']>=per)&(a['1.0成本價格-03']>=per)&(a['0.0成本價格-04']>=per)&(a['1.0成本價格-04']>=per)&(a['0.0成本價格-05']>=per)&(a['1.0成本價格-05']>=per)&(a['0.0成本價格-06']>=per)&(a['1.0成本價格-06']>=per)]['source'].tolist()
b_list = a[(a['dataset']=="train")&(a['0.0成本價格-02']>=per)&(a['1.0成本價格-02']>=per)&(a['0.0成本價格-03']>=per)&(a['1.0成本價格-03']>=per)&(a['0.0成本價格-04']>=per)&(a['1.0成本價格-04']>=per)&(a['0.0成本價格-05']>=per)&(a['1.0成本價格-05']>=per)&(a['0.0成本價格-06']>=per)&(a['1.0成本價格-06']>=per)]['source'].tolist()
final_list = list(set(a_list)&set(b_list))

In [8]:
a[(a['source'].isin(final_list))&(a['source'].str.contains('weights_accuracy.hdf5'))].reset_index(drop = True)

Unnamed: 0,source,dataset,0.0成本價格-02,1.0成本價格-02,0.0成本價格-03,1.0成本價格-03,0.0成本價格-04,1.0成本價格-04,0.0成本價格-05,1.0成本價格-05,0.0成本價格-06,1.0成本價格-06
0,model/Conv1D_LSTM/0.0001-adam-relu-gelu/weight...,train,0.989502,1.0,0.989799,1.0,0.979004,1.0,0.979886,1.0,0.969397,1.0
1,model/Conv1D_LSTM/0.0001-adam-relu-gelu/weight...,val,0.984746,0.987805,0.98308,0.987654,0.986441,0.987805,0.979764,0.974684,0.967851,0.975309
2,model/Conv1D_LSTM/0.0001-sgd-leaky_relu-swish/...,train,0.967606,1.0,0.972097,1.0,0.971506,1.0,0.981987,1.0,0.978698,0.991471
3,model/Conv1D_LSTM/0.0001-sgd-leaky_relu-swish/...,val,0.971186,0.987805,0.976311,1.0,0.971186,0.987805,0.976391,0.987342,0.978003,0.987654
4,model/Conv1D_LSTM/0.0001-sgd-swish-relu/weight...,train,0.976905,1.0,0.975098,1.0,0.979904,1.0,0.975983,1.0,0.970597,1.0
5,model/Conv1D_LSTM/0.0001-sgd-swish-relu/weight...,val,0.974576,0.987805,0.966159,1.0,0.983051,1.0,0.983137,0.974684,0.971235,0.987654


In [19]:
set(['/'.join(x.split("/")[:-1]) for x in final_list])

{'model/Conv1D_LSTM/0.0001-adam-relu-gelu',
 'model/Conv1D_LSTM/0.0001-sgd-leaky_relu-swish',
 'model/Conv1D_LSTM/0.0001-sgd-swish-relu'}

In [None]:
list_route = [os.path.join(path, name) for path, subdirs, files in os.walk('model') for name in files]
list_route = [x for x in list_route if 'history.png' in x]
for i in range(len(list_route)):
    if len(os.listdir('/'.join(list_route[i].split("/")[:-1])))==7:
        print(i, len(list_route))
        _, data_groupby_val = predict_all(list_route[i], dataset = 'val')
        a = data_groupby_val[data_groupby_val['before']==data_groupby_val['after']].reset_index(drop = True)
        a['h5'] = [x.split("/")[-1] for x in a['source'].tolist()]
        a = a.pivot(index=['before','col'],columns='h5',values='per').reset_index(drop = False)
        a = a[(a['final.h5']==a['weights_accuracy.hdf5'])&(a['final.h5']==a['weights_loss.hdf5'])].reset_index(drop = True)
        if len(a)==10:
            print("remove", i)
            if os.path.isfile(list_route[i].replace("history.png", "weights_accuracy.hdf5")):
                os.remove(list_route[i].replace("history.png", "weights_accuracy.hdf5"))
            if os.path.isfile(list_route[i].replace("history.png", "weights_loss.hdf5")):
                os.remove(list_route[i].replace("history.png", "weights_loss.hdf5"))