In [1]:
print('Program start!')
# Import packages
import warnings
import datetime
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import keras
from keras import losses
from keras.models import Model, load_model
from keras.layers import Dense, LSTM, Input, Concatenate, Reshape
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
%matplotlib inline
# np.set_printoptions(suppress=True, formatter={'float': '{:.4f}'.format})

# Function define
def normdata(arr_x, arr_y = None):
    arr_x = np.array(arr_x).astype(float)
    arr_y = np.array(arr_y).astype(float)
    if arr_y.ndim:
        arr = np.append(arr_x, arr_y, axis = 1)
    else:
        arr = arr_x
    arr_min = arr.min(axis = 1)
    arr_max = arr.max(axis = 1)
    for i in range(arr.shape[0]):
        arr[i] = (arr[i] - arr_min[i])/(arr_max[i] - arr_min[i]+0.00001)
    arr[arr == 0] = 0.0001
    return arr[:, :arr_x.shape[1]], arr[:, arr_x.shape[1]:], arr_min, arr_max
def denormdata(arr, arr_min, arr_max):
    for i in range(arr.shape[0]):
        arr[i] = (arr[i]*(arr_max[i] - arr_min[i]+0.00001) + arr_min[i])
    return arr

Program start!


Using TensorFlow backend.


In [2]:
# Set parameters
start_train = '2017-11'
end_train = '2018-03'
start_pred = '2018-04'
end_pred = '2019-04'

label_dict = {1:['A'], 0.3:['A-R', 'A-2R', 'B', 'B-R']}
dl_val_num = 6
MCdrop_t = 10
save_model_path = './Models/DL_models_temp'
load_model_path = './Models/DL_models'
save_images_path = './Images'
output_file_path = './Output'

start_train_t = datetime.datetime.strptime(start_train, '%Y-%m')
end_train_t = datetime.datetime.strptime(end_train, '%Y-%m')
start_pred_t = datetime.datetime.strptime(start_pred, '%Y-%m')
end_pred_t = datetime.datetime.strptime(end_pred, '%Y-%m')
past = (start_pred_t.year - start_train_t.year)*12 + (start_pred_t.month - start_train_t.month)
future = (end_pred_t.year - start_pred_t.year)*12 + (end_pred_t.month - start_pred_t.month) + 1

In [3]:
# Prepare label data
proj = pd.read_excel("RDC_2016_2019.xlsx", index_col = 0)
proj_info = pd.read_excel("project_info.xlsx", index_col = 7, sheet_name = 0)
data_day = []
data_month = []
d = proj_info['create date'].min()
d = datetime.datetime(d.year, d.month, d.day)
while d <= max(proj_info['tapeout date'].max(), proj_info['create date'].max(), proj.columns.max()):
    data_day.append(d)
    if d.day == 1 or len(data_month) == 0:
        data_month.append(datetime.datetime(d.year, d.month, 1))
    d = d + datetime.timedelta(days = 1)
label_day_df = pd.DataFrame(columns = data_day)
proj_lable = pd.DataFrame(columns = data_month)
for row_name in proj.index[:-1]:
    label_day_df.loc[row_name] = [0] * label_day_df.shape[1]
    proj_lable.loc[row_name] = [0] * proj_lable.shape[1]
for idx in range(proj_info.shape[0]):
    if proj_info.index[idx] not in proj_lable.index:
        continue
    start = datetime.datetime(proj_info['create date'][idx].year, proj_info['create date'][idx].month, proj_info['create date'][idx].day)
    if pd.isnull(proj_info['tapeout date'][idx]):
        end = datetime.datetime(proj_info['tapeout date'].max().year, proj_info['tapeout date'].max().month, proj_info['tapeout date'].max().day)
    else:
        end = datetime.datetime(proj_info['tapeout date'][idx].year, proj_info['tapeout date'][idx].month, proj_info['tapeout date'][idx].day)

    label = 0.1
    for l in label_dict.keys():
        if proj_info['version'][idx] in label_dict[l]:
            label = l
    label_day_df.loc[proj_info.index[idx], start:end] += label
    
label_day_df = label_day_df.astype(float)
for idx in range(label_day_df.shape[0]):
    proj.iloc[idx,:] = proj.iloc[idx,:].astype(float).resample('MS').mean()
    proj_lable.iloc[idx, :] = label_day_df.iloc[idx, :].astype(float).resample('MS').mean()
proj_lable = pd.concat([proj_lable, pd.DataFrame(proj_lable.sum(axis = 0), columns = ['Total']).T])
dept_num = proj.shape[0]

In [4]:
proj

Unnamed: 0_level_0,2018-01-01,2018-02-01,2018-03-01,2018-04-01,2018-05-01,2018-06-01,2018-07-01,2018-08-01,2018-09-01,2018-10-01,...,2020-03-01,2020-04-01,2020-05-01,2020-06-01,2020-07-01,2020-08-01,2020-09-01,2020-10-01,2020-11-01,2020-12-01
Dept.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GeneralIP,117.1,120.6,125.7,142.1,150.5,149.9,167.0,162.4,168.2,183.7,...,167.16,167.17,167.18,167.19,167.2,167.21,167.22,167.23,167.24,167.25
10/100 NIC,1.4,0.9,1.0,1.8,1.5,2.5,1.7,2.1,1.8,0.2,...,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1
GbE NIC,13.8,14.2,8.0,2.3,2.3,3.5,1.6,1.6,1.1,0.6,...,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4
High Speed Bridge,0.0,0.0,0.4,0.7,1.4,2.2,4.8,6.6,10.1,4.1,...,3.14,3.15,3.16,3.17,3.18,3.19,3.2,3.21,3.22,3.23
2.5GbE NIC,0.0,0.0,2.6,4.0,2.4,2.9,3.3,6.0,2.9,4.6,...,4.14,4.15,4.16,4.17,4.18,4.19,4.2,4.21,4.22,4.23
AM Ethernet,14.2,13.0,12.3,11.4,12.0,14.2,15.9,18.1,17.9,20.7,...,19.17,19.18,19.19,19.2,19.21,19.22,19.23,19.24,19.25,19.26
10GbE NIC,0.0,0.0,0.3,0.3,0.0,1.7,1.7,0.6,0.1,0.3,...,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19
11n NIC,8.7,8.3,7.4,7.8,7.5,6.1,4.1,3.4,2.3,3.2,...,1.1,1.11,1.12,1.13,1.14,1.15,1.16,1.17,1.18,1.19
11ac NIC,48.3,46.5,33.6,39.9,27.4,24.5,35.0,33.7,31.0,21.8,...,27.8,27.9,27.1,27.11,27.12,27.13,27.14,27.15,27.16,27.17
11ax NIC,0.0,0.0,10.7,11.6,8.6,7.8,2.9,2.5,4.8,18.0,...,36.11,36.12,36.13,36.14,36.15,36.16,36.17,36.18,36.19,36.2


In [5]:
# Split data into training, validation, testing data
for dept_idx in range(proj.shape[0]):
    proj.iloc[dept_idx,:] = proj.iloc[dept_idx,:].astype(float).resample('MS').mean()
    proj_lable.iloc[dept_idx,:] = proj_lable.iloc[dept_idx,:].resample('MS').mean()
dept_num = proj.shape[0]
proj_train = proj.loc[:,:end_train_t]
proj_lable_train = proj_lable.loc[:,:end_train_t]
# proj_train = proj
# proj_lable_train = proj_lable

x_train ,y_train = [], []
x_train_l ,y_train_l = [], []
for dept_idx in range(proj.shape[0]):
    for data_idx in range(proj_train.shape[1]-past-future+1):
        x_train.append(proj_train.iloc[dept_idx, data_idx:data_idx+past])
        y_train.append(proj_train.iloc[dept_idx, data_idx+past:data_idx+past+future])
        x_train_l.append(proj_lable_train.iloc[dept_idx, data_idx:data_idx+past])
        y_train_l.append(proj_lable_train.iloc[dept_idx, data_idx+past:data_idx+past+future])
x_train = np.array(x_train).reshape(proj.shape[0], -1, past)
y_train = np.array(y_train).reshape(proj.shape[0], -1, future)
x_train_l = np.array(x_train_l).reshape(proj.shape[0], -1, past)
y_train_l = np.array(y_train_l).reshape(proj.shape[0], -1, future)
print(x_train.shape)
print(y_train.shape)

x_val ,y_val = [], []
x_val_l ,y_val_l = [], []
if (dl_val_num < x_train.shape[1] and dl_val_num !=  0):
    for dept_idx in range(proj.shape[0]):
        for data_idx in range(dl_val_num):
            x_val.append(proj_train.iloc[dept_idx, data_idx+proj_train.shape[1]-past-future-dl_val_num+1:data_idx+proj_train.shape[1]-future-dl_val_num+1])
            y_val.append(proj_train.iloc[dept_idx, data_idx+proj_train.shape[1]-future-dl_val_num+1:data_idx+proj_train.shape[1]-dl_val_num+1])
            x_val_l.append(proj_lable_train.iloc[dept_idx, data_idx+proj_train.shape[1]-past-future-dl_val_num+1:data_idx+proj_train.shape[1]-future-dl_val_num+1])
            y_val_l.append(proj_lable_train.iloc[dept_idx, data_idx+proj_train.shape[1]-future-dl_val_num+1:data_idx+proj_train.shape[1]-dl_val_num+1])
    x_val = np.array(x_val).reshape(proj.shape[0], dl_val_num, past)
    y_val = np.array(y_val).reshape(proj.shape[0], dl_val_num, future)
    x_val_l = np.array(x_val_l).reshape(proj.shape[0], dl_val_num, past)
    y_val_l = np.array(y_val_l).reshape(proj.shape[0], dl_val_num, future)
else:
    x_val ,y_val = x_train, y_train
    x_val_l ,y_val_l = x_train_l, y_train_l
    dl_val_num = x_train.shape[1]
# print(x_val.shape)
# print(y_val.shape)

x_test ,y_test = [], []
x_test_l ,y_test_l = [], []
for dept_idx in range(proj.shape[0]):
    x_test.append(proj.iloc[dept_idx][start_train_t:end_train_t])
    x_test_l.append(proj_lable.iloc[dept_idx][start_train_t:end_train_t])
    y_test.append(proj.iloc[dept_idx][start_pred_t:end_pred_t]) 
    y_test_l.append(proj_lable.iloc[dept_idx][start_pred_t:end_pred_t])
x_test = np.array(x_test)
y_test = np.array(y_test)
x_test_l = np.array(x_test_l)
y_test_l = np.array(y_test_l)
print(x_test.shape)
print(y_test.shape)

# Normalize data to (0, 1)
x, y, _, _ = normdata(x_train.reshape(-1, past, 1), y_train.reshape(-1, future, 1))
x_l, y_l, _, _ = normdata(x_train_l.reshape(-1, past, 1), y_train_l.reshape(-1, future, 1))
x_val, _, val_min, val_max  =  normdata(x_val.reshape(-1, past, 1), y_val.reshape(-1, future, 1))
x_val_l, y_val_l, val_min_l, val_max_l  =  normdata(x_val_l.reshape(-1, past, 1), y_val_l.reshape(-1, future, 1))
x_test, _, test_min , test_max  =  normdata(x_test.reshape(-1, past, 1), y_test.reshape(-1, future, 1))
x_test_l, y_test_l, test_min_l , test_max_l  =  normdata(x_test_l.reshape(-1, past, 1), y_test_l.reshape(-1, future, 1))

(43, 0, 5)
(43, 0, 17)
(43, 5)
(43, 17)


ValueError: all the input array dimensions except for the concatenation axis must match exactly

In [None]:
# Build & Train the model
callback = EarlyStopping(monitor = 'loss', patience = 10, verbose = 1, mode = 'auto')
dl_models = []
dl_models_temp = []

keras.backend.clear_session()
# with label in the past & the future
for i in range(3):
    print('----------\nwith label in the past & the future\n{}/{}'.format(i+1, 10))
    input1 = Input(shape = (past, 1))
    input2 = Input(shape = (past, 1))
    input3 = Input(shape = (future, 1))
    x1 = Concatenate(axis = -1)([input1, input2])
    x1, state_h, state_c = LSTM(units = 64, activation = 'relu', return_sequences = True, return_state = True, dropout = 0.1)(x1, training = True)
    x2 = Dense(future)(state_h)
    x3 = Reshape((future,))(input3)
    concated = Concatenate()([x2, x3])
    x4 = Dense(32, activation = 'relu')(concated)
    out = Dense(future)(x4)
    model = Model(inputs = [input1, input2, input3], outputs = out)
    model.compile(loss = 'mae', optimizer = Adam())
    model.summary()
    model.fit([x, x_l, y_l], y[:,:,0], epochs = 5000, batch_size = 32, callbacks = [callback], verbose = 1)
    dl_models_temp.append(model)
dl_models.append(dl_models_temp)

# with label in the past
dl_models_temp = []
for i in range(3):
    print('----------\nwith label in the past\n{}/{}'.format(i+1, 10))
    input1 = Input(shape = (past, 1))
    input2 = Input(shape = (past, 1))
    input3 = Input(shape = (future, 1))
    x1 = Concatenate(axis = -1)([input1, input2])
    x2, state_h, state_c = LSTM(units = 64, activation = 'relu', return_sequences = True, return_state = True, dropout = 0.1)(x1, training = True)
    x3 = Dense(64, activation = 'relu')(state_h)
    x3 = Dense(32, activation = 'relu')(x3)
    out = Dense(units = future)(x3)
    model = Model(inputs = [input1, input2, input3], outputs = out)
    model.compile(loss = 'mae', optimizer = Adam())
    model.summary()
    model.fit([x, x_l, y_l], y[:,:,0], epochs = 5000, batch_size = 32, callbacks = [callback], verbose = 1)
    dl_models_temp.append(model)
dl_models.append(dl_models_temp)

# w/o label
dl_models_temp = []
for i in range(3):
    print('----------\nwo label\n{}/{}'.format(i+1, 10))
    input1 = Input(shape = (past, 1))
    input2 = Input(shape = (past, 1))
    input3 = Input(shape = (future, 1))
    x1 = input1
    x2, state_h, state_c = LSTM(units = 64, activation = 'relu', return_sequences = True, return_state = True, dropout = 0.1)(x1, training = True)
    x3 = Dense(64, activation = 'relu')(state_h)
    x3 = Dense(32, activation = 'relu')(x3)
    out = Dense(units = future)(x3)
    model = Model(inputs = [input1, input2, input3], outputs = out)
    model.compile(loss = 'mae', optimizer = Adam())
    model.summary()
    model.fit([x, x_l, y_l], y[:,:,0], epochs = 5000, batch_size = 32, callbacks = [callback], verbose = 1)
    dl_models_temp.append(model)
dl_models.append(dl_models_temp)

In [None]:
# Get the prediction
dl_pred_mc  =  []
dl_pred  =  []
dl_mspe  =  []
dl_pred_uc  =  []
dl_pred_uc1  =  []
dl_pred_uc2  =  []
dl_val_pred  =  []
dl_val_mspe  =  []

for model_list in dl_models:
    val_pred_temp  =  []
    for model in model_list:
        print('.', end  =  '')
        val_pred_temp.append(denormdata((model.predict([x_val, x_val_l, y_val_l])), val_min ,val_max).reshape(dept_num, -1, future))
    val_pred_temp  =  np.mean(val_pred_temp, axis  =  0)
    dl_val_pred.append(val_pred_temp)
    dl_pred_uc2.append(np.sqrt((((val_pred_temp - y_val)**2).mean(axis  =  1)).astype(float)))
    dl_val_mspe.append(np.sqrt(((((val_pred_temp - y_val)**2).mean(axis  =  2))/((y_val**2).mean(axis  =  2)+0.00001)).mean(axis  =  1).astype(float)))
    pred_temp  =  []
    for model in model_list:
        for _ in range(MCdrop_t):
            print('.', end  =  '')
            pred_temp.append(denormdata(model.predict([x_test, x_test_l, y_test_l]), test_min, test_max))
    dl_pred_mc.append(pred_temp)
    dl_pred.append(np.mean(pred_temp, axis  =  0))
    dl_pred_uc1.append(np.std(pred_temp, axis = 0))
    dl_mspe.append(np.sqrt(((((y_test-dl_pred[-1])**2).mean(axis  =  1))/((y_test**2).mean(axis  =  1)+0.00001)).astype(float)))
#     dl_mspe.append(np.sqrt((((y_test-dl_pred[-1])**2).mean(axis  =  1))))
    print()
dl_pred_mc  =  np.array(dl_pred_mc)
dl_pred  =  np.array(dl_pred)
dl_val_pred  =  np.array(dl_val_pred)
dl_pred_uc1  =  np.array(dl_pred_uc1)
dl_pred_uc2  =  np.array(dl_pred_uc2)
dl_pred_uc  =  np.sqrt((dl_pred_uc1)**2 + (dl_pred_uc2)**2)
dl_mspe  =  np.array(dl_mspe)
dl_val_mspe  =  np.array(dl_val_mspe)
w  =  dl_pred_uc**(-5)
w[w>100] = 100
if len(dl_models)>8:
    for dept_idx in range(dept_num):
        for t in range(future):
            w[dl_pred[:,dept_idx,t] !=  dl_pred[:,dept_idx,t].clip(np.percentile(dl_pred[:,dept_idx,t], q  =  25), np.percentile(dl_pred[:,dept_idx,t], q  =  75)),dept_idx,t]  =  0
dl_pred_avg  =  np.average(dl_pred, axis  =  0, weights  =  w)
dl_loss_avg  =  (np.sqrt(((((y_test-dl_pred_avg)**2).mean(axis  =  1))/(((y_test)**2).mean(axis  =  1)+0.00001)).astype(float)))
print('prediction complete ')



writer = pd.ExcelWriter('{}/{}to{}output_0829.xlsx'.format(output_file_path, past, future), datetime_format = 'yyyy/mm')
model_name = ['label_in_p&f', 'label_in_p', 'wo_label']
for idx in range(len(dl_models)):
    pred_df = pd.DataFrame(dl_pred[idx], index = proj.index, columns = proj.loc[:,start_pred_t:end_pred_t].columns)
    pred_df.to_excel(writer, index = True, index_label = '', encoding = 'utf-8', sheet_name = '{}_Prediction'.format(model_name[idx]),float_format = "%.4f")
loss_df = pd.DataFrame(dl_mspe.T, index = proj.index, columns = model_name)
loss_df = loss_df.append(pd.DataFrame([loss_df.min().values, loss_df.mean().values, loss_df.max().values], index = ['min', 'mean', 'max'], columns = loss_df.columns))
loss_df = 20*np.log10(loss_df)
loss_df = loss_df.append(pd.DataFrame([(np.absolute(dl_pred[:, :-1].sum(axis = 1) - dl_pred[:, -1])).sum(axis = 1)], index = ['total_dalta'], columns = loss_df.columns))
loss_df.to_excel(writer, index = True, index_label = '', encoding = 'utf-8', sheet_name = 'Evaulation',float_format = "%.4f")
writer.save()
# pred_df
loss_df

In [None]:
pred_df

In [None]:
pd.DataFrame(np.sum(dl_pred[:,:-1], axis = 1))

In [None]:
# Draw the images
for dept_idx in range(dept_num):
    plt.figure(figsize = (14, 6))
    ax = proj.iloc[dept_idx][:end_train_t].plot(label = 'observed', marker = 'o', legend = False)
    proj.iloc[dept_idx][start_pred_t:end_pred_t].plot(ax = ax, label = 'observed', marker = 'o', color = 'b', legend = False, alpha = 1)
    
    for dl_model_idx in range(len(dl_models)):
        dl_pred_df = pd.DataFrame(dl_pred[dl_model_idx][dept_idx], 
                                  index = proj.iloc[dept_idx][start_pred_t:end_pred_t].index, 
                                  columns = ['{:25s}{:.4f}'.format(model_name[dl_model_idx], 20*np.log10(dl_mspe[dl_model_idx,dept_idx]))])
        dl_pred_df.plot(ax = ax, alpha = 1, marker = 'o')
        
#         val_df = pd.DataFrame()
#         for data_idx in range(dl_val_pred.shape[2]):
#              val_df = pd.concat([val_df, pd.DataFrame(dl_val_pred[dl_model_idx, dept_idx, data_idx], 
#                                                       index = proj_train.iloc[:, data_idx+proj_train.shape[1]-future-dl_val_num+1:data_idx+proj_train.shape[1]-dl_val_num+1].columns).T])
#         val_df = pd.DataFrame(val_df.mean(axis = 0), columns = dl_pred_df.columns)
#         val_df.plot(ax = ax, alpha = 1, color = ax.get_lines()[-1].get_color(), legend = False)
#         for data_idx in range(dl_val_pred.shape[2]):
#             df_t = pd.DataFrame(dl_val_pred[dl_model_idx, dept_idx, data_idx], 
#                                 index = proj_train.iloc[:, data_idx+proj_train.shape[1]-future-dl_val_num+1:data_idx+proj_train.shape[1]-dl_val_num+1].columns)
#             df_t.plot(ax = ax, alpha = .2, color = ax.get_lines()[-1].get_color(), legend = False)

#         dl_pred_ci = pd.DataFrame({'down':dl_pred_mc.min(axis = 1)[dl_model_idx,dept_idx],
#                                    'up':dl_pred_mc.max(axis = 1)[dl_model_idx,dept_idx]}, 
#                                     index = proj.iloc[dept_idx][start_pred_t:end_pred_t].index)
#         for i in range(dl_pred_mc.shape[1]):
#             pd.DataFrame(dl_pred_mc[dl_model_idx,i,dept_idx], 
#                          index = proj.iloc[dept_idx][start_pred_t:end_pred_t].index).plot(ax = ax, c = ax.get_lines()[-1].get_color(), alpha = .15, legend = False)
#         ax.fill_between(dl_pred_ci.index, dl_pred_ci.iloc[:, 0], dl_pred_ci.iloc[:, 1], alpha = .1, color = ax.get_lines()[-1].get_color())  

#     ax.set_ylim(((proj.iloc[dept_idx].min()//20)*20), ((proj.iloc[dept_idx].max()//20+1)*20))
    ax.set_title('{}. {}'.format((dept_idx+1), proj.index[dept_idx]))
    ax2 = ax.twinx()
    proj_lable.iloc[dept_idx][:end_train_t].plot(ax = ax2, label = 'project label', color = 'k', marker = '+', alpha = .7)
    proj_lable.iloc[dept_idx][start_pred_t:end_pred_t].plot(ax = ax2, label = 'w', color = 'k', marker = '+', alpha = .7)
    ax2.set_ylim(0, ((proj_lable.iloc[dept_idx].max()//20+1)*20))    
    plt.savefig('{}/{}to{}_{}.png'.format(save_images_path, past, future,(dept_idx+1)))
    plt.show()
print('End program!')