In [None]:
import pandas as pd
import numpy as np
import glob
from pyts.image import RecurrencePlot, GramianAngularField, MarkovTransitionField
from sklearn.metrics import accuracy_score
import gc
from keras.backend.tensorflow_backend import set_session
import os
import tensorflow as tf
from resnet import *

In [None]:
# Configure the TensorFlow session that Keras will use.

# Close a session left over from a previous run of this cell, if there is one.
if locals().get('session') is not None:
    print('Close interactive session')
    session.close()

# Soft placement is always enabled; the intra-op thread count is only set when
# OMP_NUM_THREADS is present (and non-empty) in the environment.
proto_kwargs = {'allow_soft_placement': True}
omp_threads = os.environ.get('OMP_NUM_THREADS')
if omp_threads:
    num_thread = int(omp_threads)
    proto_kwargs['intra_op_parallelism_threads'] = num_thread

config = tf.ConfigProto(**proto_kwargs)
config.gpu_options.allow_growth = True

# Create the session and register it with the Keras backend.
_SESSION = tf.Session(config=config)
session = _SESSION
set_session(session)

In [None]:
# List the input files and derive the train/test pairing.
#
# glob.glob returns paths in arbitrary (filesystem-dependent) order, so the
# list is sorted to make the positional pairing below reproducible.
# NOTE(review): sorted() is lexicographic; if the file suffixes are unpadded
# numbers (e.g. data.2 vs data.10) a numeric sort key is needed -- confirm the
# naming scheme.
files = sorted(glob.glob('data.*'))

# files_train[k] is paired with files_test[k] == files[k + 1]: each file is
# evaluated on the one that follows it. The last file in the list appears in
# neither slice.
files_train = files[:-2]
files_test = files[1:-1]

In [None]:
# First training: fit the ResNet on the first training file and prepare the
# first test file for evaluation.

# Column layout applied to every data file: a timestamp, b1..b41, s1..s50,
# five y_* target columns and three y_hat* columns.
columns = (['time']
           + ['b%d' % k for k in range(1, 42)]
           + ['s%d' % k for k in range(1, 51)]
           + ['y_1m', 'y_5m', 'y_10m', 'y_15m', 'y_30m',
              'y_hat1', 'y_hat2', 'y_hat3'])

# Columns removed from the feature matrix (defined once; applied to both the
# training and the test frame).
DROP_COLUMNS = ['s19', 's24', 'time',
                'y_1m', 'y_5m', 'y_10m', 'y_15m', 'y_30m',
                'y_hat1', 'y_hat2', 'y_hat3']

# Only the first file is used for training. `i` is kept as a global because
# later cells use it to build the output file name ("resnet<i>.csv").
i = 0

# File loading. The training file is read with pandas' default header handling
# (its first row is consumed as the header) and the columns are then renamed;
# the test file is read as header-less.
df = pd.read_csv(files_train[i])
df.columns = columns
test = pd.read_csv(files_test[i], header=None, names=columns)

# Preprocessing: index by the first column parsed as datetimes, pull out the
# 10-minute target, drop the non-feature columns and fill missing features
# with 0.
# NOTE(review): only the features are NaN-filled; the target is taken before
# fillna and is used as-is -- confirm y_10m contains no NaN, otherwise the MSE
# loss becomes NaN.
df = df.set_index(pd.DatetimeIndex(pd.to_datetime(df[df.columns[0]])))
target = df.y_10m
df = df.drop(DROP_COLUMNS, axis=1)
df = df.fillna(0)

test = test.set_index(pd.DatetimeIndex(pd.to_datetime(test[test.columns[0]])))
target_test = test.y_10m
test = test.drop(DROP_COLUMNS, axis=1)
test = test.fillna(0)
test2 = np.array(test)

# Standardisation constants come from the TRAINING target only and are reused
# for the test target.
target_mean = np.mean(target)
target_std = np.std(target)

# Each row becomes a (n_features, 1, 1) input for the network.
X_train = np.array(df).reshape(df.shape + (1, 1,))
y_train = (np.array(target).reshape(len(target), 1) - target_mean) / target_std

X_test = np.array(test2).reshape(test2.shape + (1, 1,))
y_test = (np.array(target_test).reshape(len(target_test), 1) - target_mean) / target_std

# NOTE(review): `build_resnet` and the `keras` name are presumably provided by
# `from resnet import *` -- verify, since neither is imported explicitly.
x, y = build_resnet(X_train.shape[1:], 64, 1)
model = keras.models.Model(inputs=x, outputs=y)
# summary() prints the table itself; wrapping it in print() adds a stray "None".
model.summary()
optimizer = keras.optimizers.Adam()

model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mse'])
# Halve the learning rate when the training loss has not improved for two
# epochs, down to a floor of 1e-4.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5,
                                              patience=2, min_lr=0.0001)
hist = model.fit(X_train, y_train, batch_size=512, epochs=20,
                 verbose=1, callbacks=[reduce_lr])


In [None]:
# Persist the trained ResNet to an HDF5 file.
# NOTE(review): this calls `model.save`, which in Keras normally stores the
# whole model rather than only the weights -- confirm which one is intended.
resnet_path = 'resnet.h5'
model.save(resnet_path)

In [None]:
# Check whether the prediction quantiles look stable enough.
# Predictions in this cell are still on the standardized target scale.
y_pred = model.predict(X_test, verbose=1)
lin = np.linspace(0, 1, 5)
print(pd.DataFrame(y_pred).quantile(lin))

# The prediction array is presumably (n_samples, 1) -- the next cell flattens
# it with np.concatenate. np.corrcoef and a DataFrame column both need a 1-D
# array, so a flattened copy is used here; `y_pred` itself is left unchanged
# for the following cell.
y_pred_flat = np.ravel(y_pred)
print("cor: " + str(np.corrcoef(target_test, y_pred_flat)[0, 1]))

# NOTE: this writes standardized predictions to "resnet<i>.csv"; the
# unstandardized-regression cell below writes to the same file name.
pd.DataFrame({'index': test.index,
              'y_pred': y_pred_flat,
              'target_test': target_test}).to_csv("resnet" + str(i) + ".csv")

In [None]:
# Map the standardized predictions back to the original target scale using
# the training target's standard deviation and mean, then report and save.
y_pred2 = np.concatenate(y_pred) * np.std(target) + np.mean(target)

lin = np.linspace(0, 1, 5)
pred_quantiles = pd.DataFrame(y_pred2).quantile(lin)
print(pred_quantiles)

pred_corr = np.corrcoef(target_test, y_pred2)[0, 1]
print("cor: " + str(pred_corr))

pred_frame = pd.DataFrame({'index': test.index,
                           'y_pred': y_pred2,
                           'target_test': target_test})
pred_frame.to_csv("resnet" + str(i) + ".csv")

In [None]:
# Predict on every remaining test file with the model fitted above.
# No re-training happens in this loop: each iteration only loads a test file,
# predicts, un-standardizes and writes "resnet<i>.csv".
for i in range(1, len(files_train)):
    # Load the header-less test file and index it by its first column,
    # parsed as datetimes.
    test = pd.read_csv(files_test[i], header=None, names=columns)
    test_times = pd.to_datetime(test[test.columns[0]])
    test = test.set_index(pd.DatetimeIndex(test_times))

    # Pull out the 10-minute target, then keep only the feature columns.
    target_test = test.y_10m
    non_features = ['s19', 's24', 'time',
                    "y_1m", "y_5m", "y_10m", "y_15m", "y_30m",
                    "y_hat1", "y_hat2", "y_hat3"]
    test = test.drop(non_features, axis=1).fillna(0)

    # Shape the features as (n_samples, n_features, 1, 1) for the network.
    test2 = np.array(test)
    X_test = np.array(test2).reshape(test2.shape + (1, 1,))

    # Standardize the test target with the training target's statistics.
    target_column = np.array(target_test).reshape(len(target_test), 1)
    y_test = (target_column - np.mean(target)) / np.std(target)

    # Predict, then map back to the original target scale.
    raw_pred = model.predict(X_test, verbose=1)
    y_pred = np.concatenate(raw_pred) * np.std(target) + np.mean(target)

    lin = np.linspace(0, 1, 5)
    pred_quantiles = pd.DataFrame(y_pred).quantile(lin)
    print(pred_quantiles)
    pred_corr = np.corrcoef(target_test, y_pred)[0, 1]
    print("cor: " + str(pred_corr))

    pred_frame = pd.DataFrame({'index': test.index,
                               'y_pred': y_pred,
                               'target_test': target_test})
    pred_frame.to_csv("resnet" + str(i) + ".csv")

In [None]:
# Concatenate all per-file ResNet predictions into a single series.
#
# glob.glob returns paths in arbitrary order, so the list is sorted for a
# reproducible read order and the combined series is sorted by timestamp
# afterwards (lexicographic file order would put resnet10 before resnet2).
files = sorted(glob.glob('resnet*.csv'))
if not files:
    # Fail loudly instead of reusing a stale `all_df` or hitting a NameError.
    raise FileNotFoundError("no 'resnet*.csv' prediction files found to concatenate")

# Collect the y_pred column of every file, indexed by the file's first column
# parsed as datetimes, and concatenate once at the end.
pred_parts = []
for file in files:
    df = pd.read_csv(file)
    df = df.set_index(pd.DatetimeIndex(pd.to_datetime(df[df.columns[0]])))
    pred_parts.append(df.y_pred)

# mergesort is stable, so rows sharing a timestamp keep their read order.
all_df = pd.concat(pred_parts).sort_index(kind='mergesort')

# header=True is passed explicitly because the default for Series.to_csv
# differs between pandas versions; without a header row the file cannot be
# read back by column name.
all_df.to_csv('all_resnet.csv', header=True)

In [None]:
# Average the predictions of the three models.
files = ['all_rocket.csv', 'all_attlstm.csv', 'all_resnet.csv']


def _read_indexed(path):
    """Read a prediction CSV and index it by its first column parsed as datetimes."""
    frame = pd.read_csv(path)
    return frame.set_index(pd.DatetimeIndex(pd.to_datetime(frame[frame.columns[0]])))


def _prediction_series(frame, path):
    """Return the prediction column of `frame` as a series named 'IF'.

    'IF' is the column this cell originally read from every file. The ResNet
    concatenation cell writes 'all_resnet.csv' with a 'y_pred' column instead,
    so that name is accepted as a fallback.

    Raises:
        KeyError: if neither 'IF' nor 'y_pred' is present in `frame`.
    """
    for column in ('IF', 'y_pred'):
        if column in frame.columns:
            return frame[column].rename('IF')
    raise KeyError("%s has neither an 'IF' nor a 'y_pred' column (found: %s)"
                   % (path, list(frame.columns)))


df = _read_indexed(files[0])
df2 = _read_indexed(files[1])
df3 = _read_indexed(files[2])

# The sum aligns the three series on their datetime index; a timestamp missing
# from any of them yields NaN in the average.
all_df = (_prediction_series(df, files[0])
          + _prediction_series(df2, files[1])
          + _prediction_series(df3, files[2])) / 3
all_df.to_csv('all_att_rocket_resnet.csv')