# Imports

In [None]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv2D, Conv3D, MaxPooling2D, MaxPooling3D, Dense, BatchNormalization, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.initializers import Constant
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras import layers, Model
from sklearn.model_selection import train_test_split
from sklearn import utils
import numpy as np 
import h5py
import os
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras

In [None]:
from keras.regularizers import l2
from keras.utils.vis_utils import plot_model

In [None]:
# Mount Google Drive (Colab only) so the /content/drive/MyDrive/... paths used
# by the dataset, checkpoint and model-saving code in this notebook resolve.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
!nvidia-smi

Mon Oct 31 12:59:49 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   61C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Dataset and Metrics

In [None]:
def average_results(avg):
    """Average MAPE and RMSE over repeated evaluation runs.

    Args:
        avg: list of runs; each run is indexable by model slot 0..3 and each
            slot is indexable so that element 0 is the MAPE and element 2 is
            the RMSE (the layout produced by ``evaluate``).

    Returns:
        ``(avg_mape, avg_rmse)``: two lists of four per-slot means. Both lists
        are also printed.
    """
    n_runs = len(avg)
    mape_totals = [0] * 4
    rmse_totals = [0] * 4

    # Accumulate run by run so each slot is summed in the order runs were added.
    for run in avg:
        for slot in range(4):
            entry = run[slot]
            mape_totals[slot] += entry[0]
            rmse_totals[slot] += entry[2]

    avg_mape = [total / n_runs for total in mape_totals]
    avg_rmse = [total / n_runs for total in rmse_totals]
    print(avg_mape)
    print(avg_rmse)
    return avg_mape, avg_rmse

In [None]:
def percentage_error(predictions, testing, median=False):
    """Mean absolute percentage error plus a histogram of the errors.

    Args:
        predictions: sequence of rows; element 0 of each row is the predicted
            value (anything ``float()`` accepts). Must be non-empty.
        testing: sequence of actual values, aligned with ``predictions``.
            Actual values must be non-zero (they are the divisor).
        median: when True, additionally return the median percentage error.

    Returns:
        ``(mean_pe, histogram)`` or, with ``median=True``,
        ``(mean_pe, histogram, median_pe)``.
        ``mean_pe`` is a fraction (0.5 == 50 %). ``histogram`` has 20 counts
        for the 5 %-wide bins [0, 5), [5, 10), ..., [95, 100]; errors above
        100 % are not counted. ``median_pe`` is in percent and is the upper
        median for an even number of samples.
    """
    bin_edges = list(range(0, 105, 5))
    n_bins = len(bin_edges) - 1
    # shows distribution of errors
    histogram = [0] * n_bins
    total = 0
    percent_errors = []

    for idx in range(len(predictions)):
        predicted = float(predictions[idx][0])
        actual = testing[idx]
        pe = abs((predicted - actual) / actual)
        total += pe

        pct = pe * 100
        for b in range(n_bins):
            lower, upper = bin_edges[b], bin_edges[b + 1]
            # Half-open bins so a value sitting exactly on an edge is counted
            # once; only the last bin also includes its upper edge (100 %).
            closes_range = b == n_bins - 1 and pct == upper
            if lower <= pct < upper or closes_range:
                histogram[b] += 1
                break
        percent_errors.append(pct)

    mean_pe = total / len(predictions)
    if median:
        percent_errors.sort()
        return mean_pe, histogram, percent_errors[len(percent_errors) // 2]
    return mean_pe, histogram



In [None]:

def evaluate(models, X_test, y_test):
    """Score each model individually, then their averaged (ensemble) prediction.

    Args:
        models: compiled Keras models whose ``evaluate`` returns exactly
            ``[loss, rmse]``. Callers in this notebook pass three models;
            ``average_results`` reads four entries from the result
            (three models + ensemble).
        X_test: inputs passed to ``predict`` / ``evaluate``.
        y_test: actual values aligned with ``X_test``.

    Returns:
        A list with one ``[mape, distribution, rmse]`` entry per model, in
        input order, followed by one entry for the ensemble.
    """
    all_predictions = []
    result = []

    # individual
    for model in models:
        predictions = model.predict(X_test).tolist()
        all_predictions.append(predictions)

        mape, distrib = percentage_error(predictions, y_test)
        # Evaluate once; the loss itself is not reported, only the RMSE metric.
        loss, rmse = model.evaluate(X_test, y_test, verbose=2)
        result.append([mape, distrib, rmse])

    # ensemble: plain mean of the per-model predictions for every sample
    average_predictions = []
    for i in range(len(y_test)):
        total = 0.0
        for predictions in all_predictions:
            total += predictions[i][0]
        average_predictions.append([total / len(all_predictions)])

    mape, distrib = percentage_error(average_predictions, y_test)
    rmse = get_rmse(average_predictions, y_test)
    result.append([mape, distrib, rmse])

    return result

In [None]:
def one_eval(model, X_test, y_test):
    """Print MAPE, error distribution and RMSE of a single model.

    ``model.evaluate`` must return exactly two values (loss, rmse); only the
    second one is printed. Nothing is returned.
    """
    predicted_rows = model.predict(X_test).tolist()
    error_stats = percentage_error(predicted_rows, y_test)
    mape, distrib = error_stats
    _, rmse = model.evaluate(X_test, y_test, verbose=2)
    print(mape, distrib, rmse)


In [None]:
def avg_pe(model, dir):
    """Per-region absolute percentage error for the years 2016-2018.

    File names in ``dir`` are parsed as ``<output>_<region name>_<year>``:
    the first ``_``-separated token is the target value, the last one the
    year, and everything in between the region name.

    Reads the module-level ``week_idx`` to truncate each histogram along its
    first axis before prediction.

    Args:
        model: object with a Keras-style ``predict``.
        dir: directory containing the pickled histograms.

    Returns:
        dict mapping region name to ``[pe_2016, pe_2017, pe_2018, mean]``.
        Years without a file stay ``None`` and ``mean`` stays ``-1`` until all
        three years are present.
    """
    region_dict = dict()
    for file in os.listdir(dir):
        splitted = file.split('_')
        output = float(splitted[0])
        name = '_'.join(splitted[1:-1])
        year = int(splitted[-1])
        if year not in (2016, 2017, 2018):
            continue

        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point this at trusted data.
        with open(os.path.join(dir, file), 'rb') as pkl_file:
            hist_3d = pickle.load(pkl_file)
        # Truncate along the first axis and add a leading batch axis of size 1.
        hist_3d = np.array([hist_3d[:week_idx, :, :]])

        predicted = model.predict(hist_3d).tolist()[0][0]

        if name not in region_dict:
            region_dict[name] = [None, None, None, -1]
        region_dict[name][year - 2016] = abs((predicted - output) / output)
        if None not in region_dict[name] and region_dict[name][-1] == -1:
            region_dict[name][-1] = (region_dict[name][0] + region_dict[name][1] + region_dict[name][2]) / 3

    return region_dict

In [None]:
def normalize_hist(hist):
    """Scale ``hist`` so its entries sum to 1.

    A histogram whose sum is zero is returned unchanged (same object) to
    avoid dividing by zero.
    """
    total = float(hist.sum())
    return hist if total == 0 else hist / total

In [None]:
def get_rmse(prediction, actual):
    """Root-mean-square error between predictions and actual values.

    Args:
        prediction: sequence of rows; element 0 of each row is the predicted
            value (converted with ``float()``).
        actual: sequence of actual values, indexed in step with ``prediction``.

    Returns:
        The RMSE as a float. An empty ``prediction`` raises ZeroDivisionError.
    """
    squared_error_total = 0
    for idx, row in enumerate(prediction):
        residual = float(row[0]) - float(actual[idx])
        squared_error_total += residual ** 2
    mean_squared = squared_error_total / len(prediction)
    return mean_squared ** 0.5

In [None]:
def create_dataset(week_idx, main_dir, test_size, train_years, normalize=True):
    """Load pickled histograms from ``main_dir`` and split them by year.

    Each file name starts with the target value followed by ``_``; the year is
    detected by substring match on the whole file name.
    NOTE(review): a target value whose text contains e.g. '2016' would also
    match -- confirm the naming scheme rules this out.

    Args:
        week_idx: number of leading entries kept along the first axis of each
            histogram (``hist[:week_idx, :, :]``).
        main_dir: directory holding the pickle files.
        test_size: forwarded to ``train_test_split``; ``0`` skips the split.
        train_years: year strings; a file goes into the training pool when its
            name contains any of them (and neither '2016' nor '2017').
        normalize: when True, normalise every ``hist[i, :, j]`` slice in place
            for ``i < week_idx`` and ``j < 9`` with ``normalize_hist``.

    Returns:
        ``(X_train, X_test, y_train, y_test, X_2016, y_2016, X_2017, y_2017,
        X_2018, y_2018)``. With ``test_size == 0`` the training pool is
        returned as ``X_train`` / ``y_train`` and both test entries are None.

    NOTE(review): as written, files containing '2016' are stored in the
    ``*_2018`` hold-out and the ``*_2016`` hold-out is always empty. The
    routing is kept unchanged because the cells below rely on it -- confirm
    which split is intended.
    """
    X, y = [], []
    X_2016, y_2016 = [], []
    X_2017, y_2017 = [], []
    X_2018, y_2018 = [], []

    for file in os.listdir(main_dir):
        output = float(file.split('_')[0])
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point this at trusted data.
        with open(os.path.join(main_dir, file), 'rb') as pkl_file:
            hist_3d = pickle.load(pkl_file)
        # change a in :a for different month testing
        hist_3d = hist_3d[:week_idx, :, :]
        if normalize:
            for i in range(week_idx):
                for j in range(9):
                    hist_3d[i, :, j] = normalize_hist(hist_3d[i, :, j])

        if '2016' in file:
            X_2018.append(hist_3d)
            y_2018.append(output)
        elif '2017' in file:
            X_2017.append(hist_3d)
            y_2017.append(output)
        elif any(year in file for year in train_years):
            # any(): a name matching several training years is added once,
            # not once per matching year.
            X.append(hist_3d)
            y.append(output)

    X = np.array(X)
    y = np.array(y)

    X, y = utils.shuffle(X, y)
    X_2016, y_2016 = np.array(X_2016), np.array(y_2016)
    X_2017, y_2017 = np.array(X_2017), np.array(y_2017)
    X_2018, y_2018 = np.array(X_2018), np.array(y_2018)
    if test_size == 0:
        # We have no "testing" data
        return X, None, y, None, X_2016, y_2016, X_2017, y_2017, X_2018, y_2018
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    return X_train, X_test, y_train, y_test, X_2016, y_2016, X_2017, y_2017, X_2018, y_2018

# # Regular US counties
# us_dir = "/content/drive/MyDrive/earth_hist_pkl2/"
# # China cities
# china_dir = "/content/drive/MyDrive/china_pkl2/"
# X_train, X_test, y_train, y_test = create_dataset(26, us_dir)

# 3D CNN

In [None]:
# Standard 3D CNN

def cnn_3d(week_idx, save_dir, X_train, y_train, val_size=0.18):
    """Build, train and save the 3D-CNN regressor.

    Architecture: three Conv3D(3x3x3) + BatchNormalization + MaxPooling3D
    stages, Flatten, then Dense(256) / Dense(128) with dropout and a single
    linear output unit.

    Args:
        week_idx: size of the first input axis; the model input shape is
            ``(week_idx, 32, 9, 1)``.
        save_dir: path passed to ``model.save`` after training.
        X_train, y_train: training data; the last ``val_size`` fraction is
            used by Keras as validation data (``validation_split``).
        val_size: validation fraction.

    Returns:
        The trained model. It carries the weights of the last epoch that ran,
        since early stopping is not asked to restore the best ones; the
        best-``val_loss`` weights are only written to the checkpoint path.
    """
    sample_shape = (week_idx, 32, 9, 1)
    model = Sequential()
    model.add(Conv3D(64, kernel_size=(3, 3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=sample_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling3D(pool_size=(2, 2, 1), padding='Same'))
    model.add(Conv3D(128, kernel_size=(3, 3, 3), activation='relu', kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(MaxPooling3D(pool_size=(2, 2, 1), padding='Same'))
    model.add(Conv3D(128, kernel_size=(3, 3, 3), activation='relu', kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(MaxPooling3D(pool_size=(2, 2, 1), padding='Same'))
    model.add(Flatten())

    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(1))

    checkpoint_filepath = '/content/drive/MyDrive/3d_cnn2'
    # val_loss is better when lower, so "best" must mean the minimum;
    # mode='max' would keep the worst epoch instead.
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)
    earlystop_callback = EarlyStopping(monitor='val_loss', patience=30)
    model.compile(loss=tf.keras.metrics.mean_squared_error,
                  optimizer=Adam(learning_rate=0.0005),
                  metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')]
                  )
    model.fit(X_train, y_train,
              batch_size=16,
              epochs=300,
              verbose=1,
              validation_split=val_size,
              callbacks=[model_checkpoint_callback, earlystop_callback]
              )
    model.save(save_dir)
    return model

In [None]:
# 3D-CNN transfer learning

def cnn_3d_transfer(week_idx, X_train, y_train, val_size=0.18, keep_weights = False,
                    base_model_path='/content/drive/MyDrive/earth_weight/3d_cnn.h5'):
    """Fine-tune the saved 3D-CNN on a new dataset (transfer learning).

    All layers of the saved model except its last five are reused; a fresh
    Dense(256) / Dense(128) / Dense(1) head with dropout is trained on top.

    Args:
        week_idx: size of the first input axis; has to match the value the
            base model was built with.
        X_train, y_train: training data; ``val_size`` of it is used as
            validation data.
        val_size: validation fraction.
        keep_weights: value assigned to ``trainable`` of every reused layer.
            The default ``False`` freezes them, so only the new head learns.
        base_model_path: file the base model is loaded from.

    Returns:
        The trained model (not saved to disk, apart from the weight
        checkpoint of the best ``val_loss`` epoch).
    """
    old_model = tf.keras.models.load_model(base_model_path)
    model = Sequential()
    for layer in old_model.layers[:-5]:
        model.add(layer)
    # Freeze (or not) the reused layers.
    for layer in model.layers:
        layer.trainable = keep_weights

    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(1))

    checkpoint_filepath = '/content/drive/MyDrive/3d_cnn_transfer2'
    # val_loss is better when lower, so "best" must mean the minimum;
    # mode='max' would keep the worst epoch instead.
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)
    earlystop_callback = EarlyStopping(monitor='val_loss', patience=30)
    model.compile(loss=tf.keras.metrics.mean_squared_error,
                  optimizer=Adam(learning_rate=0.001),
                  metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')]
                  )
    # Batch axis + the (week_idx, 32, 9, 1) sample shape the base 3D-CNN is
    # defined with; the previous (week_idx, 16, 9, 1) had no batch axis and
    # 16 where the base model uses 32.
    model.build((None, week_idx, 32, 9, 1))

    model.fit(X_train, y_train,
              batch_size=16,
              epochs=300,
              verbose=0,
              validation_split=val_size,
              callbacks=[model_checkpoint_callback, earlystop_callback]
              )
    return model

# CNN-LSTM

In [None]:
# CNN-LSTM

def cnn_lstm(week_idx, save_dir, X_train, y_train, val_size=0.18):
    """Build, train and save the CNN-LSTM regressor.

    A small Conv2D encoder (input ``(32, 9, 1)``) is applied to each of the
    ``week_idx`` steps through ``TimeDistributed``; the resulting sequence is
    fed to an LSTM(256) followed by Dense(256) / Dense(128) with dropout and a
    single output unit.

    Args:
        week_idx: number of steps along the first input axis; the model input
            shape is ``(week_idx, 32, 9, 1)``.
        save_dir: path passed to ``model.save`` after training.
        X_train, y_train: training data; ``val_size`` of it is used as
            validation data.
        val_size: validation fraction.

    Returns:
        The trained model (weights of the last epoch that ran).
    """
    inputs = layers.Input((week_idx,32,9,1))

    # Per-step encoder, wrapped in TimeDistributed below.
    frame_encoder = Sequential()
    frame_encoder.add(Conv2D(64, kernel_size=(2,2), activation='relu', kernel_initializer='he_uniform', input_shape=(32,9,1)))
    frame_encoder.add(BatchNormalization())
    frame_encoder.add(MaxPooling2D(pool_size=(2, 2), padding='Same'))
    frame_encoder.add(Conv2D(128, kernel_size=(2,2), activation='relu', kernel_initializer='he_uniform'))
    frame_encoder.add(BatchNormalization())
    frame_encoder.add(MaxPooling2D(pool_size=(2, 2), padding='Same'))
    frame_encoder.add(Conv2D(128, kernel_size=(2,2), activation='relu', kernel_initializer='he_uniform'))
    frame_encoder.add(BatchNormalization())
    frame_encoder.add(Flatten())

    # The layer sequence below is kept exactly as is: cnn_lstm_transfer
    # slices the saved model by layer position (layers[:-5]).
    x = layers.TimeDistributed(frame_encoder)(inputs)
    x = layers.LSTM(256)(x)
    x = layers.Flatten()(x)
    x = layers.Dense(256)(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(128)(x)
    x = layers.Dropout(0.5)(x)
    out = layers.Dense(1)(x)

    model = Model(inputs, out)
    checkpoint_filepath = '/content/drive/MyDrive/cnn-lstm1'
    # val_loss is better when lower, so "best" must mean the minimum;
    # mode='max' would keep the worst epoch instead.
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)
    earlystop_callback = EarlyStopping(monitor='val_loss', patience=30)
    model.compile(loss=tf.keras.metrics.mean_squared_error,
                  optimizer=Adam(learning_rate=0.0005),
                  metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
    model.fit(X_train, y_train,
              batch_size=16,
              epochs=300,
              verbose=0,
              validation_split=val_size,
              callbacks=[model_checkpoint_callback, earlystop_callback]
              )
    model.save(save_dir)
    return model

In [None]:
# CNN-LSTM transfer learning

def cnn_lstm_transfer(week_idx, X_train, y_train, val_size=0.18, keep_weights = False,
                      base_model_path='/content/drive/MyDrive/earth_weight/cnn-lstm.h5'):
    """Fine-tune the saved CNN-LSTM on a new dataset (transfer learning).

    All layers of the saved model except its last five are reused; a fresh
    Dense(256) / Dense(128) / Dense(1) head with dropout is trained on top.

    Args:
        week_idx: unused here; the input shape comes from the loaded model.
            Kept for a signature parallel to the other transfer functions.
        X_train, y_train: training data; ``val_size`` of it is used as
            validation data.
        val_size: validation fraction.
        keep_weights: value assigned to ``trainable`` of every reused layer.
            The default ``False`` freezes them, so only the new head learns.
        base_model_path: file the base model is loaded from.

    Returns:
        The trained model (not saved to disk, apart from the weight
        checkpoint of the best ``val_loss`` epoch).
    """
    old_model = tf.keras.models.load_model(base_model_path)
    model = Sequential()
    for layer in old_model.layers[:-5]:
        model.add(layer)
    # Freeze (or not) the reused layers.
    for layer in model.layers:
        layer.trainable = keep_weights

    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(1))

    checkpoint_filepath = '/content/drive/MyDrive/cnn-lstm_transfer2'
    # val_loss is better when lower, so "best" must mean the minimum;
    # mode='max' would keep the worst epoch instead.
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)
    earlystop_callback = EarlyStopping(monitor='val_loss', patience=30)
    model.compile(loss=tf.keras.metrics.mean_squared_error,
                  optimizer=Adam(learning_rate=0.0005),
                  metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')]
                  )
    # Fit data to model
    model.fit(X_train, y_train,
              batch_size=16,
              epochs=300,
              verbose=0,
              validation_split=val_size,
              callbacks=[model_checkpoint_callback, earlystop_callback]
              )
    return model

# CNN

In [None]:
# CNN

def cnn(week_idx, save_dir, X_train, y_train, val_size=0.18):
    """Build, train and save the plain 2D-CNN regressor.

    Architecture: three Conv2D(2x2) + BatchNormalization stages (the first
    two followed by MaxPooling2D), Flatten, then Dense(256) / Dense(128) with
    dropout and a single linear output unit.

    Args:
        week_idx: size of the first input axis; the model input shape is
            ``(week_idx, 32, 9)``.
        save_dir: path passed to ``model.save`` after training.
        X_train, y_train: training data; ``val_size`` of it is used as
            validation data.
        val_size: validation fraction.

    Returns:
        The trained model (weights of the last epoch that ran).
    """
    model = Sequential()
    model.add(Conv2D(64, kernel_size=(2,2), activation='relu', kernel_initializer='he_uniform', input_shape=(week_idx,32,9)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), padding='Same'))
    model.add(Conv2D(128, kernel_size=(2,2), activation='relu', kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), padding='Same'))
    model.add(Conv2D(128, kernel_size=(2,2), activation='relu', kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Flatten())
    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(1))

    checkpoint_filepath = '/content/drive/MyDrive/cnn2'
    # val_loss is better when lower, so "best" must mean the minimum;
    # mode='max' would keep the worst epoch instead.
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)
    earlystop_callback = EarlyStopping(monitor='val_loss', patience=30)
    model.compile(loss=tf.keras.metrics.mean_squared_error,
                  optimizer=Adam(learning_rate=0.0005),
                  metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')]
                  )
    # Fit data to model
    model.fit(X_train, y_train,
              batch_size=16,
              epochs=300,
              verbose=0,
              validation_split=val_size,
              callbacks=[model_checkpoint_callback, earlystop_callback]
              )
    model.save(save_dir)
    return model

In [None]:
# CNN transfer learning

def cnn_transfer(week_idx, X_train, y_train, val_size=0.18, keep_weights = False,
                 base_model_path='/content/drive/MyDrive/earth_weight/cnn.h5'):
    """Fine-tune the saved 2D-CNN on a new dataset (transfer learning).

    All layers of the saved model except its last five are reused; a fresh
    Dense(256) / Dense(128) / Dense(1) head with dropout is trained on top.

    Args:
        week_idx: unused here; the input shape comes from the loaded model.
            Kept for a signature parallel to the other transfer functions.
        X_train, y_train: training data; ``val_size`` of it is used as
            validation data.
        val_size: validation fraction.
        keep_weights: value assigned to ``trainable`` of every reused layer.
            The default ``False`` freezes them, so only the new head learns.
        base_model_path: file the base model is loaded from.

    Returns:
        The trained model (not saved to disk, apart from the weight
        checkpoint of the best ``val_loss`` epoch).
    """
    old_model = tf.keras.models.load_model(base_model_path)
    model = Sequential()
    for layer in old_model.layers[:-5]:
        model.add(layer)
    # Freeze (or not) the reused layers.
    for layer in model.layers:
        layer.trainable = keep_weights

    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(1))

    checkpoint_filepath = '/content/drive/MyDrive/cnn_transfer2'
    # val_loss is better when lower, so "best" must mean the minimum;
    # mode='max' would keep the worst epoch instead.
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)
    earlystop_callback = EarlyStopping(monitor='val_loss', patience=30)
    # NOTE(review): this learning rate (0.005) is ten times the one used by
    # the other CNN / CNN-LSTM functions -- confirm it is intentional.
    model.compile(loss=tf.keras.metrics.mean_squared_error,
                  optimizer=Adam(learning_rate=0.005),
                  metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')]
                  )
    # Fit data to model
    model.fit(X_train, y_train,
              batch_size=16,
              epochs=300,
              verbose=0,
              validation_split=val_size,
              callbacks=[model_checkpoint_callback, earlystop_callback]
              )
    return model

# Multi Year Train

In [None]:
# Multi-year experiment.
# 1. Train the three base models on every US training year and report them on
#    the held-out splits.
# 2. For a growing window of China training years, train the same models from
#    scratch ("reg") and via transfer from the saved US models ("trans"), and
#    collect the averaged MAPE / RMSE per window in all_years_data.
#
# NOTE(review): create_dataset never fills its *_2016 hold-out and routes files
# whose name contains '2016' into the *_2018 one, so the "2016" evaluations
# below receive empty arrays and the "2018" ones are computed on '2016' files
# -- confirm which split is intended.

# US
us_dir = "/content/drive/MyDrive/earth_hist_pkl2/"
# China 
china_dir = "/content/drive/MyDrive/china_new2pkl/"

all_china_reg_results = []
all_china_transfer_results = []
# candidate cut-offs: 26 23 19 15
week_idx = 15
# number of independent training repetitions averaged per window
repeat = 1

avg_us_2016, avg_us_2017, avg_us_2018 = [], [], []

avg_china_reg_2016, avg_china_reg_2017, avg_china_reg_2018 = [], [], []
avg_china_trans_2016, avg_china_trans_2017, avg_china_trans_2018 = [], [], []


all_train_years = ['2015', '2014', '2013', '2012', '2011', '2010', '2009', '2008']
# all_train_years = ['2015']

# US models are trained on ALL YEARS 
X_train_us, X_test_us, y_train_us, y_test_us, X_us_2016, y_us_2016, X_us_2017, y_us_2017, X_us_2018, y_us_2018 = create_dataset(week_idx, us_dir, 0, all_train_years, normalize=False)
# Creating base models with US dataset; the transfer functions load them back
# from these .h5 paths.
print("3D cnn")
cnn_3d_us = cnn_3d(week_idx, '/content/drive/MyDrive/earth_weight/3d_cnn.h5', X_train_us, y_train_us, val_size=0.15)
print("cnn lstm")
cnn_lstm_us = cnn_lstm(week_idx, '/content/drive/MyDrive/earth_weight/cnn-lstm.h5', X_train_us, y_train_us, val_size=0.15)
print("cnn")
cnn_us = cnn(week_idx, '/content/drive/MyDrive/earth_weight/cnn.h5', X_train_us, y_train_us, val_size=0.15)

print("US")
print(2016)
us_eval_2016 = evaluate([cnn_3d_us, cnn_lstm_us, cnn_us], X_us_2016, y_us_2016)
for data in us_eval_2016:
    print(data)
print(2017)
us_eval_2017 = evaluate([cnn_3d_us, cnn_lstm_us, cnn_us], X_us_2017, y_us_2017)
# print the 2017 results (the 2016 list was printed here by mistake)
for data in us_eval_2017:
    print(data)
print(2018)
us_eval_2018 = evaluate([cnn_3d_us, cnn_lstm_us, cnn_us], X_us_2018, y_us_2018)
for data in us_eval_2018:
    print(data)

all_years_data = []
for i in range(len(all_train_years)):
    # Window grows by one training year per iteration: ['2015'], ['2015', '2014'], ...
    train_years = all_train_years[:i+1]

    avg_china_reg_2016, avg_china_reg_2017, avg_china_reg_2018 = [], [], []
    avg_china_trans_2016, avg_china_trans_2017, avg_china_trans_2018 = [], [], []

    # Separate loop variable so the window index `i` is not overwritten.
    for run_idx in range(repeat):

        X_train_china, X_test_china, y_train_china, y_test_china, X_china_2016, y_china_2016, X_china_2017, y_china_2017, X_china_2018, y_china_2018 = create_dataset(week_idx, china_dir, 0, train_years, normalize=False)

        print(train_years)

        # Models trained from scratch on the China window
        print("reg")
        print("cnn3d")
        cnn_3d_china = cnn_3d(week_idx, '/content/drive/MyDrive/earth_weight/3d_cnn_china.h5', X_train_china, y_train_china, val_size=0.2)
        print("cnn lstm")
        cnn_lstm_china = cnn_lstm(week_idx, '/content/drive/MyDrive/earth_weight/cnn-lstm_china.h5', X_train_china, y_train_china, val_size=0.2)
        print("cnn")
        cnn_china = cnn(week_idx, '/content/drive/MyDrive/earth_weight/cnn_china.h5', X_train_china, y_train_china, val_size=0.2)

        print(2016)
        china_eval_2016 = evaluate([cnn_3d_china, cnn_lstm_china, cnn_china], X_china_2016, y_china_2016)
        for data in china_eval_2016:
            print(data)
        print(2017)
        china_eval_2017 = evaluate([cnn_3d_china, cnn_lstm_china, cnn_china], X_china_2017, y_china_2017)
        for data in china_eval_2017:
            print(data)
        print(2018)
        china_eval_2018 = evaluate([cnn_3d_china, cnn_lstm_china, cnn_china], X_china_2018, y_china_2018)
        for data in china_eval_2018:
            print(data)

        avg_china_reg_2016.append(china_eval_2016)
        avg_china_reg_2017.append(china_eval_2017)
        avg_china_reg_2018.append(china_eval_2018)

        # Transfer learning with china dataset
        print("trans")
        cnn_3d_trans = cnn_3d_transfer(week_idx, X_train_china, y_train_china, val_size=0.2)
        cnn_lstm_trans = cnn_lstm_transfer(week_idx, X_train_china, y_train_china, val_size=0.2)
        cnn_trans = cnn_transfer(week_idx, X_train_china, y_train_china, val_size=0.2)

        print("trans")
        print(2016)
        china_eval_2016 = evaluate([cnn_3d_trans, cnn_lstm_trans, cnn_trans], X_china_2016, y_china_2016)
        for data in china_eval_2016:
            print(data)
        print(2017)
        china_eval_2017 = evaluate([cnn_3d_trans, cnn_lstm_trans, cnn_trans], X_china_2017, y_china_2017)
        for data in china_eval_2017:
            print(data)
        print(2018)
        china_eval_2018 = evaluate([cnn_3d_trans, cnn_lstm_trans, cnn_trans], X_china_2018, y_china_2018)
        for data in china_eval_2018:
            print(data)

        avg_china_trans_2016.append(china_eval_2016)
        avg_china_trans_2017.append(china_eval_2017)
        avg_china_trans_2018.append(china_eval_2018)

    # Flat record per window: years, then for "China reg" and "China trans"
    # the triple (year, mean MAPE list, mean RMSE list) for each hold-out year.
    all_years_data.append(train_years)
    summary_groups = (
        ("China reg", ((2016, avg_china_reg_2016), (2017, avg_china_reg_2017), (2018, avg_china_reg_2018))),
        ("China trans", ((2016, avg_china_trans_2016), (2017, avg_china_trans_2017), (2018, avg_china_trans_2018))),
    )
    for label, runs_by_year in summary_groups:
        all_years_data.append(label)
        for year, runs in runs_by_year:
            all_years_data.append(year)
            mape, rmse = average_results(runs)
            all_years_data.append(mape)
            all_years_data.append(rmse)

    all_years_data.append(' ')

# Train

In [None]:
# Ukraine experiment: for each repetition, train the three models from scratch
# on the Ukraine data ("reg") and via transfer ("trans"), and collect their
# hold-out evaluations.
#
# The transfer functions load their base models from
# /content/drive/MyDrive/earth_weight/{3d_cnn,cnn-lstm,cnn}.h5, so those files
# have to exist before this cell runs (the "Multi Year Train" cell writes them).
#
# NOTE(review): create_dataset routes files whose name contains '2016' into its
# *_2018 hold-out, so the results printed under "2018" below are computed on
# those files -- confirm the labels.

# US
us_dir = "/content/drive/MyDrive/earth_hist_pkl2/"
# China 
china_dir = "/content/drive/MyDrive/china_new2pkl/"
# Ukraine
ukraine_dir = "/content/drive/MyDrive/ukraine_pkl/"

all_china_reg_results = []
all_china_transfer_results = []
# candidate cut-offs: 26 23 19 15
week_idx = 15
# number of independent training repetitions
repeat = 3

avg_us_2016, avg_us_2017, avg_us_2018 = [], [], []

avg_china_reg_2016, avg_china_reg_2017, avg_china_reg_2018 = [], [], []
avg_china_trans_2016, avg_china_trans_2017, avg_china_trans_2018 = [], [], []

avg_ukraine, avg_trans_ukraine = [], []
avg_ukraine_2017, avg_trans_ukraine_2017 = [], []

all_train_year_data = []

# train_years = ['2015', '2014', '2013', '2012', '2011', '2010', '2009', '2008']
train_years = ['2018']

for i in range(repeat):

    X_train_ukraine, X_test_ukraine, y_train_ukraine, y_test_ukraine, X_ukraine_2016, y_ukraine_2016, X_ukraine_2017, y_ukraine_2017, X_ukraine_2018, y_ukraine_2018 = create_dataset(week_idx, ukraine_dir, 0, train_years, normalize=False)

    # UKRAINE: models trained from scratch
    print("reg")
    cnn_3d_ukraine = cnn_3d(week_idx, '/content/drive/MyDrive/earth_weight/3d_cnn_ukraine.h5', X_train_ukraine, y_train_ukraine, val_size=0.3)
    print("cnn lstm")
    cnn_lstm_ukraine = cnn_lstm(week_idx, '/content/drive/MyDrive/earth_weight/cnn-lstm_ukraine.h5', X_train_ukraine, y_train_ukraine, val_size=0.3)
    print("cnn")
    cnn_ukraine = cnn(week_idx, '/content/drive/MyDrive/earth_weight/cnn_ukraine.h5', X_train_ukraine, y_train_ukraine, val_size=0.3)

    print(2018)
    ukraine_eval_2018 = evaluate([cnn_3d_ukraine, cnn_lstm_ukraine, cnn_ukraine], X_ukraine_2018, y_ukraine_2018)
    for data in ukraine_eval_2018:
        print(data)

    avg_ukraine.append(ukraine_eval_2018)

    print(2017)
    ukraine_eval_2017 = evaluate([cnn_3d_ukraine, cnn_lstm_ukraine, cnn_ukraine], X_ukraine_2017, y_ukraine_2017)
    for data in ukraine_eval_2017:
        print(data)

    avg_ukraine_2017.append(ukraine_eval_2017)

    # Transfer learning 
    print("trans")
    cnn_3d_trans = cnn_3d_transfer(week_idx, X_train_ukraine, y_train_ukraine, val_size=0.3)
    cnn_lstm_trans = cnn_lstm_transfer(week_idx, X_train_ukraine, y_train_ukraine, val_size=0.3)
    cnn_trans = cnn_transfer(week_idx, X_train_ukraine, y_train_ukraine, val_size=0.3)
    print(2018)
    ukraine_trans_eval_2018 = evaluate([cnn_3d_trans, cnn_lstm_trans, cnn_trans], X_ukraine_2018, y_ukraine_2018)
    for data in ukraine_trans_eval_2018:
        print(data)

    avg_trans_ukraine.append(ukraine_trans_eval_2018)

    # Label the 2017 block like every other section so the log stays readable.
    print(2017)
    ukraine_trans_eval_2017 = evaluate([cnn_3d_trans, cnn_lstm_trans, cnn_trans], X_ukraine_2017, y_ukraine_2017)
    for data in ukraine_trans_eval_2017:
        print(data)

    avg_trans_ukraine_2017.append(ukraine_trans_eval_2017)
    


    

    # print("reg")
    # print("cnn3d")
    # cnn_3d_china = cnn_3d(week_idx, '/content/drive/MyDrive/earth_weight/3d_cnn_china.h5', X_train_china, y_train_china, val_size=0.2)
    # print("cnn lstm")
    # cnn_lstm_china = cnn_lstm(week_idx, '/content/drive/MyDrive/earth_weight/cnn-lstm_china.h5', X_train_china, y_train_china, val_size=0.2)
    # print("cnn")
    # cnn_china = cnn(week_idx, '/content/drive/MyDrive/earth_weight/cnn_china.h5', X_train_china, y_train_china, val_size=0.2)

    # print(2016)
    # china_eval_2016 = evaluate([cnn_3d_china, cnn_lstm_china, cnn_china], X_china_2016, y_china_2016)
    # for data in china_eval_2016:
    #     print(data)
    # print(2017)
    # china_eval_2017 = evaluate([cnn_3d_china, cnn_lstm_china, cnn_china], X_china_2017, y_china_2017)
    # for data in china_eval_2016:
    #     print(data)
    # print(2018)
    # china_eval_2018 = evaluate([cnn_3d_china, cnn_lstm_china, cnn_china], X_china_2018, y_china_2018)
    # for data in china_eval_2018:
    #     print(data)

    # avg_china_reg_2016.append(china_eval_2016)
    # avg_china_reg_2017.append(china_eval_2017)
    # avg_china_reg_2018.append(china_eval_2018)

    # cnn_3d_china = None
    # cnn_lstm_china = None
    # cnn_china = None
    # china_eval_2016 = None
    # china_eval_2017 = None
    # china_eval_2018 = None


    # # Transfer learning with china dataset
    # print("trans")
    # cnn_3d_trans = cnn_3d_transfer(week_idx, X_train_china, y_train_china, val_size=0.2)
    # cnn_lstm_trans = cnn_lstm_transfer(week_idx, X_train_china, y_train_china, val_size=0.2)
    # cnn_trans = cnn_transfer(week_idx, X_train_china, y_train_china, val_size=0.2)

    # print("trans")
    # print(2016)
    # china_eval_2016 = evaluate([cnn_3d_trans, cnn_lstm_trans, cnn_trans], X_china_2016, y_china_2016)
    # for data in china_eval_2016:
    #     print(data)
    # print(2017)
    # china_eval_2017 = evaluate([cnn_3d_trans, cnn_lstm_trans, cnn_trans], X_china_2017, y_china_2017)
    # for data in china_eval_2016:
    #     print(data)
    # print(2018)
    # china_eval_2018 = evaluate([cnn_3d_trans, cnn_lstm_trans, cnn_trans], X_china_2018, y_china_2018)
    # for data in china_eval_2018:
    #     print(data)
        
    # avg_china_trans_2016.append(china_eval_2016)
    # avg_china_trans_2017.append(china_eval_2017)
    # avg_china_trans_2018.append(china_eval_2018)

    # cnn_3d_trans = None
    # cnn_lstm_trans = None
    # cnn_trans = None
    # china_eval_2016 = None
    # china_eval_2017 = None
    # china_eval_2018 = None

     
    