# Imports & Loads

In [1]:
import pandas as pd
import numpy as np
import joblib
import json
from zipfile import ZipFile

In [13]:
exps = ["exp1","exp2","exp3","exp4", "exp5"]
models = dict()

for key in exps:
    models[key] = {}
    path = key+"/feature_extraction_info.json"
    with open(path, 'r') as f:
        feature_extraction_info = json.load(f)
    
    models[key]["features"] = feature_extraction_info
    
    for condition in ["ec", "eo"]:
        test_features_condition_path = key + "/" + condition + "/test_"+condition+".npy"
        models[key][condition] = {}
        models[key][condition]["path"] = test_features_condition_path
        test_features = np.load(test_features_condition_path, allow_pickle=True)
        
        models[key][condition]["test"] = {}
        models[key][condition]["test"]["features"] = test_features
        
        
        df_valid_path = key + "/" + condition + "/df_valid_"+condition+".csv"
        df_valid = pd.read_csv(df_valid_path,index_col=False)
        
        models[key][condition]["valid"] = {}
        
        models[key][condition]["valid"]["features"] = df_valid
        
        models[key][condition]["model"] = {}
        model_path = key + "/" + condition + "/model.pkl"
        model_ec = joblib.load(model_path)
        models[key][condition]["model"]["file"] = model_ec
        models[key][condition]["model"]["score"] = 0.0
    
    
    

### Some required info from the feature extraction process

# Defining EO-EC models ensemble weights

In [9]:
ec_model_weight = 0.635

eo_model_weight = 0.365

assert (ec_model_weight + eo_model_weight == 1), "Sum of weights should be equal to 1"

# Metric function

In [10]:
def calc_MAE(signal1, signal2):
    
    """
    Returns the Mean Absoluete Error between signal1 and signal2 (Both should be 1d arrays)
    """
    
    return np.mean(np.abs(signal1 - signal2))

# Verifying validation scores 

### EC Condition Validation set

In [15]:
exps = ["exp1","exp2","exp3","exp4", "exp5"]
for key in exps:
    for condition in ["ec", "eo"]:
        valid_features = models[key][condition]["valid"]["features"]
        model = models[key][condition]["model"]["file"]
        valid_X = valid_features.iloc[:, :-1].to_numpy()
        valid_Y = valid_features.iloc[:, -1].to_numpy()
        valid_pred = model.predict(valid_X)
        models[key][condition]["pred"] = valid_pred
        model_valid_score = calc_MAE(valid_pred, valid_Y)
        models[key][condition]["model"]["score"] = model_valid_score
    


In [21]:
df_eo_pred = pd.DataFrame()
df_ec_pred = pd.DataFrame()
for condition in ["ec", "eo"]:
    for key in exps:
        if condition == "ec":
            df_ec_pred[key] = models[key][condition]["pred"]
        else:
            df_eo_pred[key] = models[key][condition]["pred"]
            
df_eo_pred['mean'] = df_eo_pred.mean(axis=1)
df_ec_pred['mean'] = df_ec_pred.mean(axis=1)

In [23]:
n_valid_subjects = models["exp1"]["features"]['n_subjects_valid']
n_test_subjects = models["exp1"]["features"]['n_subjects_test']

n_windows_ec = models["exp1"]["features"]['n_windows_ec']
n_windows_eo = models["exp1"]["features"]['n_windows_eo']

test_subjects = models["exp1"]["features"]['test_dataset_subjects']

In [37]:
valid_Y_actual = valid_Y[::n_windows_eo]
valid_ec_Y_pred = df_ec_pred['mean'].to_numpy().reshape(n_valid_subjects, -1)
valid_ec_Y_pred = np.mean(valid_ec_Y_pred, axis=1)

valid_eo_Y_pred = df_eo_pred['mean'].to_numpy().reshape(n_valid_subjects, -1)
valid_eo_Y_pred = np.mean(valid_eo_Y_pred, axis=1)

valid_Y_pred = (ec_model_weight*valid_ec_Y_pred) + (eo_model_weight*valid_eo_Y_pred)

final_validation_score = calc_MAE(valid_Y_pred, valid_Y_actual)
print("Validation set final score: ", final_validation_score)

Validation set final score:  1.563383140119739


In [56]:
test_features  =  models[key][condition]["test"]["features"]
print(test_features.shape)
df_eo_test_pred = pd.DataFrame()
df_ec_test_pred = pd.DataFrame()
for condition in ["ec", "eo"]:
    for key in exps:
        if condition == "ec":
            model = models[key][condition]["model"]["file"]
            test_pred = model.predict(test_features)
            models[key][condition]["test"]["pred"] = test_pred
            df_ec_test_pred[key] =  test_pred
            
        else:
            model = models[key][condition]["model"]["file"]
            test_pred = model.predict(test_features)
            
            models[key][condition]["test"]["pred"] = test_pred
            df_eo_test_pred[key] =  test_pred
            
            
df_eo_test_pred['mean'] = df_eo_test_pred.mean(axis=1)
df_ec_test_pred['mean'] = df_ec_test_pred.mean(axis=1)
print(df_eo_test_pred['mean'].head())



test_eo_pred = df_eo_test_pred['mean'].to_numpy().reshape(n_test_subjects, -1)
test_eo_pred = np.mean(test_eo_pred, axis=1)

test_ec_pred = df_ec_test_pred['mean'].to_numpy().reshape(n_test_subjects, -1)
test_ec_pred = np.mean(test_ec_pred, axis=1)


test_preds_final = (ec_model_weight*test_ec_pred) + (eo_model_weight*test_eo_pred)

df_final = pd.DataFrame({"id":test_subjects, "age":test_preds_final})

df_final.to_csv("df_split_submission.csv", index=False)

print(df_final)

(1600, 4480)
0   9.76
1   8.71
2   9.02
3   9.83
4   9.36
Name: mean, dtype: float64
       id   age
0    1601  9.03
1    1602  9.93
2    1603  9.97
3    1604 10.09
4    1605  8.18
..    ...   ...
395  1996  7.49
396  1997 13.15
397  1998  7.62
398  1999  8.35
399  2000 11.25

[400 rows x 2 columns]


In [57]:
with ZipFile("df_split_submission.zip", 'w') as zipf:
    zipf.write("df_split_submission.csv", arcname="df_split_submission.csv")

### EO Condition validation 

In [49]:
valid_eo_features = df_valid_eo.iloc[:, :-1].to_numpy()
valid_eo_Y = df_valid_eo.iloc[:, -1].to_numpy()

valid_eo_pred = model_eo.predict(valid_eo_features)

model_eo_valid_score = calc_MAE(valid_eo_pred, valid_eo_Y)
print("autoML_model_eo validation set score: ", model_eo_valid_score)

autoML_model_eo validation set score:  1.8187050114823526


### Ensemble score over Validation set

In [50]:
#Actual age values are same for both 'eo' and 'ec' validation sets

valid_Y_actual = valid_ec_Y[::n_windows_ec]

#valid_Y_actual = validY_eo[::n_windows_eo] 

In [51]:
valid_ec_Y_pred = valid_ec_pred.reshape(n_valid_subjects, -1)
valid_ec_Y_pred = np.mean(valid_ec_Y_pred, axis=1)

In [52]:
valid_eo_Y_pred = valid_eo_pred.reshape(n_valid_subjects, -1)
valid_eo_Y_pred = np.mean(valid_eo_Y_pred, axis=1)

In [53]:
valid_Y_pred = (ec_model_weight*valid_ec_Y_pred) + (eo_model_weight*valid_eo_Y_pred)

final_validation_score = calc_MAE(valid_Y_pred, valid_Y_actual)
print("Validation set final score: ", final_validation_score)

Validation set final score:  1.5787076303198


# Test Predictions

### model_ec prediction

In [17]:
test_ec_pred = model_ec.predict(test_features_ec)

test_ec_pred = test_ec_pred.reshape(n_test_subjects, -1)

test_ec_pred = np.mean(test_ec_pred, axis=1)

### model_eo prediction

In [18]:
test_eo_pred = model_eo.predict(test_features_eo)

test_eo_pred = test_eo_pred.reshape(n_test_subjects, -1)

test_eo_pred = np.mean(test_eo_pred, axis=1)

### Ensemble of 'eo' and 'ec' predictions

In [19]:
test_preds_final = (ec_model_weight*test_ec_pred) + (eo_model_weight*test_eo_pred)

### Saving the predictions in a dataframe

In [20]:
df_final = pd.DataFrame({"id":test_subjects, "age":test_preds_final})

df_final.to_csv("df_submission.csv", index=False)

print(df_final)

       id   age
0    1601  9.20
1    1602  9.67
2    1603 10.31
3    1604 10.51
4    1605  8.57
..    ...   ...
395  1996  7.40
396  1997 12.69
397  1998  7.20
398  1999  9.60
399  2000 10.99

[400 rows x 2 columns]


### Zipping the submission dataframe

In [21]:
with ZipFile("df_submission.zip", 'w') as zipf:
    zipf.write("df_submission.csv", arcname="df_submission.csv")