In [1]:
import os
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression, Ridge

In [2]:
path = './descriptors/'
in_path = os.listdir(path)
print(in_path)

['0', '1', '2', '3', '4']


In [3]:
data_all = []

for p in in_path:
    data_dir = path + p

    with open(data_dir + '/x_tr.pkl', 'rb') as f:
        x_tr = pickle.load(f)
    with open(data_dir + '/x_ts.pkl', 'rb') as f:
        x_ts = pickle.load(f)
    with open(data_dir + '/y_tr.pkl', 'rb') as f:
        y_tr = pickle.load(f)
    with open(data_dir + '/y_ts.pkl', 'rb') as f:
        y_ts = pickle.load(f)

    data_all.append([x_tr, x_ts, y_tr, y_ts])

In [7]:
def extend_x(x_list, dimension):

    if dimension < 1:
        print('Wrong dimension')
        return
    
    x = x_list[1]
    
    for i in range(dimension - 1):
        x = np.c_[x, x_list[i+1]]
    return x

meta_r2_list = []

for i in range(len(data_all)):

    r2_list = []
    print(f'Data : {in_path[i]}')
    X = data_all[i]
    x_tr, x_ts, y_tr, y_ts = (X[j] for j in range(len(X)))

    y_tr = y_tr[0]
    y_ts = y_ts[0]
    

    for d in range(1, 11):
        x_tr_ = extend_x(x_tr, d)
        x_ts_ = extend_x(x_ts, d)
        
        model = LinearRegression().fit(x_tr_, y_tr)
        pred_tr, pred_ts = model.predict(x_tr_), model.predict(x_ts_)

        # if d == 10:
        #     print(*np.round(model.coef_, 4), sep='\n')
        #     print('intercept : ', model.intercept_)

        tr_r2, ts_r2 = r2_score(y_tr, pred_tr), r2_score(y_ts, pred_ts)
        r2_list.append([tr_r2, ts_r2])
        print(f'Dimension : {d+1:2} | Training R2 : {tr_r2:.4f} | Test R2 : {ts_r2:.4f}')
        
    meta_r2_list.append(r2_list)


Data : 0
Dimension :  2 | Training R2 : 0.6302 | Test R2 : 0.7306
Dimension :  3 | Training R2 : 0.6391 | Test R2 : 0.7451
Dimension :  4 | Training R2 : 0.7161 | Test R2 : 0.6913
Dimension :  5 | Training R2 : 0.7819 | Test R2 : 0.6419
Dimension :  6 | Training R2 : 0.8204 | Test R2 : 0.6644
Dimension :  7 | Training R2 : 0.8397 | Test R2 : 0.6482
Dimension :  8 | Training R2 : 0.8603 | Test R2 : 0.6793
Dimension :  9 | Training R2 : 0.8698 | Test R2 : 0.6829
Dimension : 10 | Training R2 : 0.8909 | Test R2 : 0.6489
Dimension : 11 | Training R2 : 0.8910 | Test R2 : 0.6515
Data : 1
Dimension :  2 | Training R2 : 0.6883 | Test R2 : 0.6458
Dimension :  3 | Training R2 : 0.6883 | Test R2 : 0.6458
Dimension :  4 | Training R2 : 0.7332 | Test R2 : 0.7022
Dimension :  5 | Training R2 : 0.7871 | Test R2 : 0.6687
Dimension :  6 | Training R2 : 0.8009 | Test R2 : 0.6855
Dimension :  7 | Training R2 : 0.8298 | Test R2 : 0.6786
Dimension :  8 | Training R2 : 0.8550 | Test R2 : 0.7391
Dimension :  

In [8]:
for i in range(10):
    tr_ = []
    ts_ = []
    for j in range(len(meta_r2_list)):
        tr_.append(meta_r2_list[j][i][0])
        ts_.append(meta_r2_list[j][i][1])
    
    tr_avg, tr_std = np.average(tr_), np.std(tr_)
    ts_avg, ts_std = np.average(ts_), np.std(ts_)

    

    print(f'Dimension : {i+1} | Training : {tr_avg:.4f} + {tr_std:.4f} | Test avg : {ts_avg:.4f} + {ts_std:.4f}')# This results are same as the reported results on the paper

Dimension : 1 | Training : 0.5571 + 0.0884 | Test avg : 0.6582 + 0.1052
Dimension : 2 | Training : 0.5588 + 0.0900 | Test avg : 0.6611 + 0.1074
Dimension : 3 | Training : 0.6640 + 0.0575 | Test avg : 0.6987 + 0.0481
Dimension : 4 | Training : 0.7206 + 0.0565 | Test avg : 0.6694 + 0.0628
Dimension : 5 | Training : 0.7565 + 0.0606 | Test avg : 0.6863 + 0.0610
Dimension : 6 | Training : 0.7697 + 0.0672 | Test avg : 0.6903 + 0.0506
Dimension : 7 | Training : 0.7796 + 0.0759 | Test avg : 0.7114 + 0.0438
Dimension : 8 | Training : 0.7864 + 0.0789 | Test avg : 0.7165 + 0.0433
Dimension : 9 | Training : 0.7991 + 0.0820 | Test avg : 0.7121 + 0.0475
Dimension : 10 | Training : 0.8014 + 0.0814 | Test avg : 0.7164 + 0.0540
