
# Summary:

in this notebook, we first make the two Chicago and San Diego datasets similar in terms of features and then we run the baseline ML methods, Transfer learning and domain adaptation models on them. We aim to transfer from one city to another.

- Regression
- Timeline Split
- Results reported after statistical significance test
- Nan values: Noisy Mean
- Augmentation: No Aug

### Libraries

In [1]:
# pip install adapt stats

In [2]:
import numpy as np
import pandas as pd
import numpy
import pickle
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import make_scorer
from tqdm import tqdm
import stats
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
import torch

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor as xgbc
from matplotlib import pyplot

In [4]:
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE
from imblearn.over_sampling import ADASYN
import math

In [5]:
pd.options.display.max_rows = None

In [6]:
results_rrmse = {
    'RF': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-FA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-BW': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-CORAL': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-SA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-FA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-BW': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-CORAL': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-SA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-FA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-BW': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-CORAL': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-SA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-FA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-BW': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-CORAL': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-SA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
}

results_r2 = {
    'RF': {'metric':'r2', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-FA': {'metric':'r2', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-BW': {'metric':'r2', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-CORAL': {'metric':'r2', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-SA': {'metric':'r2', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB': {'metric':'r2', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-FA': {'metric':'r2', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-BW': {'metric':'r2', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-CORAL': {'metric':'r2', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-SA': {'metric':'r2', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-FA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-BW': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-CORAL': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-SA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-FA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-BW': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-CORAL': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-SA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
}
results_rmse = {
    'RF': {'metric':'rsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-FA': {'metric':'rsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-BW': {'metric':'rsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-CORAL': {'metric':'rsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'RF-SA': {'metric':'rsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB': {'metric':'rsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-FA': {'metric':'rsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-BW': {'metric':'rsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-CORAL': {'metric':'rsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'XGB-SA': {'metric':'rsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-FA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-BW': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-CORAL': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'MLP-SA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-FA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-BW': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-CORAL': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
    'TN-SA': {'metric':'rrsme', 'chi-chi': 0, 'chi-san': 0, 'san-chi':0, 'san-san':0},
}

### training functions

In [7]:
def r44(value): return f"{round(value, 3):.3f}"

def r55(value): return f"{round(value, 4):.4f}"

def rrmse(t,p): return np.sqrt(mean_squared_error(t, p))/np.mean(t)
##np.sqrt(1/len(t) * np.sum(np.square((t - p) / p)))
my_rrmse = make_scorer(rrmse, greater_is_better=False)

def metrics(y_test, y_pred, roundd = False):
    #rrmse = np.sqrt(1/len(y_test) * np.sum(np.square((y_test - y_pred) / y_pred)))
    rrmse_ = rrmse(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred) ** (1/2)
    r2 = r2_score(y_test, y_pred)
    return rrmse_, rmse, r2

def stid(source_name, target_name):
    return (source_name[0:3] + '-' + target_name[0:3]).lower()

def np_ratio(arr):
    C = Counter(arr)
    return C[0]/C[1]

In [8]:
def get_MLP_model(XX, YY):
    lr = MLPRegressor()
    param_grid = {
        'hidden_layer_sizes': [(100,), (80, 20)], #, (120,60), 
        'solver': [ 'sgd', 'adam'],#'lbfgs',
        'alpha': [1e-4, 1e-4, 5e-5, 5e-4],#[],
        'learning_rate': ['constant', 'invscaling', 'adaptive'],
        'learning_rate_init': [5e-4, 1e-3],#, , 5e-3
        'max_iter': [400]
    }
    grid_search = GridSearchCV(estimator = lr, param_grid = param_grid, cv = 3, n_jobs = -1, verbose = 10, scoring=my_rrmse)
    grid_search.fit(XX, YY)
    print(grid_search.best_params_)
    return MLPRegressor(hidden_layer_sizes = grid_search.best_params_['hidden_layer_sizes'],
                        learning_rate = grid_search.best_params_['learning_rate'],
                        learning_rate_init = grid_search.best_params_['learning_rate_init'],
                        max_iter = grid_search.best_params_['max_iter'],
                        solver = grid_search.best_params_['solver'], alpha = grid_search.best_params_['alpha']), grid_search.best_params_

def get_MLP(abcd):
    return MLPRegressor(hidden_layer_sizes=abcd['hidden_layer_sizes'],
                               solver=abcd['solver'],
                               alpha=abcd['alpha'],
                               learning_rate=abcd['learning_rate'],
                               learning_rate_init=abcd['learning_rate_init'])
        

def get_RF_model(XX, YY):
    rf = RandomForestRegressor()
    max_depth = [int(x) for x in np.linspace(10, 110, num = 15)]
    max_depth.append(None)
    random_grid = {'n_estimators': [int(x) for x in np.linspace(start = 400, stop = 2000, num = 25)],
               'max_features': ['sqrt', 'log2', None],
               'max_depth': max_depth,
               'min_samples_split': [2, 5, 10, 15, 20],
               'min_samples_leaf': [1, 2, 3, 4, 7],
               'bootstrap': [True, False]

    }

    rf_random = RandomizedSearchCV(estimator = rf, scoring=my_rrmse,
                                 param_distributions = random_grid, n_iter = 85,
                                 cv = 3, verbose=10, random_state=42, n_jobs = -1)
    rf_random.fit(XX, YY)
    print(rf_random.best_params_)
    return RandomForestRegressor(n_estimators=rf_random.best_params_['n_estimators'],
                               max_features=rf_random.best_params_['max_features'],
                               max_depth=rf_random.best_params_['max_depth'],
                               min_samples_split=rf_random.best_params_['min_samples_split'],
                               min_samples_leaf=rf_random.best_params_['min_samples_leaf'],
                               bootstrap=rf_random.best_params_['bootstrap']), rf_random.best_params_

def get_RF(abcd):
    return RandomForestRegressor(n_estimators=abcd['n_estimators'],
                               max_features=abcd['max_features'],
                               max_depth=abcd['max_depth'],
                               min_samples_split=abcd['min_samples_split'],
                               min_samples_leaf=abcd['min_samples_leaf'],
                               bootstrap=abcd['bootstrap'])


def get_xgb_model(XX, YY):
    xx = xgbc()
    param_dist = {
        'max_depth': [int(x) for x in np.linspace(1, 30, num = 2)],
        'min_child_weight':range(1,20,3),
        'gamma':[i/10.0 for i in range(0,5)],
        'subsample':[i/100.0 for i in range(50,100,5)],
        'colsample_bytree':[i/10.0 for i in range(6,10)],
        'learning_rate': [1e-5,1e-4,1e-3, 1e-2, 0.1,0.5, 0.9],
        'n_estimators': [2,5,20,80,200,500,800,1200,1700],
        'reg_alpha':[1e-5, 1e-2, 0.1, 1, 100]
    }

    rf_random = RandomizedSearchCV(estimator = xx, scoring=my_rrmse,
                                 param_distributions = param_dist, n_iter = 50,
                                 cv = 3, verbose=2, random_state=42, n_jobs = -1)
    rf_random.fit(XX, YY)
    print(rf_random.best_params_)
    return xgbc(max_depth = rf_random.best_params_['max_depth'],
                               min_child_weight=rf_random.best_params_['min_child_weight'],
                               gamma=rf_random.best_params_['gamma'],
                               subsample=rf_random.best_params_['subsample'],
                               colsample_bytree=rf_random.best_params_['colsample_bytree'],
                               learning_rate=rf_random.best_params_['learning_rate'],
                               n_estimators=rf_random.best_params_['n_estimators'],
                               reg_alpha=rf_random.best_params_['reg_alpha']), rf_random.best_params_

def get_xgb(abcd):
    return xgbc(max_depth = abcd['max_depth'],
                               min_child_weight=abcd['min_child_weight'],
                               gamma=abcd['gamma'],
                               subsample=abcd['subsample'],
                               colsample_bytree=abcd['colsample_bytree'],
                               learning_rate=abcd['learning_rate'],
                               n_estimators=abcd['n_estimators'],
                               reg_alpha=abcd['reg_alpha'])

def get_TN_model(XX, YY):
    return TabNetRegressor(
        momentum = 0.15,
        optimizer_fn=torch.optim.Adam,
        optimizer_params={'lr':4e-3},
        scheduler_params={"step_size":10, # how to use learning rate scheduler
                          "gamma":0.9},
        scheduler_fn=torch.optim.lr_scheduler.StepLR,
    )

    
def get_TN(abcd):
    return TabNetRegressor(
        momentum = 0.15,
        optimizer_fn=torch.optim.Adam,
        optimizer_params={'lr':4e-3},
        scheduler_params={"step_size":10, # how to use learning rate scheduler
                          "gamma":0.9},
        scheduler_fn=torch.optim.lr_scheduler.StepLR,
    )

In [9]:
def stat_significance_trainer_tester(model, X_train, y_train, X_test,
                                     y_test, X_target, y_target, source_name,
                                     target_name, model_name):

    source_rrmses, target_rrmses = [], []
    source_r2s, target_r2s = [], []
    source_rmses, target_rmses = [], []

    kf = KFold(n_splits=4, random_state=None, shuffle=True)
    
    for i, (train_index, test_index) in enumerate(kf.split(X_train)):
        if model_name == 'TN':
            model.fit(X_train[train_index], y_train[train_index], max_epochs=350,patience=3)
        else:
            model.fit(X_train[train_index], y_train[train_index])
        src_model_src_test_pred = model.predict(X_test)
        src_model_target_test_pred = model.predict(X_target)
        
        rrmse, rmse, r2 = metrics(y_test, src_model_src_test_pred)
        source_rrmses.append(rrmse)
        source_rmses.append(rmse)
        source_r2s.append(r2)
        
        rrmse, rmse, r2 = metrics(y_target, src_model_target_test_pred)
        target_rrmses.append(rrmse)
        target_rmses.append(rmse)
        target_r2s.append(r2)

    if model_name == 'TN':
        model.fit(X_train[train_index], y_train[train_index], max_epochs=350,patience=3)
    else:
        model.fit(X_train[train_index], y_train[train_index])
    src_model_src_test_pred = model.predict(X_test)
    rrmse, rmse, r2 = metrics(y_test, src_model_src_test_pred)
    source_rrmses.append(rrmse)
    source_rmses.append(rmse)
    source_r2s.append(r2)

    src_model_target_test_pred = model.predict(X_target)
    rrmse, rmse, r2 = metrics(y_target, src_model_target_test_pred)
    target_rrmses.append(rrmse)
    target_rmses.append(rmse)
    target_r2s.append(r2)

    
    results_rrmse[model_name][stid(source_name, source_name)] = np.mean(source_rrmses)
    results_rmse[model_name][stid(source_name, source_name)] = np.mean(source_rmses)
    results_r2[model_name][stid(source_name, source_name)] = np.mean(source_r2s)
    
    results_rrmse[model_name][stid(source_name, target_name)] = np.mean(target_rrmses)
    results_rmse[model_name][stid(source_name, target_name)] = np.mean(target_rmses)
    results_r2[model_name][stid(source_name, target_name)] = np.mean(target_r2s)
    
    print(stid(source_name, source_name), r44(np.mean(source_rrmses)), r44(np.mean(source_rmses)), 
          r44(np.mean(source_r2s)))
    
    print(stid(source_name, target_name), r44(np.mean(target_rrmses)), r44(np.mean(target_rmses)), 
          r44(np.mean(target_r2s)))

    return model

### Create data for LLMs

In [10]:
feat_def = ['Day of year: ', 'Wave Height: ', 'Water Temperature: ', 'Tide: ', 'Solar radiance: ',
 'Did we have rain the past 3 days? ', 'Did we have rain the past 7 days? ',
 'cummulative rain in the last 3 days: ', 'cummulative rain in the last 3 days: ',
 'Is tide value more than the mean tide? ', 'Alongshire wind speed: ', 'offshore wind speed: ']
instructions = 'We want to predict if the bacteria leven in the beaches is above or bellow safety threshold.\
                        you will receive environmental information and you should answer with wither [unsafe] or [safe]'
def make_serial(train_data, test_data, feat_names, bin_feats = [5,6,9]):
    serialized_dataset = []
    for features, target in zip(train_data, test_data):
        inputs = ''
        outputs = r55(target[0])
        for i in range(len(feat_names)):
            definition = feat_def[i]
            if not pd.isnull(features[i]):
                inputs += definition
                if i in bin_feats:
                    inputs += 'Yes. ' if features[i] == 1.0 else 'No. '
                else:
                    inputs += str(r55(features[i])) + '. '
        serialized_dataset.append({'instruction': instructions, 'input': inputs, 'output': outputs})
    return serialized_dataset
# serialized_chi_train = make_serial(chi_X_train, chi_y_train)
# serialized_chi_test = make_serial(chi_X_train, chi_y_train)
# serialized_san_train = make_serial(chi_X_train, chi_y_train)
# serialized_san_test = make_serial(san_X_test, san_y_test)
# with open('./test_text.pkl', 'wb') as file: 
#     pickle.dump(serialized_dataset, file) 

# Dataset

making two datasets similar

In [11]:
chi_data = pd.read_csv('./Data-CHI.csv') #Chicago Dataset

chi_data = chi_data.drop(columns={"Unnamed: 0.1", "Unnamed: 0", 'date', 'beach_area', "dayofyear", 'tide_gtm', 'dtide_1', 'dtide_2', 'PrecipSum6',
       'Precip24', 'solar_noon', 'APD', 'DPD', 'turbidity', 'atemp', 'comment', 'dtemp', 'beach', 'WDIR', 'WSPD'})
chi_data['date'] = pd.to_datetime(chi_data[['year', 'month', 'day']])
chi_data['doy'] = chi_data['date'].dt.dayofyear
chi_data.loc[chi_data['logENT'] <= 0, 'logENT'] = 0
chi_data = chi_data.drop(columns={'date', 'month', 'day'})
chi_data['tide_gtm'] = np.where(chi_data['tide'] > chi_data.tide.mean(), 1, 0)
chi_data['ENT'] = chi_data['ENT'].apply(lambda x: 0.01 if x < 0.01 else x)
chi_data['logENT'] = np.log10(chi_data['ENT'])
chi_data = chi_data.drop(columns={"ENT"})
desc = chi_data.describe()
desc.loc['missing'] = [round(i, 3) for i in (10014-np.array(desc)[0])/10014*100]
desc

Unnamed: 0,year,logENT,tide,lograin3T,wet3,lograin7T,wet7,WVHT,Wtemp_B,rad,awind,owind,doy,tide_gtm
count,10014.0,10014.0,9434.0,8918.0,8918.0,7867.0,7867.0,5346.0,7105.0,6234.0,7561.0,7561.0,10014.0,10014.0
mean,2019.186339,2.05828,177.061424,-1.708177,0.502848,-0.553355,0.8331,0.260896,20.843561,351.164261,-0.12854,-0.724754,195.557619,0.356101
std,1.990949,0.695017,0.190851,1.674389,0.498058,1.143453,0.37091,0.219719,2.882013,252.721353,3.020049,3.151064,29.703664,0.47887
min,2016.0,-2.0,176.64684,-4.0,0.0,-4.0,0.0,0.01,12.4,0.0,-21.570398,-19.96732,144.0,0.0
25%,2017.0,1.618048,176.92116,-4.0,0.0,-0.69897,1.0,0.11,19.1,66.0,-1.574542,-2.338488,169.0,0.0
50%,2019.0,2.084931,177.024792,-0.978811,1.0,-0.130768,1.0,0.19,21.7,405.5,-0.055805,-0.817459,195.0,0.0
75%,2021.0,2.521437,177.13452,-0.267606,1.0,0.076457,1.0,0.31,22.9,575.0,1.340563,0.935142,221.0,1.0
max,2022.0,5.033514,177.600864,0.479287,1.0,0.641474,1.0,1.39,25.4,757.0,16.02841,20.958388,249.0,1.0
missing,0.0,0.0,5.792,10.945,10.945,21.44,21.44,46.615,29.049,37.747,24.496,24.496,0.0,0.0


In [12]:
san_data = pd.read_csv('./Data-SD.csv') #SanDiego Dataset
san_data['ActivityStartDate'] = pd.to_datetime(san_data['ActivityStartDate'])
san_data['year'] = san_data['ActivityStartDate'].dt.year
san_data = san_data.drop(columns={'visibility', 'MonitoringLocationIdentifier', 'ProjectIdentifier', 'tide_mean',\
                                  'LatitudeMeasure', 'LongitudeMeasure', 'beach_angle', 'WindDir', 'SLP',\
                                  'ActivityStartDate', 'hour', 'WindSpd','AirTemp'})

san_data = san_data.rename(columns={"ResultMeasureValue": "ENT", "WTMP": "Wtemp_B", "Water Level": "tide",\
                                   'DNI_1': 'rad', '3T': 'lograin3T', '7T': 'lograin7T'})
san_data['ENT'] = san_data['ENT'].apply(lambda x: 0.01 if x < 0.01 else x)
san_data['logENT'] = np.log10(san_data['ENT'])
san_data = san_data.drop(columns={"ENT"})
desc = san_data.describe()
desc.loc['missing'] = [round(i, 3) for i in (8730-np.array(desc)[0])/10014*100]
desc

Unnamed: 0,doy,WVHT,Wtemp_B,tide,rad,wet3,wet7,lograin3T,lograin7T,tide_gtm,awind,owind,year,logENT
count,8730.0,8730.0,8730.0,8730.0,8730.0,8730.0,8730.0,8730.0,8730.0,8730.0,8730.0,8730.0,8730.0,8730.0
mean,187.120733,1.034499,18.864313,3.594962,2942.115349,0.184994,0.366781,1.209416,3.687514,0.604582,0.007553,0.764905,2018.130699,1.389429
std,104.049436,0.474261,2.917555,1.712345,3328.580723,0.388315,0.481954,4.831651,9.942902,0.488968,1.975906,1.717646,2.274089,0.912791
min,1.0,0.21,13.3,-1.044,0.0,0.0,0.0,0.0,0.0,0.0,-12.4,-7.7,2014.0,-2.0
25%,103.0,0.72,16.5,2.397,308.0,0.0,0.0,0.0,0.0,0.0,-1.23,0.0,2016.0,1.0
50%,189.0,0.93,18.5,3.677,1622.0,0.0,0.0,0.0,0.0,1.0,0.0,0.45,2019.0,1.0
75%,275.0,1.23,20.8,4.753,4563.25,0.0,1.0,0.0,1.3,1.0,1.47,1.89,2020.0,1.778151
max,365.0,4.38,26.6,7.83,13938.0,1.0,1.0,61.4,79.6,1.0,7.38,8.37,2021.0,4.380211
missing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
common_features = [i for i in san_data.columns if i in chi_data.columns]
print('common_features:', common_features)

common_features: ['doy', 'WVHT', 'Wtemp_B', 'tide', 'rad', 'wet3', 'wet7', 'lograin3T', 'lograin7T', 'tide_gtm', 'awind', 'owind', 'year', 'logENT']


In [14]:
san_data = san_data.loc[:,common_features]
chi_data = chi_data.loc[:,common_features]

In [15]:
names = ['wet3', 'wet7', 'tide_gtm']
for i in [5,6,9]:
    array1 = np.array(san_data).T[i]
    array2 = np.array(chi_data).T[i]
#     print(np_ratio(array1), np_ratio(array2))

# feature_names = ['wet3', 'wet7', 'tide_gtm']
# ratios_san_diego = [4.405572755417957, 1.726420986883198, 0.6540356195528609]
# ratios_chicago = [0.9538633818589026, 0.18284665953218163, 1.8081884464385867]

In [16]:
names = ['rad', 'tide', 'Wtemp_B', 'lograin3T', 'lograin7T', 'WVHT', 'awind', 'owind']
datata = []
for i in [5,4,3,8,9,2,11,12]:
    array1 = np.array(san_data).T[i]
    array2 = np.array(chi_data).T[i]
    datata.append((array1, array2))
# fig, axes = plt.subplots(3, 3, figsize=(16, 16))
# axes = axes.flatten()

# for i, (array1, array2) in enumerate(datata):
#     axes[i].hist(array1, bins=100, alpha=0.5, label='San Diego')
#     axes[i].hist(array2, bins=100, alpha=0.5, label='Chicago')
#     axes[i].set_xlabel('Value')
#     axes[i].set_ylabel('Frequency')
#     axes[i].set_title(f'{names[i]} Distributions')
#     axes[i].legend()

feature_names = ['wet3', 'wet7', 'tide_gtm']
ratios_san_diego = [4.405572755417957, 1.726420986883198, 0.6540356195528609]
ratios_chicago = [0.9538633818589026, 0.18284665953218163, 1.8081884464385867]
bar_width = 0.25
# index = range(len(feature_names))
# axes[8].bar(index, ratios_san_diego, bar_width, label='San Diego', color='skyblue')
# axes[8].bar([i + bar_width for i in index], ratios_chicago, bar_width, label='Chicago', color='orange')
# axes[8].set_xlabel('Features')
# axes[8].set_ylabel('0 to 1 Ratios')
# axes[8].set_title('Distributions of Categorical Features')
# axes[8].set_xticks([i + bar_width / 2 for i in index], feature_names)
# axes[8].legend()

# plt.tight_layout()
# plt.savefig('dists.png')

drop nan disabled

cite 1: drop nan, replace with mean, noisy mean

In [17]:
# chi_data = chi_data.dropna()
# san_data = san_data.dropna()

In [18]:
feature_names = chi_data.columns[:-2]
print('feature_names', list(feature_names))

feature_names ['doy', 'WVHT', 'Wtemp_B', 'tide', 'rad', 'wet3', 'wet7', 'lograin3T', 'lograin7T', 'tide_gtm', 'awind', 'owind']


train/test split

In [19]:
target = 'logENT'

In [20]:
# for i in range(2014,2023,1):
#     print('year:', i)
#     year_sample = np.array(chi_data.loc[chi_data['year'] == i].loc[:,chi_data.columns == target]).flatten()
#     if len(year_sample):
#         print('\tlen', len(year_sample))
#         print('\tnp ratio', np_ratio(year_sample))

In [21]:
# for i in range(2014,2023,1):
#     print('year:', i)
#     year_sample = np.array(san_data.loc[san_data['year'] == i].loc[:,san_data.columns == target]).flatten()
#     if len(year_sample):
#         print('\tlen', len(year_sample))
#         print('\tnp ratio', np_ratio(year_sample))

In [22]:
chi_X_train = np.array(chi_data.loc[chi_data['year'] <= 2020].loc[:,chi_data.columns != target])[:, :-1]
chi_X_test = np.array(chi_data.loc[chi_data['year'] > 2020].loc[:,chi_data.columns != target])[:, :-1]
chi_y_train = np.array(chi_data.loc[chi_data['year'] <= 2020].loc[:,chi_data.columns == target])
chi_y_test = np.array(chi_data.loc[chi_data['year'] > 2020].loc[:,chi_data.columns == target])

san_X_train = np.array(san_data.loc[san_data['year'] <= 2020].loc[:,san_data.columns != target])[:, :-1]
san_X_test = np.array(san_data.loc[san_data['year'] > 2020].loc[:,san_data.columns != target])[:, :-1]
san_y_train = np.array(san_data.loc[san_data['year'] <= 2020].loc[:,san_data.columns == target])
san_y_test = np.array(san_data.loc[san_data['year'] > 2020].loc[:,san_data.columns == target])

In [23]:
print('train-test years', 'chi, train <=2020, test > 2020')
print('train-test years', 'sn, train <=2020, test > 2020')

train-test years chi, train <=2020, test > 2020
train-test years sn, train <=2020, test > 2020


Random split

In [24]:
# target = 'logENT'
# chi_X = np.array(chi_data.loc[:,chi_data.columns != target])
# chi_y = np.array(chi_data[target])
# san_X = np.array(san_data.loc[:,san_data.columns != target])
# san_y = np.array(san_data[target])

# san_X_train, san_X_test, san_y_train, san_y_test = train_test_split(san_X, san_y, test_size=0.2, random_state=42)
# chi_X_train, chi_X_test, chi_y_train, chi_y_test = train_test_split(chi_X, chi_y, test_size=0.2, random_state=42)

In [25]:
print('len chi_X_train:', chi_X_train.shape, chi_X_train.shape[0] == chi_y_train.shape[0])
print('len chi_X_test:', chi_X_test.shape, chi_X_test.shape[0] == chi_y_test.shape[0])

print('len san_X_train:', san_X_train.shape, san_X_train.shape[0] == san_y_train.shape[0])
print('len san_X_test:', san_X_test.shape, san_X_test.shape[0] == san_y_test.shape[0])

len chi_X_train: (6304, 12) True
len chi_X_test: (3710, 12) True
len san_X_train: (7172, 12) True
len san_X_test: (1558, 12) True


replace nan

In [26]:
chi_train_mean = np.ma.array(chi_X_train, mask=np.isnan(chi_X_train)).mean(axis=0)
san_train_mean = np.ma.array(san_X_train, mask=np.isnan(san_X_train)).mean(axis=0)

chi_train_std = np.ma.array(chi_X_train, mask=np.isnan(chi_X_train)).std(axis=0)
san_train_std = np.ma.array(san_X_train, mask=np.isnan(san_X_train)).std(axis=0)

In [27]:
# Nan values handling: Noisy mean
for i in range(len(chi_X_test)):
    for j in range(len(chi_X_test[i])):
        if np.isnan(chi_X_test[i][j]):
            chi_X_test[i][j] = np.random.normal(chi_train_mean[j], chi_train_std[j])

for i in range(len(chi_X_train)):
    for j in range(len(chi_X_train[i])):
        if np.isnan(chi_X_train[i][j]):
            chi_X_train[i][j] = np.random.normal(chi_train_mean[j], chi_train_std[j])

In [28]:
# san_X_test = np.where(np.isnan(san_X_test), san_train_mean, san_X_test)
# san_X_train = np.where(np.isnan(san_X_train), san_train_mean, san_X_train)

# chi_X_test = np.where(np.isnan(chi_X_test), chi_train_mean, chi_X_test)
# chi_X_train = np.where(np.isnan(chi_X_train), chi_train_mean, chi_X_train)

scaling

In [29]:
scaler = MinMaxScaler().fit(chi_X_train)
chi_X_train = scaler.transform(chi_X_train)
chi_X_test = scaler.transform(chi_X_test)

scaler = MinMaxScaler().fit(san_X_train)
san_X_train = scaler.transform(san_X_train)
san_X_test = scaler.transform(san_X_test)

In [30]:
san_y_test = san_y_test.flatten()
san_y_train = san_y_train.flatten()
chi_y_train = chi_y_train.flatten()
chi_y_test = chi_y_test.flatten()

# Models

## Supervised

In [31]:
chi_regr_rf, chirfparam = get_RF_model(chi_X_train, chi_y_train)
model_RF_chi = stat_significance_trainer_tester(chi_regr_rf, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'RF')

Fitting 3 folds for each of 85 candidates, totalling 255 fits
[CV 1/3; 1/85] START bootstrap=False, max_depth=110, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=1866
[CV 3/3; 1/85] START bootstrap=False, max_depth=110, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=1866
[CV 2/3; 1/85] START bootstrap=False, max_depth=110, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=1866
[CV 1/3; 2/85] START bootstrap=True, max_depth=67, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=1733
[CV 2/3; 2/85] START bootstrap=True, max_depth=67, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=1733
[CV 3/3; 2/85] START bootstrap=True, max_depth=67, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=1733
[CV 1/3; 3/85] START bootstrap=True, max_depth=10, max_features=log2, min_samples_leaf=2, min_samples_split=20, n_estimators=1066
[CV 2/3; 3/85] START bootstr

In [32]:
san_regr_rf, sanrfparam = get_RF_model(san_X_train, san_y_train)
model_RF_san = stat_significance_trainer_tester(san_regr_rf, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'RF')

Fitting 3 folds for each of 85 candidates, totalling 255 fits
[CV 1/3; 1/85] START bootstrap=False, max_depth=110, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=1866
[CV 2/3; 1/85] START bootstrap=False, max_depth=110, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=1866
[CV 3/3; 1/85] START bootstrap=False, max_depth=110, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=1866
[CV 1/3; 2/85] START bootstrap=True, max_depth=67, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=1733
[CV 2/3; 2/85] START bootstrap=True, max_depth=67, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=1733
[CV 3/3; 2/85] START bootstrap=True, max_depth=67, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=1733
[CV 1/3; 3/85] START bootstrap=True, max_depth=10, max_features=log2, min_samples_leaf=2, min_samples_split=20, n_estimators=1066
[CV 2/3; 3/85] START bootstr



[CV 1/3; 10/85] END bootstrap=True, max_depth=67, max_features=None, min_samples_leaf=7, min_samples_split=20, n_estimators=400;, score=-0.663 total time=   9.5s
[CV 3/3; 10/85] START bootstrap=True, max_depth=67, max_features=None, min_samples_leaf=7, min_samples_split=20, n_estimators=400
[CV 2/3; 5/85] END bootstrap=False, max_depth=95, max_features=None, min_samples_leaf=7, min_samples_split=15, n_estimators=1600;, score=-0.695 total time=  54.1s
[CV 1/3; 11/85] START bootstrap=False, max_depth=31, max_features=None, min_samples_leaf=3, min_samples_split=10, n_estimators=1666
[CV 2/3; 9/85] END bootstrap=True, max_depth=31, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=1400;, score=-0.565 total time=  17.0s
[CV 1/3; 5/85] END bootstrap=False, max_depth=95, max_features=None, min_samples_leaf=7, min_samples_split=15, n_estimators=1600;, score=-0.804 total time=  57.5s
[CV 2/3; 11/85] START bootstrap=False, max_depth=31, max_features=None, min_samples_leaf=

In [33]:
chi_regr_xgb, chixgbparam = get_xgb_model(chi_X_train, chi_y_train)
model_xgb_chi = stat_significance_trainer_tester(chi_regr_xgb, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'XGB')

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.5, max_depth=1, min_child_weight=16, n_estimators=2, reg_alpha=1e-05, subsample=0.9; total time=   0.1s
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.5, max_depth=1, min_child_weight=16, n_estimators=2, reg_alpha=1e-05, subsample=0.9; total time=   0.1s
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.5, max_depth=1, min_child_weight=16, n_estimators=2, reg_alpha=1e-05, subsample=0.9; total time=   0.1s
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.9, max_depth=30, min_child_weight=19, n_estimators=5, reg_alpha=1, subsample=0.6; total time=   0.1s
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.9, max_depth=30, min_child_weight=19, n_estimators=5, reg_alpha=1, subsample=0.6; total time=   0.1s
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.9, max_depth=30, min_child_weight=19, n_estimators=5, reg_alpha=1, subsample

In [34]:
san_regr_xgb, sanxgbparam = get_xgb_model(san_X_train, san_y_train)
model_xgb_san = stat_significance_trainer_tester(san_regr_xgb, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'XGB')

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.5, max_depth=1, min_child_weight=16, n_estimators=2, reg_alpha=1e-05, subsample=0.9; total time=   0.0s
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.5, max_depth=1, min_child_weight=16, n_estimators=2, reg_alpha=1e-05, subsample=0.9; total time=   0.0s
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.5, max_depth=1, min_child_weight=16, n_estimators=2, reg_alpha=1e-05, subsample=0.9; total time=   0.0s
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.9, max_depth=30, min_child_weight=19, n_estimators=5, reg_alpha=1, subsample=0.6; total time=   0.1s
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.9, max_depth=30, min_child_weight=19, n_estimators=5, reg_alpha=1, subsample=0.6; total time=   0.1s
[CV] END colsample_bytree=0.6, gamma=0.2, learning_rate=0.9, max_depth=30, min_child_weight=19, n_estimators=5, reg_alpha=1, subsample

In [35]:
chi_regr_MLP, chimlpparam = get_MLP_model(chi_X_train, chi_y_train)
model_MLP_chi = stat_significance_trainer_tester(chi_regr_MLP, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'MLP')

Fitting 3 folds for each of 96 candidates, totalling 288 fits
[CV 1/3; 1/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 1/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 2/3; 1/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 1/3; 2/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 2/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 2/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 3/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.



[CV 1/3; 5/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.353 total time=   5.2s
[CV 1/3; 10/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 8/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.283 total time=   1.8s
[CV 2/3; 10/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam




[CV 2/3; 5/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.399 total time=   5.4s
[CV 3/3; 10/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 6/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.286 total time=   4.8s
[CV 1/3; 11/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 6/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.282 total time=   5.0s
[CV 2/3; 11/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 9/96] END alpha=0.0001, hidden_layer_sizes=(100,), learni



[CV 1/3; 17/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.472 total time=   7.6s
[CV 1/3; 23/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 21/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.375 total time=   4.1s
[CV 2/3; 23/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 17/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.419 total time=   7.7s
[CV 3/3; 23/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd




[CV 2/3; 21/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.301 total time=   4.0s
[CV 1/3; 24/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 1/3; 22/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.364 total time=   3.2s
[CV 2/3; 24/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 2/3; 22/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.279 total time=   3.2s
[CV 3/3; 24/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 3/3; 22/96] END alpha=0.0001, hidden_layer_sizes=(80



[CV 3/3; 29/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.389 total time=   5.2s
[CV 2/3; 35/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 33/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.297 total time=   2.4s
[CV 3/3; 35/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 33/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.300 total time=   3.0s
[CV 1/3; 36/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 1/3; 34/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_



[CV 2/3; 41/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.682 total time=   7.5s
[CV 2/3; 46/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 44/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.282 total time=   4.2s
[CV 3/3; 46/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 41/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.460 total time=   7.6s
[CV 1/3; 47/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd




[CV 2/3; 45/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.301 total time=   2.9s
[CV 2/3; 47/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 45/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.373 total time=   5.3s
[CV 3/3; 47/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 45/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.300 total time=   3.7s
[CV 1/3; 48/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam




[CV 2/3; 43/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.389 total time=   7.7s
[CV 2/3; 48/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 1/3; 46/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.367 total time=   4.1s
[CV 3/3; 48/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 2/3; 46/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.284 total time=   4.8s
[CV 1/3; 49/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 1/3; 48/96] END alpha=0.0001, hidden_layer_sizes=(80, 



[CV 1/3; 47/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.372 total time=   7.4s
[CV 1/3; 52/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 1/3; 50/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.367 total time=   3.2s
[CV 2/3; 52/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 2/3; 51/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.297 total time=   2.1s
[CV 3/3; 52/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 2/3; 50/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=



[CV 3/3; 47/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.287 total time=   7.7s
[CV 2/3; 53/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 51/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.290 total time=   3.2s
[CV 3/3; 53/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 1/3; 51/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.365 total time=   3.8s
[CV 1/3; 54/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 50/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_



[CV 1/3; 53/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.354 total time=   5.5s
[CV 1/3; 58/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 54/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.284 total time=   3.9s
[CV 2/3; 58/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 56/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.284 total time=   1.9s
[CV 3/3; 58/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam




[CV 2/3; 53/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.392 total time=   5.3s
[CV 1/3; 59/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 53/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.416 total time=   5.1s
[CV 2/3; 59/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 57/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.302 total time=   1.5s
[CV 3/3; 59/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 57/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=a



[CV 1/3; 63/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.366 total time=   7.3s
[CV 3/3; 68/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 1/3; 68/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.374 total time=   1.8s
[CV 1/3; 69/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 1/3; 66/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.382 total time=   5.7s
[CV 2/3; 69/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 66/96] END alpha=5e-05, hidden_layer_sizes=(80, 2



[CV 3/3; 65/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.518 total time=   7.4s
[CV 3/3; 70/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 68/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.281 total time=   4.4s
[CV 1/3; 71/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 69/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.374 total time=   3.0s
[CV 2/3; 71/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 70/96] END alpha=5e-05, hidden_layer_sizes=(80, 20),



[CV 3/3; 69/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.295 total time=   5.5s
[CV 2/3; 73/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 1/3; 72/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.378 total time=   2.8s
[CV 3/3; 73/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 2/3; 73/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.304 total time=   1.7s
[CV 1/3; 74/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 73/96] END alpha=0.0005, hidden_layer_sizes=(100,), learni



[CV 1/3; 71/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.370 total time=   7.4s
[CV 3/3; 75/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd




[CV 2/3; 71/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.292 total time=   7.8s
[CV 1/3; 76/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 1/3; 74/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.383 total time=   3.4s
[CV 2/3; 76/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 3/3; 71/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.287 total time=   7.8s
[CV 3/3; 76/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam




[CV 1/3; 75/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.361 total time=   2.9s
[CV 1/3; 77/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 2/3; 74/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.284 total time=   3.6s
[CV 2/3; 77/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 2/3; 75/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.300 total time=   3.0s
[CV 3/3; 77/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 1/3; 76/96] END alpha=0.0005, hidden_layer_sizes=(100,), lear



[CV 1/3; 77/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.354 total time=   4.8s
[CV 3/3; 81/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd




[CV 3/3; 77/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.404 total time=   4.8s
[CV 1/3; 82/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 78/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.286 total time=   3.8s
[CV 2/3; 82/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 78/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.286 total time=   3.6s
[CV 3/3; 82/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 81/96] END alpha=0.0005, hidden_layer_sizes=(100,)



[CV 3/3; 89/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.362 total time=   3.7s
[CV 2/3; 91/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.330 total time=   0.3s
[CV 3/3; 91/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 92/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 2/3; 89/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.376 total time=   4.2s
[CV 2/3; 92/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam




[CV 3/3; 87/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.288 total time=   7.3s
[CV 3/3; 92/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 2/3; 90/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.291 total time=   4.4s
[CV 1/3; 93/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 90/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.282 total time=   3.3s
[CV 2/3; 93/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 1/3; 92/96] END alpha=0.0005, hidden_layer_size



[CV 1/3; 89/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.348 total time=   7.3s
[CV 1/3; 94/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 92/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.290 total time=   2.8s
[CV 2/3; 94/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 92/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.284 total time=   3.1s
[CV 3/3; 94/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 91/96] END alpha=0.0005, hidden_layer_si



[CV 1/3; 95/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.363 total time=   6.4s




[CV 3/3; 95/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.283 total time=   6.0s
{'alpha': 0.0001, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive', 'learning_rate_init': 0.0005, 'max_iter': 400, 'solver': 'adam'}
chi-chi 0.403 0.786 -0.209
chi-san 0.655 0.925 -0.186


In [36]:
san_regr_MLP, sanmlpparam = get_MLP_model(san_X_train, san_y_train)
model_MLP_san = stat_significance_trainer_tester(san_regr_MLP, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'MLP')

Fitting 3 folds for each of 96 candidates, totalling 288 fits
[CV 1/3; 1/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 2/3; 1/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 1/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 1/3; 2/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 2/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 2/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 3/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.



[CV 3/3; 5/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.771 total time=   2.1s
[CV 1/3; 7/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 4/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.654 total time=   4.1s
[CV 2/3; 7/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 4/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.689 total time=   4.2s
[CV 3/3; 7/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 7/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_ra



[CV 1/3; 6/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.692 total time=   6.1s
[CV 2/3; 10/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 8/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.605 total time=   4.2s
[CV 1/3; 9/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.674 total time=   3.6s
[CV 3/3; 10/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 11/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd




[CV 3/3; 6/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.652 total time=   6.2s
[CV 2/3; 11/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 9/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.592 total time=   3.8s
[CV 3/3; 11/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 8/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.669 total time=   5.2s
[CV 1/3; 12/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 1/3; 11/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_



[CV 1/3; 10/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.692 total time=   6.0s
[CV 1/3; 14/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 10/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.613 total time=   5.3s
[CV 2/3; 14/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 12/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.686 total time=   5.4s
[CV 3/3; 14/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 12/96] END alpha=0.0001, hidden_layer_sizes=(100,)



[CV 1/3; 14/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.712 total time=   9.3s
[CV 3/3; 17/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 15/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.628 total time=   5.3s
[CV 1/3; 18/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 17/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.684 total time=   0.4s
[CV 2/3; 18/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 15/96] END alpha=0.0001, hidden_layer_s



[CV 1/3; 17/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.707 total time=   8.5s
[CV 2/3; 21/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 20/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.645 total time=   1.7s
[CV 3/3; 21/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 18/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.665 total time=   6.5s
[CV 1/3; 22/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 18/96] END alpha=0.0001, hidden_layer_siz



[CV 2/3; 18/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.609 total time=   8.7s
[CV 3/3; 22/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 20/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.713 total time=   5.9s
[CV 1/3; 23/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 21/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.683 total time=   5.0s
[CV 2/3; 23/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 20/96] END alpha=0.0001, hidden_layer_sizes=(



[CV 1/3; 26/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.687 total time=   5.4s
[CV 1/3; 29/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 2/3; 27/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.587 total time=   4.1s
[CV 2/3; 29/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 27/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.626 total time=   2.8s
[CV 3/3; 29/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 2/3; 26/96] END alpha=0.0001, hidden_layer_sizes=(100,), lear



[CV 3/3; 26/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.650 total time=   6.4s
[CV 3/3; 30/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 28/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.693 total time=   4.0s
[CV 1/3; 31/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 28/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.599 total time=   3.9s
[CV 2/3; 31/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 31/96] END alpha=0.0001, hidden_layer_sizes=(100,), lea



[CV 1/3; 30/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.692 total time=   6.0s
[CV 1/3; 35/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 30/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.606 total time=   6.0s
[CV 2/3; 35/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 33/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.679 total time=   3.8s
[CV 3/3; 35/96] START alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 33/96] END alpha=0.0001, hidden_layer_sizes=(100,), learni



[CV 3/3; 34/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.656 total time=   6.3s
[CV 1/3; 39/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 37/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.677 total time=   4.6s
[CV 2/3; 39/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 36/96] END alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.651 total time=   5.2s
[CV 3/3; 39/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 38/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), l



[CV 1/3; 38/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.705 total time=   8.6s
[CV 3/3; 42/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 41/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.740 total time=   2.0s
[CV 1/3; 43/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 40/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.608 total time=   5.5s
[CV 2/3; 43/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 43/96] END alpha=0.0001, hidden_layer_siz



[CV 1/3; 41/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.687 total time=   8.7s
[CV 2/3; 45/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 42/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.655 total time=   6.2s
[CV 3/3; 45/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 1/3; 42/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.703 total time=   8.4s
[CV 1/3; 46/96] START alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 44/96] END alpha=0.0001, hidden_layer_si



[CV 2/3; 46/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.611 total time=   9.1s
[CV 1/3; 50/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam




[CV 3/3; 46/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.659 total time=   9.5s
[CV 2/3; 50/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 48/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.625 total time=   5.5s
[CV 3/3; 50/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 48/96] END alpha=0.0001, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.707 total time=   6.4s
[CV 1/3; 51/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 49/96] END alpha=5e-05, hidden_layer_sizes=(100,), learn



[CV 2/3; 50/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.604 total time=   6.1s
[CV 2/3; 54/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 52/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.680 total time=   4.1s
[CV 3/3; 54/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 3/3; 53/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.745 total time=   2.3s
[CV 1/3; 55/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 53/96] END alpha=5e-05, hidden_layer_sizes=(100,), learni



[CV 3/3; 56/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.657 total time=   6.0s
[CV 2/3; 60/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 1/3; 59/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.675 total time=   3.4s
[CV 3/3; 60/96] START alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 1/3; 58/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.688 total time=   6.1s
[CV 1/3; 61/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 59/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rat



[CV 2/3; 58/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.604 total time=   6.6s
[CV 1/3; 62/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 60/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.698 total time=   4.4s
[CV 2/3; 62/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam




[CV 3/3; 58/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.647 total time=   6.4s
[CV 3/3; 62/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 60/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.614 total time=   4.7s
[CV 1/3; 63/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 60/96] END alpha=5e-05, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.662 total time=   3.5s
[CV 2/3; 63/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 61/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning



[CV 2/3; 62/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.614 total time=   9.4s
[CV 3/3; 66/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 64/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.636 total time=   6.7s
[CV 1/3; 67/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 64/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.677 total time=   5.6s
[CV 2/3; 67/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 67/96] END alpha=5e-05, hidden_layer_sizes=(80, 2



[CV 1/3; 65/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-1.080 total time=   8.5s
[CV 1/3; 69/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 66/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.658 total time=   6.1s
[CV 2/3; 69/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 1/3; 66/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.712 total time=   8.9s
[CV 3/3; 69/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd




[CV 2/3; 66/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.616 total time=   9.2s
[CV 1/3; 70/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam




[CV 3/3; 67/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.750 total time=   8.6s
[CV 2/3; 70/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam




[CV 1/3; 68/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.726 total time=   9.5s
[CV 3/3; 70/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 69/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.684 total time=   6.2s
[CV 1/3; 71/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 68/96] END alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.688 total time=   7.3s
[CV 2/3; 71/96] START alpha=5e-05, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 68/96] END alpha=5e-05, hidden_layer_sizes=(80, 20),



[CV 1/3; 77/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.722 total time=   5.8s
[CV 3/3; 81/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd
[CV 3/3; 77/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.836 total time=   5.8s
[CV 1/3; 82/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 80/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.608 total time=   3.4s
[CV 2/3; 82/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 1/3; 78/96] END alpha=0.0005, hidden_layer_sizes=(100,), l



[CV 1/3; 81/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=sgd;, score=-0.682 total time=   3.4s
[CV 1/3; 83/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 80/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.656 total time=   3.8s
[CV 2/3; 83/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 80/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.708 total time=   5.2s
[CV 3/3; 83/96] START alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 81/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning



[CV 3/3; 82/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.663 total time=   6.1s
[CV 3/3; 86/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam
[CV 2/3; 84/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.606 total time=   3.0s
[CV 1/3; 87/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 84/96] END alpha=0.0005, hidden_layer_sizes=(100,), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.651 total time=   3.1s
[CV 2/3; 87/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 2/3; 85/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), l



[CV 2/3; 86/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.606 total time=   9.2s
[CV 3/3; 90/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.0005, max_iter=400, solver=adam




[CV 3/3; 86/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.670 total time=   9.3s
[CV 1/3; 91/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 3/3; 88/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=constant, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.678 total time=   5.9s
[CV 2/3; 91/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 91/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.693 total time=   0.5s
[CV 3/3; 91/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=sgd
[CV 1/3; 88/96] END alpha=0.0005, hidden_layer_sizes=



[CV 2/3; 92/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=invscaling, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.629 total time=   9.2s
[CV 2/3; 96/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 1/3; 94/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.714 total time=   6.4s
[CV 3/3; 96/96] START alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam
[CV 3/3; 95/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=sgd;, score=-0.629 total time=   4.4s
[CV 1/3; 96/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.705 total time=   4.6s
[CV 1/3; 95/96] END alph



[CV 2/3; 94/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.0005, max_iter=400, solver=adam;, score=-0.625 total time=   8.6s
[CV 3/3; 96/96] END alpha=0.0005, hidden_layer_sizes=(80, 20), learning_rate=adaptive, learning_rate_init=0.001, max_iter=400, solver=adam;, score=-0.712 total time=   5.5s
{'alpha': 0.0005, 'hidden_layer_sizes': (80, 20), 'learning_rate': 'constant', 'learning_rate_init': 0.001, 'max_iter': 400, 'solver': 'sgd'}
san-san 0.579 0.817 0.075
san-chi 0.543 1.059 -1.198


In [37]:
chi_regr_TN, chiTNparam = get_TN_model(chi_X_train, chi_y_train), ''
model_TN_chi = stat_significance_trainer_tester(chi_regr_TN, chi_X_train, chi_y_train.reshape(-1, 1), chi_X_test, chi_y_test.reshape(-1, 1), san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'TN')



epoch 0  | loss: 5.94476 |  0:00:00s
epoch 1  | loss: 4.09619 |  0:00:01s
epoch 2  | loss: 2.8284  |  0:00:01s
epoch 3  | loss: 2.05948 |  0:00:01s
epoch 4  | loss: 1.66461 |  0:00:01s
epoch 5  | loss: 1.27581 |  0:00:02s
epoch 6  | loss: 1.01555 |  0:00:02s
epoch 7  | loss: 0.82439 |  0:00:02s
epoch 8  | loss: 0.73387 |  0:00:02s
epoch 9  | loss: 0.6186  |  0:00:02s
epoch 10 | loss: 0.58606 |  0:00:03s
epoch 11 | loss: 0.5413  |  0:00:03s
epoch 12 | loss: 0.50778 |  0:00:03s
epoch 13 | loss: 0.50284 |  0:00:03s
epoch 14 | loss: 0.49446 |  0:00:04s
epoch 15 | loss: 0.47436 |  0:00:04s
epoch 16 | loss: 0.46972 |  0:00:04s
epoch 17 | loss: 0.44877 |  0:00:04s
epoch 18 | loss: 0.46114 |  0:00:05s
epoch 19 | loss: 0.45182 |  0:00:05s
epoch 20 | loss: 0.43537 |  0:00:05s
epoch 21 | loss: 0.42363 |  0:00:05s
epoch 22 | loss: 0.44037 |  0:00:06s
epoch 23 | loss: 0.42425 |  0:00:06s
epoch 24 | loss: 0.43496 |  0:00:06s
epoch 25 | loss: 0.43201 |  0:00:06s
epoch 26 | loss: 0.43556 |  0:00:07s
e



epoch 0  | loss: 5.98582 |  0:00:00s
epoch 1  | loss: 4.00767 |  0:00:00s
epoch 2  | loss: 2.89244 |  0:00:00s
epoch 3  | loss: 2.06698 |  0:00:01s
epoch 4  | loss: 1.61531 |  0:00:01s
epoch 5  | loss: 1.22932 |  0:00:01s
epoch 6  | loss: 1.04901 |  0:00:01s
epoch 7  | loss: 0.88274 |  0:00:02s
epoch 8  | loss: 0.78833 |  0:00:02s
epoch 9  | loss: 0.70955 |  0:00:02s
epoch 10 | loss: 0.63296 |  0:00:02s
epoch 11 | loss: 0.61643 |  0:00:03s
epoch 12 | loss: 0.54975 |  0:00:03s
epoch 13 | loss: 0.54981 |  0:00:03s
epoch 14 | loss: 0.52197 |  0:00:03s
epoch 15 | loss: 0.50906 |  0:00:03s
epoch 16 | loss: 0.47478 |  0:00:04s
epoch 17 | loss: 0.48943 |  0:00:04s
epoch 18 | loss: 0.46609 |  0:00:04s
epoch 19 | loss: 0.47356 |  0:00:04s
epoch 20 | loss: 0.47543 |  0:00:05s
epoch 21 | loss: 0.45797 |  0:00:05s
epoch 22 | loss: 0.47669 |  0:00:05s
epoch 23 | loss: 0.45253 |  0:00:05s
epoch 24 | loss: 0.45798 |  0:00:06s
epoch 25 | loss: 0.46165 |  0:00:06s
epoch 26 | loss: 0.46545 |  0:00:06s
e



epoch 0  | loss: 5.90008 |  0:00:00s
epoch 1  | loss: 4.08427 |  0:00:00s
epoch 2  | loss: 2.87895 |  0:00:00s
epoch 3  | loss: 2.11023 |  0:00:00s
epoch 4  | loss: 1.63187 |  0:00:01s
epoch 5  | loss: 1.25157 |  0:00:01s
epoch 6  | loss: 1.05171 |  0:00:01s
epoch 7  | loss: 0.83798 |  0:00:01s
epoch 8  | loss: 0.74696 |  0:00:02s
epoch 9  | loss: 0.68571 |  0:00:02s
epoch 10 | loss: 0.63712 |  0:00:02s
epoch 11 | loss: 0.59908 |  0:00:02s
epoch 12 | loss: 0.55955 |  0:00:03s
epoch 13 | loss: 0.52872 |  0:00:03s
epoch 14 | loss: 0.52558 |  0:00:03s
epoch 15 | loss: 0.48209 |  0:00:03s
epoch 16 | loss: 0.48253 |  0:00:03s
epoch 17 | loss: 0.47659 |  0:00:04s
epoch 18 | loss: 0.46427 |  0:00:04s
epoch 19 | loss: 0.45984 |  0:00:04s
epoch 20 | loss: 0.4543  |  0:00:04s
epoch 21 | loss: 0.44914 |  0:00:05s
epoch 22 | loss: 0.44965 |  0:00:05s
epoch 23 | loss: 0.44592 |  0:00:05s
epoch 24 | loss: 0.43721 |  0:00:05s
epoch 25 | loss: 0.43724 |  0:00:06s
epoch 26 | loss: 0.44222 |  0:00:06s
e



epoch 0  | loss: 5.93936 |  0:00:00s
epoch 1  | loss: 4.07084 |  0:00:00s
epoch 2  | loss: 2.86678 |  0:00:00s
epoch 3  | loss: 2.08062 |  0:00:00s
epoch 4  | loss: 1.62119 |  0:00:01s
epoch 5  | loss: 1.29797 |  0:00:01s
epoch 6  | loss: 1.04128 |  0:00:01s
epoch 7  | loss: 0.90898 |  0:00:01s
epoch 8  | loss: 0.77138 |  0:00:02s
epoch 9  | loss: 0.69401 |  0:00:02s
epoch 10 | loss: 0.62121 |  0:00:02s
epoch 11 | loss: 0.58002 |  0:00:02s
epoch 12 | loss: 0.55566 |  0:00:03s
epoch 13 | loss: 0.54121 |  0:00:03s
epoch 14 | loss: 0.51259 |  0:00:03s
epoch 15 | loss: 0.51293 |  0:00:03s
epoch 16 | loss: 0.50228 |  0:00:04s
epoch 17 | loss: 0.49034 |  0:00:04s
epoch 18 | loss: 0.47341 |  0:00:04s
epoch 19 | loss: 0.47722 |  0:00:04s
epoch 20 | loss: 0.45611 |  0:00:05s
epoch 21 | loss: 0.45472 |  0:00:05s
epoch 22 | loss: 0.46541 |  0:00:05s
epoch 23 | loss: 0.45324 |  0:00:05s
epoch 24 | loss: 0.45703 |  0:00:05s
epoch 25 | loss: 0.44901 |  0:00:06s
epoch 26 | loss: 0.44274 |  0:00:06s
e



epoch 0  | loss: 5.93936 |  0:00:00s
epoch 1  | loss: 4.07084 |  0:00:00s
epoch 2  | loss: 2.86678 |  0:00:00s
epoch 3  | loss: 2.08062 |  0:00:00s
epoch 4  | loss: 1.62119 |  0:00:01s
epoch 5  | loss: 1.29797 |  0:00:01s
epoch 6  | loss: 1.04128 |  0:00:01s
epoch 7  | loss: 0.90898 |  0:00:01s
epoch 8  | loss: 0.77138 |  0:00:02s
epoch 9  | loss: 0.69401 |  0:00:02s
epoch 10 | loss: 0.62121 |  0:00:02s
epoch 11 | loss: 0.58002 |  0:00:02s
epoch 12 | loss: 0.55566 |  0:00:03s
epoch 13 | loss: 0.54121 |  0:00:03s
epoch 14 | loss: 0.51259 |  0:00:03s
epoch 15 | loss: 0.51293 |  0:00:03s
epoch 16 | loss: 0.50228 |  0:00:04s
epoch 17 | loss: 0.49034 |  0:00:04s
epoch 18 | loss: 0.47341 |  0:00:04s
epoch 19 | loss: 0.47722 |  0:00:04s
epoch 20 | loss: 0.45611 |  0:00:05s
epoch 21 | loss: 0.45472 |  0:00:05s
epoch 22 | loss: 0.46541 |  0:00:05s
epoch 23 | loss: 0.45324 |  0:00:05s
epoch 24 | loss: 0.45703 |  0:00:05s
epoch 25 | loss: 0.44901 |  0:00:06s
epoch 26 | loss: 0.44274 |  0:00:06s
e

In [38]:
chi_regr_TN, sanTNparam = get_TN_model(san_X_train, san_y_train), ''
model_RF_san = stat_significance_trainer_tester(chi_regr_TN, san_X_train, san_y_train.reshape(-1, 1), san_X_test, san_y_test.reshape(-1, 1), chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'TN')



epoch 0  | loss: 4.05426 |  0:00:00s
epoch 1  | loss: 2.64254 |  0:00:00s
epoch 2  | loss: 1.89943 |  0:00:00s
epoch 3  | loss: 1.52869 |  0:00:01s
epoch 4  | loss: 1.27723 |  0:00:01s
epoch 5  | loss: 1.13047 |  0:00:01s
epoch 6  | loss: 1.02622 |  0:00:02s
epoch 7  | loss: 0.91531 |  0:00:02s
epoch 8  | loss: 0.88081 |  0:00:02s
epoch 9  | loss: 0.85665 |  0:00:03s
epoch 10 | loss: 0.82162 |  0:00:03s
epoch 11 | loss: 0.79366 |  0:00:03s
epoch 12 | loss: 0.80053 |  0:00:04s
epoch 13 | loss: 0.78459 |  0:00:04s
epoch 14 | loss: 0.76369 |  0:00:04s
epoch 15 | loss: 0.77311 |  0:00:04s
epoch 16 | loss: 0.76238 |  0:00:05s
epoch 17 | loss: 0.75816 |  0:00:05s
epoch 18 | loss: 0.75007 |  0:00:05s
epoch 19 | loss: 0.74963 |  0:00:06s
epoch 20 | loss: 0.73327 |  0:00:06s
epoch 21 | loss: 0.74283 |  0:00:06s
epoch 22 | loss: 0.72857 |  0:00:06s
epoch 23 | loss: 0.72614 |  0:00:07s
epoch 24 | loss: 0.73647 |  0:00:07s
epoch 25 | loss: 0.72092 |  0:00:07s
epoch 26 | loss: 0.7275  |  0:00:08s
e



epoch 0  | loss: 4.05066 |  0:00:00s
epoch 1  | loss: 2.63848 |  0:00:00s
epoch 2  | loss: 1.90819 |  0:00:01s
epoch 3  | loss: 1.45855 |  0:00:01s
epoch 4  | loss: 1.28808 |  0:00:01s
epoch 5  | loss: 1.13252 |  0:00:01s
epoch 6  | loss: 1.02562 |  0:00:02s
epoch 7  | loss: 0.93617 |  0:00:02s
epoch 8  | loss: 0.87573 |  0:00:02s
epoch 9  | loss: 0.83479 |  0:00:03s
epoch 10 | loss: 0.80088 |  0:00:03s
epoch 11 | loss: 0.79878 |  0:00:03s
epoch 12 | loss: 0.77821 |  0:00:04s
epoch 13 | loss: 0.75991 |  0:00:04s
epoch 14 | loss: 0.75542 |  0:00:04s
epoch 15 | loss: 0.73976 |  0:00:04s
epoch 16 | loss: 0.75957 |  0:00:05s
epoch 17 | loss: 0.74478 |  0:00:05s
epoch 18 | loss: 0.73236 |  0:00:05s
epoch 19 | loss: 0.73665 |  0:00:06s
epoch 20 | loss: 0.73621 |  0:00:06s
epoch 21 | loss: 0.72369 |  0:00:06s
epoch 22 | loss: 0.71575 |  0:00:06s
epoch 23 | loss: 0.72225 |  0:00:07s
epoch 24 | loss: 0.72489 |  0:00:07s
epoch 25 | loss: 0.70616 |  0:00:07s
epoch 26 | loss: 0.71107 |  0:00:08s
e



epoch 0  | loss: 3.88211 |  0:00:00s
epoch 1  | loss: 2.64194 |  0:00:00s
epoch 2  | loss: 1.87439 |  0:00:00s
epoch 3  | loss: 1.48282 |  0:00:01s
epoch 4  | loss: 1.25459 |  0:00:01s
epoch 5  | loss: 1.07087 |  0:00:01s
epoch 6  | loss: 0.98779 |  0:00:02s
epoch 7  | loss: 0.88302 |  0:00:02s
epoch 8  | loss: 0.86058 |  0:00:02s
epoch 9  | loss: 0.81847 |  0:00:02s
epoch 10 | loss: 0.79034 |  0:00:03s
epoch 11 | loss: 0.78454 |  0:00:03s
epoch 12 | loss: 0.76771 |  0:00:03s
epoch 13 | loss: 0.75812 |  0:00:04s
epoch 14 | loss: 0.7524  |  0:00:04s
epoch 15 | loss: 0.7534  |  0:00:04s
epoch 16 | loss: 0.74582 |  0:00:04s
epoch 17 | loss: 0.72958 |  0:00:05s
epoch 18 | loss: 0.72664 |  0:00:05s
epoch 19 | loss: 0.72822 |  0:00:05s
epoch 20 | loss: 0.71135 |  0:00:06s
epoch 21 | loss: 0.71029 |  0:00:06s
epoch 22 | loss: 0.71263 |  0:00:06s
epoch 23 | loss: 0.71355 |  0:00:07s
epoch 24 | loss: 0.70505 |  0:00:07s
epoch 25 | loss: 0.71044 |  0:00:07s
epoch 26 | loss: 0.70953 |  0:00:07s
e



epoch 0  | loss: 3.91025 |  0:00:00s
epoch 1  | loss: 2.42432 |  0:00:00s
epoch 2  | loss: 1.81959 |  0:00:00s
epoch 3  | loss: 1.4265  |  0:00:01s
epoch 4  | loss: 1.23338 |  0:00:01s
epoch 5  | loss: 1.07206 |  0:00:01s
epoch 6  | loss: 0.96528 |  0:00:02s
epoch 7  | loss: 0.89206 |  0:00:02s
epoch 8  | loss: 0.84175 |  0:00:02s
epoch 9  | loss: 0.82081 |  0:00:02s
epoch 10 | loss: 0.80909 |  0:00:03s
epoch 11 | loss: 0.80392 |  0:00:03s
epoch 12 | loss: 0.78965 |  0:00:03s
epoch 13 | loss: 0.76641 |  0:00:04s
epoch 14 | loss: 0.76345 |  0:00:04s
epoch 15 | loss: 0.75807 |  0:00:04s
epoch 16 | loss: 0.75929 |  0:00:04s
epoch 17 | loss: 0.75083 |  0:00:05s
epoch 18 | loss: 0.75339 |  0:00:05s
epoch 19 | loss: 0.74659 |  0:00:05s
epoch 20 | loss: 0.73309 |  0:00:06s
epoch 21 | loss: 0.74618 |  0:00:06s
epoch 22 | loss: 0.75344 |  0:00:06s
epoch 23 | loss: 0.72799 |  0:00:07s
epoch 24 | loss: 0.73861 |  0:00:07s
epoch 25 | loss: 0.71691 |  0:00:07s
epoch 26 | loss: 0.73868 |  0:00:07s
e



epoch 0  | loss: 3.91025 |  0:00:00s
epoch 1  | loss: 2.42432 |  0:00:00s
epoch 2  | loss: 1.81959 |  0:00:01s
epoch 3  | loss: 1.4265  |  0:00:01s
epoch 4  | loss: 1.23338 |  0:00:01s
epoch 5  | loss: 1.07206 |  0:00:01s
epoch 6  | loss: 0.96528 |  0:00:02s
epoch 7  | loss: 0.89206 |  0:00:02s
epoch 8  | loss: 0.84175 |  0:00:02s
epoch 9  | loss: 0.82081 |  0:00:03s
epoch 10 | loss: 0.80909 |  0:00:03s
epoch 11 | loss: 0.80392 |  0:00:03s
epoch 12 | loss: 0.78965 |  0:00:03s
epoch 13 | loss: 0.76641 |  0:00:04s
epoch 14 | loss: 0.76345 |  0:00:04s
epoch 15 | loss: 0.75807 |  0:00:04s
epoch 16 | loss: 0.75929 |  0:00:05s
epoch 17 | loss: 0.75083 |  0:00:05s
epoch 18 | loss: 0.75339 |  0:00:05s
epoch 19 | loss: 0.74659 |  0:00:05s
epoch 20 | loss: 0.73309 |  0:00:06s
epoch 21 | loss: 0.74618 |  0:00:06s
epoch 22 | loss: 0.75344 |  0:00:06s
epoch 23 | loss: 0.72799 |  0:00:07s
epoch 24 | loss: 0.73861 |  0:00:07s
epoch 25 | loss: 0.71691 |  0:00:07s
epoch 26 | loss: 0.73868 |  0:00:08s
e

## Domain Adaptation

In [39]:
from adapt.feature_based import FA
from adapt.instance_based import BalancedWeighting
from adapt.feature_based import CORAL
from adapt.feature_based import SA

2024-09-08 20:42:48.835196: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [40]:
# targ_labeled_X_san, targ_labeled_y_san = balanced_subset(lax, bur, 100)
# targ_labeled_X_chi, targ_labeled_y_chi = balanced_subset(ohr, mid, 100)

__, targ_labeled_X_san, __, targ_labeled_y_san = train_test_split(san_X_train, san_y_train, test_size = 0.1196)
__, targ_labeled_X_chi, __, targ_labeled_y_chi = train_test_split(chi_X_train, chi_y_train, test_size = 0.1364)

In [41]:
targ_labeled_X_san.shape, targ_labeled_X_chi.shape

((858, 12), (860, 12))

## FA

RF

In [42]:
crf = get_RF(chirfparam)
chi_rf_fa = FA(crf, Xt=targ_labeled_X_san, yt=targ_labeled_y_san, random_state=0, verbose = 0)
chi_rf_fa = stat_significance_trainer_tester(chi_rf_fa, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'RF-FA')

srf = get_RF(sanrfparam)
san_rf_fa = FA(srf, Xt=targ_labeled_X_chi, yt=targ_labeled_y_chi, random_state=0, verbose = 0)
san_rf_fa = stat_significance_trainer_tester(san_rf_fa, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'RF-FA')

chi-chi 0.489 0.953 -0.775
chi-san 0.582 0.821 0.068
san-san 0.653 0.922 -0.177
san-chi 0.374 0.730 -0.042


xgb

In [43]:
cxgb = get_xgb(chixgbparam)
chi_xgb_fa = FA(cxgb, Xt=targ_labeled_X_san, yt=targ_labeled_y_san, random_state=0, verbose = 0)
chi_xgb_fa = stat_significance_trainer_tester(chi_xgb_fa, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'XGB-FA')

sxgb = get_xgb(sanxgbparam)
san_xgb_fa = FA(sxgb, Xt=targ_labeled_X_chi, yt=targ_labeled_y_chi, random_state=0, verbose = 0)
san_xgb_fa = stat_significance_trainer_tester(san_xgb_fa, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'XGB-FA')

chi-chi 0.556 1.083 -1.294
chi-san 0.605 0.854 -0.010
san-san 0.614 0.866 -0.039
san-chi 0.363 0.708 0.020


MLP

In [44]:
clf = get_MLP(chimlpparam)
chi_MLP_fa = FA(clf, Xt=targ_labeled_X_san, yt=targ_labeled_y_san, random_state=42, verbose = 0)
chi_MLP_fa = stat_significance_trainer_tester(chi_MLP_fa, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'MLP-FA')

slf = get_MLP(chimlpparam)
san_MLP_fa = FA(slf, Xt=targ_labeled_X_chi, yt=targ_labeled_y_chi, random_state=42, verbose = 0)
san_MLP_fa = stat_significance_trainer_tester(san_MLP_fa, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'MLP-FA')



chi-chi 0.841 1.640 -4.281
chi-san 0.585 0.826 0.056




san-san 0.656 0.926 -0.187
san-chi 0.387 0.754 -0.113




TabNet

In [45]:
clf = get_TN(chiTNparam)
chi_TN_fa = FA(clf, Xt=targ_labeled_X_san, yt=targ_labeled_y_san.reshape(-1, 1), random_state=42, verbose = 0)
chi_TN_fa = stat_significance_trainer_tester(chi_TN_fa, chi_X_train, chi_y_train.reshape(-1, 1), chi_X_test, chi_y_test.reshape(-1, 1), san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'TN-FA')

slf = get_TN(chiTNparam)
san_TN_fa = FA(slf, Xt=targ_labeled_X_chi, yt=targ_labeled_y_chi.reshape(-1, 1), random_state=42, verbose = 0)
san_TN_fa = stat_significance_trainer_tester(san_TN_fa, san_X_train, san_y_train.reshape(-1, 1), san_X_test, san_y_test.reshape(-1, 1), chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'TN-FA')



epoch 0  | loss: 3.90713 |  0:00:00s
epoch 1  | loss: 3.019   |  0:00:00s
epoch 2  | loss: 2.37859 |  0:00:00s
epoch 3  | loss: 1.92529 |  0:00:01s
epoch 4  | loss: 1.56108 |  0:00:01s
epoch 5  | loss: 1.26811 |  0:00:01s
epoch 6  | loss: 1.02627 |  0:00:02s
epoch 7  | loss: 0.89692 |  0:00:02s
epoch 8  | loss: 0.85544 |  0:00:02s
epoch 9  | loss: 0.75294 |  0:00:03s
epoch 10 | loss: 0.67893 |  0:00:03s
epoch 11 | loss: 0.67042 |  0:00:03s
epoch 12 | loss: 0.62879 |  0:00:03s
epoch 13 | loss: 0.60108 |  0:00:04s
epoch 14 | loss: 0.5737  |  0:00:04s
epoch 15 | loss: 0.58231 |  0:00:04s
epoch 16 | loss: 0.55416 |  0:00:05s
epoch 17 | loss: 0.54877 |  0:00:05s
epoch 18 | loss: 0.55417 |  0:00:05s
epoch 19 | loss: 0.52956 |  0:00:05s
epoch 20 | loss: 0.52453 |  0:00:06s
epoch 21 | loss: 0.52097 |  0:00:06s
epoch 22 | loss: 0.53468 |  0:00:06s
epoch 23 | loss: 0.51694 |  0:00:07s
epoch 24 | loss: 0.5307  |  0:00:07s
epoch 25 | loss: 0.52292 |  0:00:07s
epoch 26 | loss: 0.52312 |  0:00:08s
e



epoch 0  | loss: 4.02543 |  0:00:00s
epoch 1  | loss: 2.99319 |  0:00:00s
epoch 2  | loss: 2.36296 |  0:00:00s
epoch 3  | loss: 1.82869 |  0:00:01s
epoch 4  | loss: 1.42032 |  0:00:01s
epoch 5  | loss: 1.18722 |  0:00:01s
epoch 6  | loss: 0.96338 |  0:00:02s
epoch 7  | loss: 0.89724 |  0:00:02s
epoch 8  | loss: 0.80382 |  0:00:02s
epoch 9  | loss: 0.74004 |  0:00:03s
epoch 10 | loss: 0.72337 |  0:00:03s
epoch 11 | loss: 0.70436 |  0:00:03s
epoch 12 | loss: 0.68872 |  0:00:03s
epoch 13 | loss: 0.663   |  0:00:04s
epoch 14 | loss: 0.63833 |  0:00:04s
epoch 15 | loss: 0.60035 |  0:00:04s
epoch 16 | loss: 0.60716 |  0:00:05s
epoch 17 | loss: 0.58341 |  0:00:05s
epoch 18 | loss: 0.5687  |  0:00:05s
epoch 19 | loss: 0.57592 |  0:00:05s
epoch 20 | loss: 0.55391 |  0:00:06s
epoch 21 | loss: 0.54722 |  0:00:06s
epoch 22 | loss: 0.54575 |  0:00:07s
epoch 23 | loss: 0.53957 |  0:00:07s
epoch 24 | loss: 0.53193 |  0:00:07s
epoch 25 | loss: 0.5189  |  0:00:08s
epoch 26 | loss: 0.51188 |  0:00:08s
e



epoch 0  | loss: 3.99722 |  0:00:00s
epoch 1  | loss: 3.22214 |  0:00:00s
epoch 2  | loss: 2.67945 |  0:00:00s
epoch 3  | loss: 2.0539  |  0:00:01s
epoch 4  | loss: 1.55622 |  0:00:01s
epoch 5  | loss: 1.3117  |  0:00:01s
epoch 6  | loss: 1.06388 |  0:00:02s
epoch 7  | loss: 0.95302 |  0:00:02s
epoch 8  | loss: 0.84369 |  0:00:02s
epoch 9  | loss: 0.76863 |  0:00:02s
epoch 10 | loss: 0.69588 |  0:00:03s
epoch 11 | loss: 0.68581 |  0:00:03s
epoch 12 | loss: 0.66409 |  0:00:03s
epoch 13 | loss: 0.66106 |  0:00:04s
epoch 14 | loss: 0.64206 |  0:00:04s
epoch 15 | loss: 0.60408 |  0:00:04s
epoch 16 | loss: 0.58802 |  0:00:05s
epoch 17 | loss: 0.57254 |  0:00:05s
epoch 18 | loss: 0.5833  |  0:00:05s
epoch 19 | loss: 0.56487 |  0:00:05s
epoch 20 | loss: 0.56212 |  0:00:06s
epoch 21 | loss: 0.54673 |  0:00:06s
epoch 22 | loss: 0.55558 |  0:00:06s
epoch 23 | loss: 0.54526 |  0:00:07s
epoch 24 | loss: 0.54633 |  0:00:07s
epoch 25 | loss: 0.53699 |  0:00:07s
epoch 26 | loss: 0.52762 |  0:00:08s
e



epoch 0  | loss: 3.86538 |  0:00:00s
epoch 1  | loss: 3.12103 |  0:00:00s
epoch 2  | loss: 2.5634  |  0:00:00s
epoch 3  | loss: 2.05353 |  0:00:01s
epoch 4  | loss: 1.63656 |  0:00:01s
epoch 5  | loss: 1.30573 |  0:00:01s
epoch 6  | loss: 1.071   |  0:00:02s
epoch 7  | loss: 0.97915 |  0:00:02s
epoch 8  | loss: 0.86157 |  0:00:02s
epoch 9  | loss: 0.75617 |  0:00:03s
epoch 10 | loss: 0.70428 |  0:00:03s
epoch 11 | loss: 0.66071 |  0:00:03s
epoch 12 | loss: 0.66669 |  0:00:03s
epoch 13 | loss: 0.61812 |  0:00:04s
epoch 14 | loss: 0.57562 |  0:00:04s
epoch 15 | loss: 0.56284 |  0:00:04s
epoch 16 | loss: 0.5605  |  0:00:05s
epoch 17 | loss: 0.54067 |  0:00:05s
epoch 18 | loss: 0.54745 |  0:00:05s
epoch 19 | loss: 0.54326 |  0:00:05s
epoch 20 | loss: 0.52046 |  0:00:06s
epoch 21 | loss: 0.53348 |  0:00:06s
epoch 22 | loss: 0.52168 |  0:00:06s
epoch 23 | loss: 0.50638 |  0:00:07s
epoch 24 | loss: 0.51242 |  0:00:07s
epoch 25 | loss: 0.49424 |  0:00:07s
epoch 26 | loss: 0.48362 |  0:00:08s
e



epoch 0  | loss: 3.86538 |  0:00:00s
epoch 1  | loss: 3.12103 |  0:00:00s
epoch 2  | loss: 2.5634  |  0:00:00s
epoch 3  | loss: 2.05353 |  0:00:01s
epoch 4  | loss: 1.63656 |  0:00:01s
epoch 5  | loss: 1.30573 |  0:00:01s
epoch 6  | loss: 1.071   |  0:00:02s
epoch 7  | loss: 0.97915 |  0:00:02s
epoch 8  | loss: 0.86157 |  0:00:02s
epoch 9  | loss: 0.75617 |  0:00:02s
epoch 10 | loss: 0.70428 |  0:00:03s
epoch 11 | loss: 0.66071 |  0:00:03s
epoch 12 | loss: 0.66669 |  0:00:03s
epoch 13 | loss: 0.61812 |  0:00:04s
epoch 14 | loss: 0.57562 |  0:00:04s
epoch 15 | loss: 0.56284 |  0:00:04s
epoch 16 | loss: 0.5605  |  0:00:05s
epoch 17 | loss: 0.54067 |  0:00:05s
epoch 18 | loss: 0.54745 |  0:00:05s
epoch 19 | loss: 0.54326 |  0:00:05s
epoch 20 | loss: 0.52046 |  0:00:06s
epoch 21 | loss: 0.53348 |  0:00:06s
epoch 22 | loss: 0.52168 |  0:00:06s
epoch 23 | loss: 0.50638 |  0:00:07s
epoch 24 | loss: 0.51242 |  0:00:07s
epoch 25 | loss: 0.49424 |  0:00:07s
epoch 26 | loss: 0.48362 |  0:00:08s
e



epoch 0  | loss: 2.24084 |  0:00:00s
epoch 1  | loss: 1.83649 |  0:00:00s
epoch 2  | loss: 1.38158 |  0:00:01s
epoch 3  | loss: 1.18797 |  0:00:01s
epoch 4  | loss: 1.02754 |  0:00:01s
epoch 5  | loss: 0.96445 |  0:00:02s
epoch 6  | loss: 0.92778 |  0:00:02s
epoch 7  | loss: 0.89368 |  0:00:02s
epoch 8  | loss: 0.8511  |  0:00:03s
epoch 9  | loss: 0.82869 |  0:00:03s
epoch 10 | loss: 0.80269 |  0:00:03s
epoch 11 | loss: 0.79978 |  0:00:04s
epoch 12 | loss: 0.7761  |  0:00:04s
epoch 13 | loss: 0.76128 |  0:00:05s
epoch 14 | loss: 0.74803 |  0:00:05s
epoch 15 | loss: 0.75622 |  0:00:05s
epoch 16 | loss: 0.74345 |  0:00:06s
epoch 17 | loss: 0.72462 |  0:00:06s
epoch 18 | loss: 0.73322 |  0:00:06s
epoch 19 | loss: 0.719   |  0:00:07s
epoch 20 | loss: 0.7184  |  0:00:07s
epoch 21 | loss: 0.71084 |  0:00:07s
epoch 22 | loss: 0.7158  |  0:00:08s
epoch 23 | loss: 0.69919 |  0:00:08s
epoch 24 | loss: 0.70331 |  0:00:08s
epoch 25 | loss: 0.69106 |  0:00:09s
epoch 26 | loss: 0.70087 |  0:00:09s
e



epoch 0  | loss: 2.30082 |  0:00:00s
epoch 1  | loss: 1.75599 |  0:00:00s
epoch 2  | loss: 1.34499 |  0:00:01s
epoch 3  | loss: 1.16401 |  0:00:01s
epoch 4  | loss: 1.0221  |  0:00:01s
epoch 5  | loss: 0.92193 |  0:00:02s
epoch 6  | loss: 0.89088 |  0:00:02s
epoch 7  | loss: 0.86429 |  0:00:02s
epoch 8  | loss: 0.86281 |  0:00:03s
epoch 9  | loss: 0.82585 |  0:00:03s
epoch 10 | loss: 0.81384 |  0:00:04s
epoch 11 | loss: 0.84895 |  0:00:04s
epoch 12 | loss: 0.79805 |  0:00:04s
epoch 13 | loss: 0.77577 |  0:00:05s
epoch 14 | loss: 0.77454 |  0:00:05s
epoch 15 | loss: 0.75593 |  0:00:06s
epoch 16 | loss: 0.75575 |  0:00:06s
epoch 17 | loss: 0.74692 |  0:00:06s
epoch 18 | loss: 0.74122 |  0:00:07s
epoch 19 | loss: 0.73298 |  0:00:07s
epoch 20 | loss: 0.72366 |  0:00:07s
epoch 21 | loss: 0.71064 |  0:00:08s
epoch 22 | loss: 0.71014 |  0:00:08s
epoch 23 | loss: 0.71494 |  0:00:08s
epoch 24 | loss: 0.70534 |  0:00:09s
epoch 25 | loss: 0.70537 |  0:00:09s
epoch 26 | loss: 0.70026 |  0:00:09s
e



epoch 0  | loss: 2.2892  |  0:00:00s
epoch 1  | loss: 1.66764 |  0:00:00s
epoch 2  | loss: 1.37216 |  0:00:01s
epoch 3  | loss: 1.14898 |  0:00:01s
epoch 4  | loss: 1.06792 |  0:00:01s
epoch 5  | loss: 0.96862 |  0:00:02s
epoch 6  | loss: 0.93668 |  0:00:02s
epoch 7  | loss: 0.90609 |  0:00:02s
epoch 8  | loss: 0.85683 |  0:00:03s
epoch 9  | loss: 0.83067 |  0:00:03s
epoch 10 | loss: 0.81467 |  0:00:03s
epoch 11 | loss: 0.81615 |  0:00:04s
epoch 12 | loss: 0.81433 |  0:00:04s
epoch 13 | loss: 0.79393 |  0:00:05s
epoch 14 | loss: 0.78188 |  0:00:05s
epoch 15 | loss: 0.76875 |  0:00:05s
epoch 16 | loss: 0.74026 |  0:00:06s
epoch 17 | loss: 0.75129 |  0:00:06s
epoch 18 | loss: 0.72978 |  0:00:06s
epoch 19 | loss: 0.72517 |  0:00:07s
epoch 20 | loss: 0.72391 |  0:00:07s
epoch 21 | loss: 0.71814 |  0:00:07s
epoch 22 | loss: 0.71486 |  0:00:08s
epoch 23 | loss: 0.70685 |  0:00:08s
epoch 24 | loss: 0.70077 |  0:00:08s
epoch 25 | loss: 0.69978 |  0:00:09s
epoch 26 | loss: 0.70686 |  0:00:09s
e



epoch 0  | loss: 2.19808 |  0:00:00s
epoch 1  | loss: 1.61129 |  0:00:00s
epoch 2  | loss: 1.29592 |  0:00:01s
epoch 3  | loss: 1.12668 |  0:00:01s
epoch 4  | loss: 1.02446 |  0:00:01s
epoch 5  | loss: 0.92567 |  0:00:02s
epoch 6  | loss: 0.903   |  0:00:02s
epoch 7  | loss: 0.87498 |  0:00:02s
epoch 8  | loss: 0.86021 |  0:00:03s
epoch 9  | loss: 0.81939 |  0:00:03s
epoch 10 | loss: 0.80511 |  0:00:03s
epoch 11 | loss: 0.79109 |  0:00:04s
epoch 12 | loss: 0.77596 |  0:00:04s
epoch 13 | loss: 0.7651  |  0:00:04s
epoch 14 | loss: 0.75388 |  0:00:05s
epoch 15 | loss: 0.76498 |  0:00:05s
epoch 16 | loss: 0.73977 |  0:00:06s
epoch 17 | loss: 0.75214 |  0:00:06s
epoch 18 | loss: 0.74129 |  0:00:06s
epoch 19 | loss: 0.73124 |  0:00:07s
epoch 20 | loss: 0.72906 |  0:00:07s
epoch 21 | loss: 0.73348 |  0:00:07s
epoch 22 | loss: 0.72272 |  0:00:08s
epoch 23 | loss: 0.73476 |  0:00:08s
epoch 24 | loss: 0.72413 |  0:00:08s
epoch 25 | loss: 0.72289 |  0:00:09s
epoch 26 | loss: 0.71793 |  0:00:09s
e



epoch 0  | loss: 2.19808 |  0:00:00s
epoch 1  | loss: 1.61129 |  0:00:00s
epoch 2  | loss: 1.29592 |  0:00:01s
epoch 3  | loss: 1.12668 |  0:00:01s
epoch 4  | loss: 1.02446 |  0:00:01s
epoch 5  | loss: 0.92567 |  0:00:02s
epoch 6  | loss: 0.903   |  0:00:02s
epoch 7  | loss: 0.87498 |  0:00:02s
epoch 8  | loss: 0.86021 |  0:00:03s
epoch 9  | loss: 0.81939 |  0:00:03s
epoch 10 | loss: 0.80511 |  0:00:03s
epoch 11 | loss: 0.79109 |  0:00:04s
epoch 12 | loss: 0.77596 |  0:00:04s
epoch 13 | loss: 0.7651  |  0:00:05s
epoch 14 | loss: 0.75388 |  0:00:05s
epoch 15 | loss: 0.76498 |  0:00:05s
epoch 16 | loss: 0.73977 |  0:00:06s
epoch 17 | loss: 0.75214 |  0:00:06s
epoch 18 | loss: 0.74129 |  0:00:06s
epoch 19 | loss: 0.73124 |  0:00:07s
epoch 20 | loss: 0.72906 |  0:00:07s
epoch 21 | loss: 0.73348 |  0:00:07s
epoch 22 | loss: 0.72272 |  0:00:08s
epoch 23 | loss: 0.73476 |  0:00:08s
epoch 24 | loss: 0.72413 |  0:00:09s
epoch 25 | loss: 0.72289 |  0:00:09s
epoch 26 | loss: 0.71793 |  0:00:09s
e

## BW

RF

In [46]:
regr = get_RF(chirfparam)
crb = BalancedWeighting(regr, Xt=targ_labeled_X_san, yt=targ_labeled_y_san, random_state=0, verbose = 0)
crb = stat_significance_trainer_tester(crb, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'RF-BW')

regr =  get_RF(sanrfparam)
srb = BalancedWeighting(regr, Xt=targ_labeled_X_chi, yt=targ_labeled_y_chi, random_state=0, verbose = 0)
srb = stat_significance_trainer_tester(srb, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'RF-BW')

chi-chi 0.376 0.733 -0.049
chi-san 0.581 0.820 0.069
san-san 0.598 0.844 0.014
san-chi 0.375 0.732 -0.048


XGB

In [47]:
regr = get_xgb(chixgbparam)
crb = BalancedWeighting(regr, Xt=targ_labeled_X_san, yt=targ_labeled_y_san, random_state=0, verbose = 0)
crb = stat_significance_trainer_tester(crb, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'XGB-BW')

regr =  get_xgb(sanxgbparam)
srb = BalancedWeighting(regr, Xt=targ_labeled_X_chi, yt=targ_labeled_y_chi, random_state=0, verbose = 0)
srb = stat_significance_trainer_tester(srb, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'XGB-BW')

chi-chi 0.379 0.738 -0.066
chi-san 0.603 0.850 -0.002
san-san 0.594 0.838 0.028
san-chi 0.366 0.713 0.007


MLP

In [48]:
regr = get_MLP(chimlpparam)
clb = BalancedWeighting(regr, Xt=targ_labeled_X_san, yt=targ_labeled_y_san, random_state=0, verbose = 0)
clb = stat_significance_trainer_tester(clb, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'MLP-BW')

regr = get_MLP(sanmlpparam)
slb = BalancedWeighting(regr, Xt=targ_labeled_X_chi, yt=targ_labeled_y_chi, random_state=0, verbose = 0)
slb = stat_significance_trainer_tester(slb, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'MLP-BW')

chi-chi 0.425 0.828 -0.340
chi-san 0.574 0.810 0.091
san-san 0.589 0.830 0.045
san-chi 0.393 0.767 -0.151


TabNet

In [49]:
regr = get_TN(chiTNparam)
clb = BalancedWeighting(regr, Xt=targ_labeled_X_san, yt=targ_labeled_y_san.reshape(-1, 1), random_state=0, verbose = 0)
clb = stat_significance_trainer_tester(clb, chi_X_train, chi_y_train.reshape(-1, 1), chi_X_test, chi_y_test.reshape(-1, 1), san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'TN-BW')

regr = get_TN(sanTNparam)
slb = BalancedWeighting(regr, Xt=targ_labeled_X_chi, yt=targ_labeled_y_chi.reshape(-1, 1), random_state=0, verbose = 0)
slb = stat_significance_trainer_tester(slb, san_X_train, san_y_train.reshape(-1, 1), san_X_test, san_y_test.reshape(-1, 1), chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'TN-BW')



epoch 0  | loss: 4.47622 |  0:00:00s
epoch 1  | loss: 2.72792 |  0:00:00s
epoch 2  | loss: 1.81846 |  0:00:00s
epoch 3  | loss: 1.37394 |  0:00:01s
epoch 4  | loss: 1.10964 |  0:00:01s
epoch 5  | loss: 0.94965 |  0:00:01s
epoch 6  | loss: 0.83613 |  0:00:02s
epoch 7  | loss: 0.75825 |  0:00:02s
epoch 8  | loss: 0.72696 |  0:00:02s
epoch 9  | loss: 0.69778 |  0:00:02s
epoch 10 | loss: 0.67015 |  0:00:03s
epoch 11 | loss: 0.65239 |  0:00:03s
epoch 12 | loss: 0.62511 |  0:00:03s
epoch 13 | loss: 0.63444 |  0:00:04s
epoch 14 | loss: 0.62123 |  0:00:04s
epoch 15 | loss: 0.60741 |  0:00:04s
epoch 16 | loss: 0.59878 |  0:00:04s
epoch 17 | loss: 0.59563 |  0:00:05s
epoch 18 | loss: 0.59697 |  0:00:05s
epoch 19 | loss: 0.58854 |  0:00:05s
epoch 20 | loss: 0.58761 |  0:00:05s
epoch 21 | loss: 0.59536 |  0:00:06s
epoch 22 | loss: 0.57295 |  0:00:06s
epoch 23 | loss: 0.56768 |  0:00:06s
epoch 24 | loss: 0.55734 |  0:00:06s
epoch 25 | loss: 0.57397 |  0:00:07s
epoch 26 | loss: 0.55885 |  0:00:07s
e



epoch 0  | loss: 4.38571 |  0:00:00s
epoch 1  | loss: 2.50288 |  0:00:00s
epoch 2  | loss: 1.75403 |  0:00:00s
epoch 3  | loss: 1.31224 |  0:00:01s
epoch 4  | loss: 1.03935 |  0:00:01s
epoch 5  | loss: 0.90246 |  0:00:01s
epoch 6  | loss: 0.8121  |  0:00:02s
epoch 7  | loss: 0.75295 |  0:00:02s
epoch 8  | loss: 0.71982 |  0:00:02s
epoch 9  | loss: 0.68137 |  0:00:03s
epoch 10 | loss: 0.66305 |  0:00:03s
epoch 11 | loss: 0.65215 |  0:00:03s
epoch 12 | loss: 0.63039 |  0:00:04s
epoch 13 | loss: 0.62412 |  0:00:04s
epoch 14 | loss: 0.62122 |  0:00:04s
epoch 15 | loss: 0.6248  |  0:00:05s
epoch 16 | loss: 0.62091 |  0:00:05s
epoch 17 | loss: 0.60992 |  0:00:05s
epoch 18 | loss: 0.60041 |  0:00:05s
epoch 19 | loss: 0.59975 |  0:00:06s
epoch 20 | loss: 0.60252 |  0:00:06s
epoch 21 | loss: 0.60066 |  0:00:06s
epoch 22 | loss: 0.58293 |  0:00:07s
epoch 23 | loss: 0.59796 |  0:00:07s
epoch 24 | loss: 0.58721 |  0:00:08s
epoch 25 | loss: 0.5814  |  0:00:08s
epoch 26 | loss: 0.58664 |  0:00:08s
e

## CORAL

RF

In [50]:
regr = get_RF(chirfparam)
crc = CORAL(regr, Xt=san_X_train, random_state=0, verbose = 0)
crc = stat_significance_trainer_tester(crc, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'RF-CORAL')

regr = get_RF(sanrfparam)
src = CORAL(regr, Xt=chi_X_train, random_state=0, verbose = 0)
src = stat_significance_trainer_tester(src, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'RF-CORAL')

chi-chi 0.395 0.770 -0.159
chi-san 0.747 1.054 -0.538
san-san 0.610 0.860 -0.025
san-chi 0.504 0.983 -0.890


XGB

In [51]:
regr = get_xgb(chixgbparam)
crc = CORAL(regr, Xt=san_X_train, random_state=0, verbose = 0)
crc = stat_significance_trainer_tester(crc, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'XGB-CORAL')

regr = get_xgb(sanxgbparam)
src = CORAL(regr, Xt=chi_X_train, random_state=0, verbose = 0)
src = stat_significance_trainer_tester(src, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'XGB-CORAL')

chi-chi 0.407 0.793 -0.229
chi-san 0.777 1.097 -0.667
san-san 0.576 0.813 0.085
san-chi 0.438 0.853 -0.426


MLP

In [52]:
regr = get_MLP(chimlpparam)
clc = CORAL(regr, Xt=san_X_train, random_state=0, verbose = 0)
clc = stat_significance_trainer_tester(clc, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'MLP-CORAL')

regr = get_MLP(sanmlpparam)
slc = CORAL(regr, Xt=chi_X_train, random_state=0, verbose = 0)
slc = stat_significance_trainer_tester(slc, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'MLP-CORAL')



chi-chi 0.437 0.852 -0.420
chi-san 0.613 0.865 -0.036
san-san 0.644 0.908 -0.143
san-chi 0.508 0.990 -0.917


TabNet

In [53]:
regr = get_TN(chiTNparam)
clc = CORAL(regr, Xt=san_X_train, random_state=0, verbose = 0)
clc = stat_significance_trainer_tester(clc, chi_X_train, chi_y_train.reshape(-1, 1), chi_X_test, chi_y_test.reshape(-1, 1), san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'TN-CORAL')

regr = get_TN(sanTNparam)
slc = CORAL(regr, Xt=chi_X_train, random_state=0, verbose = 0)
slc = stat_significance_trainer_tester(slc, san_X_train, san_y_train.reshape(-1, 1), san_X_test, san_y_test.reshape(-1, 1), chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'TN-CORAL')



epoch 0  | loss: 6.07575 |  0:00:00s
epoch 1  | loss: 4.23598 |  0:00:00s
epoch 2  | loss: 3.12169 |  0:00:00s
epoch 3  | loss: 2.35561 |  0:00:01s
epoch 4  | loss: 1.78055 |  0:00:01s
epoch 5  | loss: 1.40588 |  0:00:01s
epoch 6  | loss: 1.08431 |  0:00:01s
epoch 7  | loss: 0.92219 |  0:00:01s
epoch 8  | loss: 0.79379 |  0:00:02s
epoch 9  | loss: 0.70691 |  0:00:02s
epoch 10 | loss: 0.63073 |  0:00:02s
epoch 11 | loss: 0.59159 |  0:00:02s
epoch 12 | loss: 0.54134 |  0:00:02s
epoch 13 | loss: 0.5443  |  0:00:03s
epoch 14 | loss: 0.50985 |  0:00:03s
epoch 15 | loss: 0.49775 |  0:00:03s
epoch 16 | loss: 0.49581 |  0:00:03s
epoch 17 | loss: 0.49748 |  0:00:03s
epoch 18 | loss: 0.48011 |  0:00:04s
epoch 19 | loss: 0.45939 |  0:00:04s
epoch 20 | loss: 0.46196 |  0:00:04s
epoch 21 | loss: 0.47247 |  0:00:04s
epoch 22 | loss: 0.46641 |  0:00:05s
epoch 23 | loss: 0.44208 |  0:00:05s
epoch 24 | loss: 0.4433  |  0:00:05s
epoch 25 | loss: 0.44466 |  0:00:05s
epoch 26 | loss: 0.43911 |  0:00:05s
e



epoch 0  | loss: 5.94807 |  0:00:00s
epoch 1  | loss: 4.04446 |  0:00:00s
epoch 2  | loss: 2.89005 |  0:00:00s
epoch 3  | loss: 2.17139 |  0:00:00s
epoch 4  | loss: 1.77612 |  0:00:01s
epoch 5  | loss: 1.37337 |  0:00:01s
epoch 6  | loss: 1.12422 |  0:00:01s
epoch 7  | loss: 0.90349 |  0:00:01s
epoch 8  | loss: 0.78819 |  0:00:01s
epoch 9  | loss: 0.71224 |  0:00:02s
epoch 10 | loss: 0.64481 |  0:00:02s
epoch 11 | loss: 0.58711 |  0:00:02s
epoch 12 | loss: 0.54679 |  0:00:02s
epoch 13 | loss: 0.51441 |  0:00:02s
epoch 14 | loss: 0.4963  |  0:00:03s
epoch 15 | loss: 0.49283 |  0:00:03s
epoch 16 | loss: 0.48971 |  0:00:03s
epoch 17 | loss: 0.46337 |  0:00:03s
epoch 18 | loss: 0.46029 |  0:00:04s
epoch 19 | loss: 0.45411 |  0:00:04s
epoch 20 | loss: 0.44115 |  0:00:04s
epoch 21 | loss: 0.45751 |  0:00:04s
epoch 22 | loss: 0.4593  |  0:00:04s
epoch 23 | loss: 0.44476 |  0:00:05s
epoch 24 | loss: 0.448   |  0:00:05s
epoch 25 | loss: 0.43574 |  0:00:05s
epoch 26 | loss: 0.43442 |  0:00:05s
e



epoch 0  | loss: 6.04383 |  0:00:00s
epoch 1  | loss: 4.16003 |  0:00:00s
epoch 2  | loss: 3.08512 |  0:00:00s
epoch 3  | loss: 2.37313 |  0:00:00s
epoch 4  | loss: 1.8769  |  0:00:01s
epoch 5  | loss: 1.46313 |  0:00:01s
epoch 6  | loss: 1.19746 |  0:00:01s
epoch 7  | loss: 0.97059 |  0:00:01s
epoch 8  | loss: 0.84758 |  0:00:01s
epoch 9  | loss: 0.76243 |  0:00:02s
epoch 10 | loss: 0.66469 |  0:00:02s
epoch 11 | loss: 0.61566 |  0:00:02s
epoch 12 | loss: 0.55862 |  0:00:02s
epoch 13 | loss: 0.54294 |  0:00:03s
epoch 14 | loss: 0.51736 |  0:00:03s
epoch 15 | loss: 0.50113 |  0:00:03s
epoch 16 | loss: 0.47256 |  0:00:03s
epoch 17 | loss: 0.4856  |  0:00:03s
epoch 18 | loss: 0.47163 |  0:00:04s
epoch 19 | loss: 0.47165 |  0:00:04s
epoch 20 | loss: 0.45983 |  0:00:04s
epoch 21 | loss: 0.46627 |  0:00:04s
epoch 22 | loss: 0.45601 |  0:00:04s
epoch 23 | loss: 0.45685 |  0:00:05s
epoch 24 | loss: 0.4511  |  0:00:05s
epoch 25 | loss: 0.441   |  0:00:05s
epoch 26 | loss: 0.43744 |  0:00:05s
e



epoch 0  | loss: 5.97017 |  0:00:00s
epoch 1  | loss: 4.12734 |  0:00:00s
epoch 2  | loss: 2.86368 |  0:00:00s
epoch 3  | loss: 2.10997 |  0:00:00s
epoch 4  | loss: 1.65561 |  0:00:01s
epoch 5  | loss: 1.32279 |  0:00:01s
epoch 6  | loss: 1.084   |  0:00:01s
epoch 7  | loss: 0.88469 |  0:00:01s
epoch 8  | loss: 0.76668 |  0:00:01s
epoch 9  | loss: 0.67621 |  0:00:02s
epoch 10 | loss: 0.58952 |  0:00:02s
epoch 11 | loss: 0.55268 |  0:00:02s
epoch 12 | loss: 0.53146 |  0:00:02s
epoch 13 | loss: 0.51054 |  0:00:03s
epoch 14 | loss: 0.50543 |  0:00:03s
epoch 15 | loss: 0.4834  |  0:00:03s
epoch 16 | loss: 0.47814 |  0:00:03s
epoch 17 | loss: 0.47028 |  0:00:03s
epoch 18 | loss: 0.45296 |  0:00:04s
epoch 19 | loss: 0.45523 |  0:00:04s
epoch 20 | loss: 0.45742 |  0:00:04s
epoch 21 | loss: 0.44348 |  0:00:04s
epoch 22 | loss: 0.44341 |  0:00:05s
epoch 23 | loss: 0.44135 |  0:00:05s
epoch 24 | loss: 0.43384 |  0:00:05s
epoch 25 | loss: 0.44054 |  0:00:05s
epoch 26 | loss: 0.42895 |  0:00:05s
e



epoch 0  | loss: 5.97017 |  0:00:00s
epoch 1  | loss: 4.12734 |  0:00:00s
epoch 2  | loss: 2.86368 |  0:00:00s
epoch 3  | loss: 2.10997 |  0:00:00s
epoch 4  | loss: 1.65561 |  0:00:01s
epoch 5  | loss: 1.32279 |  0:00:01s
epoch 6  | loss: 1.084   |  0:00:01s
epoch 7  | loss: 0.88469 |  0:00:01s
epoch 8  | loss: 0.76668 |  0:00:01s
epoch 9  | loss: 0.67621 |  0:00:02s
epoch 10 | loss: 0.58952 |  0:00:02s
epoch 11 | loss: 0.55268 |  0:00:02s
epoch 12 | loss: 0.53146 |  0:00:02s
epoch 13 | loss: 0.51054 |  0:00:03s
epoch 14 | loss: 0.50543 |  0:00:03s
epoch 15 | loss: 0.4834  |  0:00:03s
epoch 16 | loss: 0.47814 |  0:00:03s
epoch 17 | loss: 0.47028 |  0:00:03s
epoch 18 | loss: 0.45296 |  0:00:04s
epoch 19 | loss: 0.45523 |  0:00:04s
epoch 20 | loss: 0.45742 |  0:00:04s
epoch 21 | loss: 0.44348 |  0:00:04s
epoch 22 | loss: 0.44341 |  0:00:04s
epoch 23 | loss: 0.44135 |  0:00:05s
epoch 24 | loss: 0.43384 |  0:00:05s
epoch 25 | loss: 0.44054 |  0:00:05s
epoch 26 | loss: 0.42895 |  0:00:05s
e



epoch 0  | loss: 3.96349 |  0:00:00s
epoch 1  | loss: 2.67675 |  0:00:00s
epoch 2  | loss: 1.97075 |  0:00:00s
epoch 3  | loss: 1.52042 |  0:00:01s
epoch 4  | loss: 1.28063 |  0:00:01s
epoch 5  | loss: 1.09927 |  0:00:01s
epoch 6  | loss: 1.00696 |  0:00:01s
epoch 7  | loss: 0.93457 |  0:00:02s
epoch 8  | loss: 0.87774 |  0:00:02s
epoch 9  | loss: 0.8703  |  0:00:02s
epoch 10 | loss: 0.83565 |  0:00:02s
epoch 11 | loss: 0.82305 |  0:00:03s
epoch 12 | loss: 0.796   |  0:00:03s
epoch 13 | loss: 0.79199 |  0:00:03s
epoch 14 | loss: 0.79508 |  0:00:04s
epoch 15 | loss: 0.79123 |  0:00:04s
epoch 16 | loss: 0.76145 |  0:00:04s
epoch 17 | loss: 0.76917 |  0:00:04s
epoch 18 | loss: 0.76961 |  0:00:05s
epoch 19 | loss: 0.76997 |  0:00:05s
epoch 20 | loss: 0.75815 |  0:00:05s
epoch 21 | loss: 0.75573 |  0:00:05s
epoch 22 | loss: 0.74393 |  0:00:06s
epoch 23 | loss: 0.74547 |  0:00:06s
epoch 24 | loss: 0.75432 |  0:00:06s
epoch 25 | loss: 0.74095 |  0:00:06s
epoch 26 | loss: 0.73279 |  0:00:07s
e



epoch 0  | loss: 4.10107 |  0:00:00s
epoch 1  | loss: 2.72103 |  0:00:00s
epoch 2  | loss: 1.9554  |  0:00:00s
epoch 3  | loss: 1.51353 |  0:00:01s
epoch 4  | loss: 1.31318 |  0:00:01s
epoch 5  | loss: 1.13243 |  0:00:01s
epoch 6  | loss: 1.03155 |  0:00:01s
epoch 7  | loss: 0.95546 |  0:00:02s
epoch 8  | loss: 0.93264 |  0:00:02s
epoch 9  | loss: 0.854   |  0:00:02s
epoch 10 | loss: 0.84956 |  0:00:02s
epoch 11 | loss: 0.82752 |  0:00:03s
epoch 12 | loss: 0.82129 |  0:00:03s
epoch 13 | loss: 0.79961 |  0:00:03s
epoch 14 | loss: 0.77846 |  0:00:04s
epoch 15 | loss: 0.77945 |  0:00:04s
epoch 16 | loss: 0.77404 |  0:00:04s
epoch 17 | loss: 0.76232 |  0:00:04s
epoch 18 | loss: 0.7631  |  0:00:05s
epoch 19 | loss: 0.77513 |  0:00:05s
epoch 20 | loss: 0.7533  |  0:00:05s
epoch 21 | loss: 0.74911 |  0:00:05s
epoch 22 | loss: 0.76007 |  0:00:06s
epoch 23 | loss: 0.73696 |  0:00:06s
epoch 24 | loss: 0.74086 |  0:00:06s
epoch 25 | loss: 0.73901 |  0:00:06s
epoch 26 | loss: 0.75185 |  0:00:07s
e



epoch 0  | loss: 4.02701 |  0:00:00s
epoch 1  | loss: 2.64972 |  0:00:00s
epoch 2  | loss: 1.95668 |  0:00:00s
epoch 3  | loss: 1.57438 |  0:00:01s
epoch 4  | loss: 1.29942 |  0:00:01s
epoch 5  | loss: 1.15333 |  0:00:01s
epoch 6  | loss: 1.03939 |  0:00:01s
epoch 7  | loss: 0.93818 |  0:00:02s
epoch 8  | loss: 0.88585 |  0:00:02s
epoch 9  | loss: 0.85326 |  0:00:02s
epoch 10 | loss: 0.82233 |  0:00:02s
epoch 11 | loss: 0.79028 |  0:00:03s
epoch 12 | loss: 0.78883 |  0:00:03s
epoch 13 | loss: 0.78073 |  0:00:03s
epoch 14 | loss: 0.76703 |  0:00:04s
epoch 15 | loss: 0.77403 |  0:00:04s
epoch 16 | loss: 0.75782 |  0:00:04s
epoch 17 | loss: 0.76319 |  0:00:04s
epoch 18 | loss: 0.74688 |  0:00:05s
epoch 19 | loss: 0.7462  |  0:00:05s
epoch 20 | loss: 0.74468 |  0:00:05s
epoch 21 | loss: 0.74    |  0:00:05s
epoch 22 | loss: 0.73485 |  0:00:06s
epoch 23 | loss: 0.72849 |  0:00:06s
epoch 24 | loss: 0.73027 |  0:00:06s
epoch 25 | loss: 0.72453 |  0:00:06s
epoch 26 | loss: 0.7225  |  0:00:07s
e



epoch 0  | loss: 3.92431 |  0:00:00s
epoch 1  | loss: 2.59895 |  0:00:00s
epoch 2  | loss: 1.97878 |  0:00:00s
epoch 3  | loss: 1.58594 |  0:00:01s
epoch 4  | loss: 1.29548 |  0:00:01s
epoch 5  | loss: 1.10128 |  0:00:01s
epoch 6  | loss: 0.97164 |  0:00:01s
epoch 7  | loss: 0.87237 |  0:00:02s
epoch 8  | loss: 0.85938 |  0:00:02s
epoch 9  | loss: 0.83875 |  0:00:02s
epoch 10 | loss: 0.79815 |  0:00:03s
epoch 11 | loss: 0.77896 |  0:00:03s
epoch 12 | loss: 0.76537 |  0:00:03s
epoch 13 | loss: 0.77568 |  0:00:03s
epoch 14 | loss: 0.76843 |  0:00:04s
epoch 15 | loss: 0.7452  |  0:00:04s
epoch 16 | loss: 0.75807 |  0:00:04s
epoch 17 | loss: 0.74445 |  0:00:04s
epoch 18 | loss: 0.74017 |  0:00:05s
epoch 19 | loss: 0.73214 |  0:00:05s
epoch 20 | loss: 0.71421 |  0:00:05s
epoch 21 | loss: 0.72649 |  0:00:05s
epoch 22 | loss: 0.72786 |  0:00:06s
epoch 23 | loss: 0.72603 |  0:00:06s
epoch 24 | loss: 0.71437 |  0:00:06s
epoch 25 | loss: 0.71973 |  0:00:06s
epoch 26 | loss: 0.71385 |  0:00:07s
e



epoch 0  | loss: 3.92431 |  0:00:00s
epoch 1  | loss: 2.59895 |  0:00:00s
epoch 2  | loss: 1.97878 |  0:00:00s
epoch 3  | loss: 1.58594 |  0:00:01s
epoch 4  | loss: 1.29548 |  0:00:01s
epoch 5  | loss: 1.10128 |  0:00:01s
epoch 6  | loss: 0.97164 |  0:00:02s
epoch 7  | loss: 0.87237 |  0:00:02s
epoch 8  | loss: 0.85938 |  0:00:02s
epoch 9  | loss: 0.83875 |  0:00:03s
epoch 10 | loss: 0.79815 |  0:00:03s
epoch 11 | loss: 0.77896 |  0:00:03s
epoch 12 | loss: 0.76537 |  0:00:03s
epoch 13 | loss: 0.77568 |  0:00:04s
epoch 14 | loss: 0.76843 |  0:00:04s
epoch 15 | loss: 0.7452  |  0:00:04s
epoch 16 | loss: 0.75807 |  0:00:04s
epoch 17 | loss: 0.74445 |  0:00:05s
epoch 18 | loss: 0.74017 |  0:00:05s
epoch 19 | loss: 0.73214 |  0:00:05s
epoch 20 | loss: 0.71421 |  0:00:05s
epoch 21 | loss: 0.72649 |  0:00:06s
epoch 22 | loss: 0.72786 |  0:00:06s
epoch 23 | loss: 0.72603 |  0:00:06s
epoch 24 | loss: 0.71437 |  0:00:07s
epoch 25 | loss: 0.71973 |  0:00:07s
epoch 26 | loss: 0.71385 |  0:00:07s
e

## SA

RF

In [54]:
regr = get_RF(chirfparam)
crs = SA(regr, Xt=san_X_train, random_state=0, verbose = 0)
crs = stat_significance_trainer_tester(crs, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'RF-SA')

regr = get_RF(sanrfparam)
srs = SA(regr, Xt=chi_X_train, random_state=0, verbose = 0)
srs = stat_significance_trainer_tester(srs, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'RF-SA')

chi-chi 0.376 0.732 -0.049
chi-san 0.801 1.130 -0.768
san-san 0.621 0.876 -0.063
san-chi 0.436 0.850 -0.414


XGB

In [55]:
regr = get_xgb(chixgbparam)
crs = SA(regr, Xt=san_X_train, random_state=0, verbose = 0)
crs = stat_significance_trainer_tester(crs, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'XGB-SA')

regr = get_xgb(sanxgbparam)
srs = SA(regr, Xt=chi_X_train, random_state=0, verbose = 0)
srs = stat_significance_trainer_tester(srs, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'XGB-SA')

chi-chi 0.379 0.739 -0.067
chi-san 0.825 1.164 -0.876
san-san 0.606 0.855 -0.012
san-chi 0.474 0.925 -0.673


MLP

In [56]:
regr = get_MLP(chimlpparam)
cls = SA(regr, Xt=san_X_train, random_state=0, verbose = 0)
cls = stat_significance_trainer_tester(cls, chi_X_train, chi_y_train, chi_X_test, chi_y_test, san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'MLP-SA')

regr = get_MLP(sanmlpparam)
sls = SA(regr, Xt=chi_X_train, random_state=0, verbose = 0)
sls = stat_significance_trainer_tester(sls, san_X_train, san_y_train, san_X_test, san_y_test, chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'MLP-SA')



chi-chi 0.533 1.040 -1.127
chi-san 0.783 1.106 -0.692
san-san 0.696 0.983 -0.337
san-chi 0.505 0.984 -0.895


TabNet

In [57]:
regr = get_TN(chiTNparam)
cls = SA(regr, Xt=san_X_train, random_state=0, verbose = 0)
cls = stat_significance_trainer_tester(cls, chi_X_train, chi_y_train.reshape(-1, 1), chi_X_test, chi_y_test.reshape(-1, 1), san_X_test, san_y_test, source_name = "Chicago", target_name = 'San Diego', model_name = 'TN-SA')

regr = get_TN(sanTNparam)
sls = SA(regr, Xt=chi_X_train, random_state=0, verbose = 0)
sls = stat_significance_trainer_tester(sls, san_X_train, san_y_train.reshape(-1, 1), san_X_test, san_y_test.reshape(-1, 1), chi_X_test, chi_y_test, source_name = "San Diego", target_name = 'Chicago', model_name = 'TN-SA')



epoch 0  | loss: 6.48486 |  0:00:00s
epoch 1  | loss: 4.25564 |  0:00:00s
epoch 2  | loss: 3.07503 |  0:00:00s
epoch 3  | loss: 2.34205 |  0:00:01s
epoch 4  | loss: 1.85965 |  0:00:01s
epoch 5  | loss: 1.53164 |  0:00:01s
epoch 6  | loss: 1.20377 |  0:00:01s
epoch 7  | loss: 1.00579 |  0:00:01s
epoch 8  | loss: 0.85681 |  0:00:02s
epoch 9  | loss: 0.73171 |  0:00:02s
epoch 10 | loss: 0.67138 |  0:00:02s
epoch 11 | loss: 0.62354 |  0:00:02s
epoch 12 | loss: 0.57353 |  0:00:02s
epoch 13 | loss: 0.54829 |  0:00:03s
epoch 14 | loss: 0.5251  |  0:00:03s
epoch 15 | loss: 0.512   |  0:00:03s
epoch 16 | loss: 0.50626 |  0:00:03s
epoch 17 | loss: 0.48774 |  0:00:04s
epoch 18 | loss: 0.48569 |  0:00:04s
epoch 19 | loss: 0.47799 |  0:00:04s
epoch 20 | loss: 0.46729 |  0:00:04s
epoch 21 | loss: 0.47558 |  0:00:04s
epoch 22 | loss: 0.47598 |  0:00:05s
epoch 23 | loss: 0.46233 |  0:00:05s
epoch 24 | loss: 0.45402 |  0:00:05s
epoch 25 | loss: 0.47137 |  0:00:05s
epoch 26 | loss: 0.45305 |  0:00:05s
e



epoch 0  | loss: 6.39674 |  0:00:00s
epoch 1  | loss: 4.36278 |  0:00:00s
epoch 2  | loss: 3.1836  |  0:00:00s
epoch 3  | loss: 2.37246 |  0:00:00s
epoch 4  | loss: 1.82645 |  0:00:01s
epoch 5  | loss: 1.44617 |  0:00:01s
epoch 6  | loss: 1.18786 |  0:00:01s
epoch 7  | loss: 0.99803 |  0:00:01s
epoch 8  | loss: 0.82034 |  0:00:01s
epoch 9  | loss: 0.7052  |  0:00:02s
epoch 10 | loss: 0.63292 |  0:00:02s
epoch 11 | loss: 0.58099 |  0:00:02s
epoch 12 | loss: 0.54661 |  0:00:02s
epoch 13 | loss: 0.52351 |  0:00:03s
epoch 14 | loss: 0.4828  |  0:00:03s
epoch 15 | loss: 0.48504 |  0:00:03s
epoch 16 | loss: 0.47188 |  0:00:03s
epoch 17 | loss: 0.4707  |  0:00:03s
epoch 18 | loss: 0.45596 |  0:00:04s
epoch 19 | loss: 0.4551  |  0:00:04s
epoch 20 | loss: 0.44937 |  0:00:04s
epoch 21 | loss: 0.45878 |  0:00:04s
epoch 22 | loss: 0.45322 |  0:00:04s
epoch 23 | loss: 0.43822 |  0:00:05s
epoch 24 | loss: 0.43767 |  0:00:05s
epoch 25 | loss: 0.43366 |  0:00:05s
epoch 26 | loss: 0.43003 |  0:00:05s
e



epoch 0  | loss: 6.47429 |  0:00:00s
epoch 1  | loss: 4.3522  |  0:00:00s
epoch 2  | loss: 3.22755 |  0:00:00s
epoch 3  | loss: 2.37971 |  0:00:00s
epoch 4  | loss: 1.8729  |  0:00:01s
epoch 5  | loss: 1.46217 |  0:00:01s
epoch 6  | loss: 1.14779 |  0:00:01s
epoch 7  | loss: 0.95388 |  0:00:01s
epoch 8  | loss: 0.83265 |  0:00:01s
epoch 9  | loss: 0.70315 |  0:00:02s
epoch 10 | loss: 0.65401 |  0:00:02s
epoch 11 | loss: 0.60497 |  0:00:02s
epoch 12 | loss: 0.57277 |  0:00:02s
epoch 13 | loss: 0.53482 |  0:00:03s
epoch 14 | loss: 0.5234  |  0:00:03s
epoch 15 | loss: 0.50847 |  0:00:03s
epoch 16 | loss: 0.49205 |  0:00:03s
epoch 17 | loss: 0.4921  |  0:00:03s
epoch 18 | loss: 0.48135 |  0:00:04s
epoch 19 | loss: 0.47481 |  0:00:04s
epoch 20 | loss: 0.46587 |  0:00:04s
epoch 21 | loss: 0.45753 |  0:00:04s
epoch 22 | loss: 0.44932 |  0:00:04s
epoch 23 | loss: 0.4557  |  0:00:05s
epoch 24 | loss: 0.44197 |  0:00:05s
epoch 25 | loss: 0.43638 |  0:00:05s
epoch 26 | loss: 0.44207 |  0:00:05s
e



epoch 0  | loss: 6.38397 |  0:00:00s
epoch 1  | loss: 4.26653 |  0:00:00s
epoch 2  | loss: 3.07621 |  0:00:00s
epoch 3  | loss: 2.3528  |  0:00:00s
epoch 4  | loss: 1.82286 |  0:00:01s
epoch 5  | loss: 1.45615 |  0:00:01s
epoch 6  | loss: 1.15709 |  0:00:01s
epoch 7  | loss: 0.93102 |  0:00:01s
epoch 8  | loss: 0.80316 |  0:00:01s
epoch 9  | loss: 0.70493 |  0:00:02s
epoch 10 | loss: 0.62701 |  0:00:02s
epoch 11 | loss: 0.58697 |  0:00:02s
epoch 12 | loss: 0.55636 |  0:00:02s
epoch 13 | loss: 0.53623 |  0:00:03s
epoch 14 | loss: 0.50636 |  0:00:03s
epoch 15 | loss: 0.50229 |  0:00:03s
epoch 16 | loss: 0.4854  |  0:00:03s
epoch 17 | loss: 0.48427 |  0:00:03s
epoch 18 | loss: 0.47179 |  0:00:04s
epoch 19 | loss: 0.45114 |  0:00:04s
epoch 20 | loss: 0.45681 |  0:00:04s
epoch 21 | loss: 0.45081 |  0:00:04s
epoch 22 | loss: 0.44573 |  0:00:04s
epoch 23 | loss: 0.44966 |  0:00:05s
epoch 24 | loss: 0.4358  |  0:00:05s
epoch 25 | loss: 0.43268 |  0:00:05s
epoch 26 | loss: 0.42505 |  0:00:05s
e



epoch 0  | loss: 6.38397 |  0:00:00s
epoch 1  | loss: 4.26653 |  0:00:00s
epoch 2  | loss: 3.07621 |  0:00:00s
epoch 3  | loss: 2.3528  |  0:00:00s
epoch 4  | loss: 1.82286 |  0:00:01s
epoch 5  | loss: 1.45615 |  0:00:01s
epoch 6  | loss: 1.15709 |  0:00:01s
epoch 7  | loss: 0.93102 |  0:00:01s
epoch 8  | loss: 0.80316 |  0:00:02s
epoch 9  | loss: 0.70493 |  0:00:02s
epoch 10 | loss: 0.62701 |  0:00:02s
epoch 11 | loss: 0.58697 |  0:00:02s
epoch 12 | loss: 0.55636 |  0:00:02s
epoch 13 | loss: 0.53623 |  0:00:03s
epoch 14 | loss: 0.50636 |  0:00:03s
epoch 15 | loss: 0.50229 |  0:00:03s
epoch 16 | loss: 0.4854  |  0:00:03s
epoch 17 | loss: 0.48427 |  0:00:03s
epoch 18 | loss: 0.47179 |  0:00:04s
epoch 19 | loss: 0.45114 |  0:00:04s
epoch 20 | loss: 0.45681 |  0:00:04s
epoch 21 | loss: 0.45081 |  0:00:04s
epoch 22 | loss: 0.44573 |  0:00:05s
epoch 23 | loss: 0.44966 |  0:00:05s
epoch 24 | loss: 0.4358  |  0:00:05s
epoch 25 | loss: 0.43268 |  0:00:05s
epoch 26 | loss: 0.42505 |  0:00:05s
e



epoch 0  | loss: 3.96974 |  0:00:00s
epoch 1  | loss: 2.45431 |  0:00:00s
epoch 2  | loss: 1.73602 |  0:00:00s
epoch 3  | loss: 1.39085 |  0:00:01s
epoch 4  | loss: 1.19138 |  0:00:01s
epoch 5  | loss: 1.05962 |  0:00:01s
epoch 6  | loss: 0.98054 |  0:00:02s
epoch 7  | loss: 0.92683 |  0:00:02s
epoch 8  | loss: 0.8844  |  0:00:02s
epoch 9  | loss: 0.87417 |  0:00:02s
epoch 10 | loss: 0.85838 |  0:00:03s
epoch 11 | loss: 0.84365 |  0:00:03s
epoch 12 | loss: 0.83401 |  0:00:03s
epoch 13 | loss: 0.81802 |  0:00:04s
epoch 14 | loss: 0.80524 |  0:00:04s
epoch 15 | loss: 0.8031  |  0:00:04s
epoch 16 | loss: 0.7917  |  0:00:04s
epoch 17 | loss: 0.78215 |  0:00:05s
epoch 18 | loss: 0.7895  |  0:00:05s
epoch 19 | loss: 0.78141 |  0:00:05s
epoch 20 | loss: 0.78151 |  0:00:05s
epoch 21 | loss: 0.78081 |  0:00:06s
epoch 22 | loss: 0.78453 |  0:00:06s
epoch 23 | loss: 0.76743 |  0:00:06s
epoch 24 | loss: 0.76757 |  0:00:06s
epoch 25 | loss: 0.76548 |  0:00:07s
epoch 26 | loss: 0.76305 |  0:00:07s
e



epoch 0  | loss: 3.81361 |  0:00:00s
epoch 1  | loss: 2.34443 |  0:00:00s
epoch 2  | loss: 1.59935 |  0:00:00s
epoch 3  | loss: 1.25679 |  0:00:01s
epoch 4  | loss: 1.08853 |  0:00:01s
epoch 5  | loss: 1.00248 |  0:00:01s
epoch 6  | loss: 0.94132 |  0:00:01s
epoch 7  | loss: 0.87733 |  0:00:02s
epoch 8  | loss: 0.84348 |  0:00:02s
epoch 9  | loss: 0.82956 |  0:00:02s
epoch 10 | loss: 0.81933 |  0:00:02s
epoch 11 | loss: 0.79795 |  0:00:03s
epoch 12 | loss: 0.79135 |  0:00:03s
epoch 13 | loss: 0.77884 |  0:00:03s
epoch 14 | loss: 0.76911 |  0:00:04s
epoch 15 | loss: 0.77103 |  0:00:04s
epoch 16 | loss: 0.76164 |  0:00:04s
epoch 17 | loss: 0.76096 |  0:00:04s
epoch 18 | loss: 0.75477 |  0:00:05s
epoch 19 | loss: 0.75112 |  0:00:05s
epoch 20 | loss: 0.75544 |  0:00:05s
epoch 21 | loss: 0.74611 |  0:00:05s
epoch 22 | loss: 0.74188 |  0:00:06s
epoch 23 | loss: 0.73861 |  0:00:06s
epoch 24 | loss: 0.73927 |  0:00:06s
epoch 25 | loss: 0.71671 |  0:00:06s
epoch 26 | loss: 0.72781 |  0:00:07s
e



epoch 0  | loss: 3.82235 |  0:00:00s
epoch 1  | loss: 2.30449 |  0:00:00s
epoch 2  | loss: 1.68698 |  0:00:00s
epoch 3  | loss: 1.38339 |  0:00:01s
epoch 4  | loss: 1.21404 |  0:00:01s
epoch 5  | loss: 1.04214 |  0:00:01s
epoch 6  | loss: 0.97491 |  0:00:01s
epoch 7  | loss: 0.91755 |  0:00:02s
epoch 8  | loss: 0.88791 |  0:00:02s
epoch 9  | loss: 0.84664 |  0:00:02s
epoch 10 | loss: 0.82287 |  0:00:03s
epoch 11 | loss: 0.81726 |  0:00:03s
epoch 12 | loss: 0.80113 |  0:00:03s
epoch 13 | loss: 0.79298 |  0:00:03s
epoch 14 | loss: 0.78664 |  0:00:04s
epoch 15 | loss: 0.77762 |  0:00:04s
epoch 16 | loss: 0.77869 |  0:00:04s
epoch 17 | loss: 0.77359 |  0:00:04s
epoch 18 | loss: 0.76583 |  0:00:05s
epoch 19 | loss: 0.75308 |  0:00:05s
epoch 20 | loss: 0.76081 |  0:00:05s
epoch 21 | loss: 0.75685 |  0:00:05s
epoch 22 | loss: 0.75323 |  0:00:06s
epoch 23 | loss: 0.75346 |  0:00:06s
epoch 24 | loss: 0.75044 |  0:00:06s
epoch 25 | loss: 0.73275 |  0:00:06s
epoch 26 | loss: 0.73771 |  0:00:07s
e



epoch 0  | loss: 3.83935 |  0:00:00s
epoch 1  | loss: 2.47735 |  0:00:00s
epoch 2  | loss: 1.7421  |  0:00:01s
epoch 3  | loss: 1.40051 |  0:00:01s
epoch 4  | loss: 1.22471 |  0:00:01s
epoch 5  | loss: 1.0663  |  0:00:02s
epoch 6  | loss: 0.97877 |  0:00:02s
epoch 7  | loss: 0.92443 |  0:00:02s
epoch 8  | loss: 0.8728  |  0:00:02s
epoch 9  | loss: 0.87087 |  0:00:03s
epoch 10 | loss: 0.84566 |  0:00:03s
epoch 11 | loss: 0.7963  |  0:00:03s
epoch 12 | loss: 0.80028 |  0:00:04s
epoch 13 | loss: 0.78597 |  0:00:04s
epoch 14 | loss: 0.77975 |  0:00:04s
epoch 15 | loss: 0.77166 |  0:00:04s
epoch 16 | loss: 0.7664  |  0:00:05s
epoch 17 | loss: 0.76339 |  0:00:05s
epoch 18 | loss: 0.76533 |  0:00:05s
epoch 19 | loss: 0.75462 |  0:00:05s
epoch 20 | loss: 0.75668 |  0:00:06s
epoch 21 | loss: 0.74323 |  0:00:06s
epoch 22 | loss: 0.74527 |  0:00:06s
epoch 23 | loss: 0.74357 |  0:00:06s
epoch 24 | loss: 0.72599 |  0:00:07s
epoch 25 | loss: 0.73299 |  0:00:07s
epoch 26 | loss: 0.73119 |  0:00:07s
e



epoch 0  | loss: 3.83935 |  0:00:00s
epoch 1  | loss: 2.47735 |  0:00:00s
epoch 2  | loss: 1.7421  |  0:00:00s
epoch 3  | loss: 1.40051 |  0:00:01s
epoch 4  | loss: 1.22471 |  0:00:01s
epoch 5  | loss: 1.0663  |  0:00:01s
epoch 6  | loss: 0.97877 |  0:00:02s
epoch 7  | loss: 0.92443 |  0:00:02s
epoch 8  | loss: 0.8728  |  0:00:02s
epoch 9  | loss: 0.87087 |  0:00:02s
epoch 10 | loss: 0.84566 |  0:00:03s
epoch 11 | loss: 0.7963  |  0:00:03s
epoch 12 | loss: 0.80028 |  0:00:03s
epoch 13 | loss: 0.78597 |  0:00:04s
epoch 14 | loss: 0.77975 |  0:00:04s
epoch 15 | loss: 0.77166 |  0:00:04s
epoch 16 | loss: 0.7664  |  0:00:04s
epoch 17 | loss: 0.76339 |  0:00:05s
epoch 18 | loss: 0.76533 |  0:00:05s
epoch 19 | loss: 0.75462 |  0:00:05s
epoch 20 | loss: 0.75668 |  0:00:06s
epoch 21 | loss: 0.74323 |  0:00:06s
epoch 22 | loss: 0.74527 |  0:00:06s
epoch 23 | loss: 0.74357 |  0:00:06s
epoch 24 | loss: 0.72599 |  0:00:07s
epoch 25 | loss: 0.73299 |  0:00:07s
epoch 26 | loss: 0.73119 |  0:00:07s
e

# Results Export

In [58]:
results_rrmse_transposed = {key: [value['metric'], value['chi-chi'], value['chi-san'], value['san-chi'], value['san-san']] for key, value in results_rrmse.items()}
results_rrmsef = pd.DataFrame.from_dict(results_rrmse_transposed, orient='index', columns=['metric', 'chi-chi', 'chi-san', 'san-chi', 'san-san'])

results_rrmsef
results_rrmsef.to_csv('./chi-san0.csv')


In [66]:
df0 = pd.read_csv('./results/run0.csv', index_col=0)
df1 = pd.read_csv('./results/run1.csv', index_col=0)
df2 = pd.read_csv('./results/run2.csv', index_col=0)

df0_numeric = df0.apply(pd.to_numeric, errors='coerce')
df1_numeric = df1.apply(pd.to_numeric, errors='coerce')
df2_numeric = df2.apply(pd.to_numeric, errors='coerce')

mean_df = (df0_numeric + df1_numeric + df2_numeric) / 3
mean_df = mean_df.round(3)

mean_df = mean_df.applymap(lambda x: f"{x:.3f}")

mean_df.to_csv('mean_run.csv', index=True)
columns = ['NB-NB', 'CB-CB', 'SB-SB', 'chi-chi', 'san-san']
rows = ['RF', 'XG', 'TN']
new_df = mean_df.loc[rows, columns]
print(new_df)


        NB-NB  CB-CB  SB-SB chi-chi san-san
method                                     
RF      0.334  0.358  0.311   0.374   0.597
XG      0.341  0.402  0.353   0.378   0.593
TN      0.364  0.358  0.331   0.421   0.624


In [68]:
mean_df = mean_df.drop(columns={'NB-NB', 'CB-CB', 'SB-SB', 'chi-chi', "san-san"})
print(mean_df)

          NB-CB  NB-SB  CB-NB  CB-SB  SB-NB  SB-CB chi-san san-chi
method                                                            
RF        0.426  0.323  0.326  0.317  0.368  0.462   0.747   0.528
RF-FA     0.360  0.312  0.325  0.311  0.329  0.353   0.588   0.374
RF-BW     0.391  0.315  0.313  0.301  0.340  0.419   0.587   0.375
RF-CORAL  0.475  0.330  0.328  0.326  0.368  0.479   0.747   0.511
RF-SA     0.456  0.333  0.326  0.308  0.353  0.443   0.799   0.448
XG        0.436  0.361  0.350  0.353  0.419  0.510   0.760   0.449
XG-FA     0.391  0.341  0.329  0.339  0.334  0.393   0.616   0.362
XG-BW     0.402  0.339  0.341  0.331  0.359  0.448   0.615   0.365
XG-CORAL  0.468  0.358  0.359  0.352  0.424  0.513   0.777   0.444
XG-SA     0.456  0.362  0.366  0.344  0.372  0.473   0.813   0.483
TN        0.443  0.325  0.334  0.319  0.386  0.471   1.018   0.623
TN-FA     0.422  0.334  0.340  0.319  0.373  0.404   0.594   0.422
TN-BW     0.405  0.355  0.347  0.324  0.389  0.432   0.650   0

In [90]:
RF       = [0.426, 0.323, 0.326, 0.317, 0.368, 0.462, 0.747,0.528]
RF_FA    = [0.360, 0.312, 0.325, 0.311, 0.329, 0.353, 0.588,0.374]
RF_BW    = [0.391, 0.315, 0.313, 0.301, 0.340, 0.419, 0.587,0.375]
RF_CORAL = [0.475, 0.330, 0.328, 0.326, 0.368, 0.479, 0.747,0.511]
RF_SA    = [0.456, 0.333, 0.326, 0.308, 0.353, 0.443, 0.799,0.448]
XG       = [0.436, 0.361, 0.350, 0.353, 0.419, 0.510, 0.760,0.449]
XG_FA    = [0.391, 0.341, 0.329, 0.339, 0.334, 0.393, 0.616,0.362]
XG_BW    = [0.402, 0.339, 0.341, 0.331, 0.359, 0.448, 0.615,0.365]
XG_CORAL = [0.468, 0.358, 0.359, 0.352, 0.424, 0.513, 0.777,0.444]
XG_SA    = [0.456, 0.362, 0.366, 0.344, 0.372, 0.473, 0.813,0.483]
TN       = [0.443, 0.325, 0.334, 0.319, 0.386, 0.471, 1.018,0.623]
TN_FA    = [0.422, 0.334, 0.340, 0.319, 0.373, 0.404, 0.594,0.422]
TN_BW    = [0.405, 0.355, 0.347, 0.324, 0.389, 0.432, 0.650,0.439]
TN_CORAL = [0.451, 0.320, 0.348, 0.323, 0.386, 0.465, 0.784,0.572]
TN_SA    = [0.421, 0.326, 0.333, 0.332, 0.368, 0.425, 0.809,0.586]
print('&'.join([str(i) for i in TN]) + '\\')
def tolatx(ref, obj):
    res = ''
    for i,j in zip(ref,obj):
        a = min(int(max(0, i-j)*10000),100)
        if a != 0:
            a = max(a,20)
        b = '\dec{'+str(a)+'}{'+r44(j)+'}&'
        res += b
    return res[:-1]
print(tolatx(TN, TN_SA))

0.443&0.325&0.334&0.319&0.386&0.471&1.018&0.623\
\dec{100}{0.421}&\dec{0}{0.326}&\dec{20}{0.333}&\dec{0}{0.332}&\dec{100}{0.368}&\dec{100}{0.425}&\dec{100}{0.809}&\dec{100}{0.586}


## serialization

In [33]:
feat_def = ['Day of year: ', 'Wave Height (meters): ', 'Water Temperature (degree Celcius): ', 'Tide: ', 'Solar radiance (watts/meter squared): ',
 'Did we have high rain fall the past 3 days? ', 'Did we have high rain fall the past 7 days? ',
 'Log10 of cummulative rain in the last 3 days in inches: ', 'Log10 of cummulative rain in the last 3 days in inches: ',
 'Is tide value more than the mean tide? ', 'Alongshire wind speed (meters/second): ', 'offshore wind speed (meters/second): ', 'Beach location: ']
instructions = 'We want to predict the log10 of bacteria concentration level in the beaches. \
you will receive environmental information and you should answer with a precise float number between -0.200 and 5.000'
def make_serial(train_data, target_data, feat_names = feat_def, bin_feats = [5,6,9], cat_feats = [12]):
    serialized_dataset = []
    for features, target in zip(train_data, target_data):
        inputs = ''
        outputs = r55(target)
        for i in range(len(feat_names[:-1])):
            definition = feat_def[i]
            if not pd.isnull(features[i]):
                inputs += definition
                if i in bin_feats:
                    inputs += 'Yes. ' if features[i] == 1.0 else 'No. '
                elif i in cat_feats:
                    inputs += features[i]
                else:
                    inputs += str(r55(features[i])) + '. '
        serialized_dataset.append({'instruction': instructions, 'input': inputs, 'output': outputs})
    return serialized_dataset

def lad(dataset, location):
    return np.array([list(sub_array) + [location] for sub_array in dataset], dtype=object)


In [34]:
serialized_chi_train = make_serial(chi_X_train, chi_y_train)
serialized_chi_test = make_serial(chi_X_test, chi_y_test)
serialized_san_train = make_serial(san_X_train , san_y_train)
serialized_san_test = make_serial(san_X_test , san_y_test)

# with open('./serialized_chi_train.pkl', 'wb') as file: 
#     pickle.dump(serialized_chi_train, file) 
# with open('./serialized_chi_test.pkl', 'wb') as file: 
#     pickle.dump(serialized_chi_test, file) 
# with open('./serialized_san_train.pkl', 'wb') as file: 
#     pickle.dump(serialized_san_train, file) 
# with open('./serialized_san_test.pkl', 'wb') as file: 
#     pickle.dump(serialized_san_test, file) 

In [37]:
serialized_chi_train = make_serial(lad(chi_X_train,'Chicago'), chi_y_train)
serialized_chi_test = make_serial(lad(chi_X_test,'Chicago'), chi_y_test)
serialized_san_train = make_serial(lad(san_X_train, 'San Diego') , san_y_train)
serialized_san_test = make_serial(lad(san_X_test,'San Diego') , san_y_test)

with open('./serialized_chi_train.pkl', 'wb') as file: 
    pickle.dump(serialized_chi_train, file) 
with open('./serialized_chi_test.pkl', 'wb') as file: 
    pickle.dump(serialized_chi_test, file) 
with open('./serialized_san_train.pkl', 'wb') as file: 
    pickle.dump(serialized_san_train, file) 
with open('./serialized_san_test.pkl', 'wb') as file: 
    pickle.dump(serialized_san_test, file) 

{'instruction': 'We want to predict the log10 of bacteria concentration level in the beaches. you will receive environmental information and you should answer with a precise float number between -0.200 and 5.000',
 'input': 'Day of year: 0.1154. Wave Height (meters): 0.1511. Water Temperature (degree Celcius): 0.1212. Tide: 0.5145. Solar radiance (watts/meter squared): 0.3760. Did we have rain the past 3 days? No. Did we have rain the past 7 days? No. Log10 of cummulative rain in the last 3 days in inches: 0.0000. Log10 of cummulative rain in the last 3 days in inches: 0.0000. Is tide value more than the mean tide? Yes. Alongshire wind speed (meters/second): 0.5814. offshore wind speed (meters/second): 0.5538. ',
 'output': '3.0378'}

In [38]:
random_indices = np.random.choice(lad(san_X_train, 'San Diego').shape[0], 500, replace=False)
san_X_train_sampled = lad(san_X_train, 'San Diego')[random_indices]
san_y_train_sampled = san_y_train[random_indices]
chi_san_X_train = np.concatenate((lad(chi_X_train, 'Chicago'), san_X_train_sampled), axis=0)
chi_san_y_train = np.concatenate((chi_y_train, san_y_train_sampled), axis=0)

serialized_chi_san_train = make_serial(chi_san_X_train, chi_san_y_train)


random_indices = np.random.choice(lad(chi_X_train, 'Chicago').shape[0], 500, replace=False)
chi_X_train_sampled = lad(chi_X_train, 'Chicago')[random_indices]
chi_y_train_sampled = chi_y_train[random_indices]
san_chi_X_train = np.concatenate((lad(san_X_train, 'San Diego'), chi_X_train_sampled), axis=0)
san_chi_y_train = np.concatenate((san_y_train, chi_y_train_sampled), axis=0)

serialized_san_chi_train = make_serial(san_chi_X_train, san_chi_y_train)

with open('./serialized_chi_san_train.pkl', 'wb') as file: 
    pickle.dump(serialized_chi_san_train, file) 
with open('./serialized_san_chi_train.pkl', 'wb') as file: 
    pickle.dump(serialized_san_chi_train, file) 

zero-shot

In [38]:
from tqdm import tqdm
from sklearn.metrics import mean_squared_error, r2_score
from collections import Counter
import numpy as np
import re
import os

def extract_float(text):
    match = re.search(r"[-+]?\d*\.\d+", text)
    if match:
        return float(match.group(0))
    else:
        return None

def r44(value): return f"{round(value, 3):.3f}"

def r55(value): return f"{round(value, 4):.4f}"

def rrmse(t,p): return np.sqrt(1/len(t) * np.sum(np.square((t - p) / p)))

def metrics(y_test, y_pred, roundd = False):
    rrmse = np.sqrt(1/len(y_test) * np.sum(np.square((y_test - y_pred) / y_pred)))
    rmse = mean_squared_error(y_test, y_pred) ** (1/2)
    r2 = r2_score(y_test, y_pred)
    return rrmse, rmse, r2

def stid(source_name, target_name):
    return (source_name[0:3] + '-' + target_name[0:3]).lower()

def np_ratio(arr):
    C = Counter(arr)
    return C[0]/C[1]

def evaluator(test_data, model):
    generated_output = []
    real_output = []
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference
    for testcases in tqdm(test_data):
        inputs = tokenizer(
        [
          alpaca_prompt.format(
              testcases["instruction"], # instruction
              testcases["input"], # input
              "", # output - leave this blank for generation!
          )
      ], return_tensors = "pt").to("cuda")

        outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)

        outputs = tokenizer.batch_decode(outputs)[0]

        outputs = extract_float(outputs.split("### Response:")[1].strip())
        benchmark = extract_float(testcases["output"])

        generated_output.append(outputs)
        real_output.append(benchmark)

    valid_indices = [i for i, output in enumerate(generated_output) if output is not None]
    generated_output_filtered = np.array([generated_output[i] for i in valid_indices])
    real_output_filtered = np.array([real_output[i] for i in valid_indices])
    print('validated generations:', len(generated_output_filtered)/len(test_data))

    rrmse, rmse, r2 = metrics(real_output_filtered, generated_output_filtered)
    print(rrmse, rmse, r2)
    return real_output_filtered, generated_output_filtered

In [37]:
import os
import replicate
os.environ["REPLICATE_API_TOKEN"] = "r8_04rPEJPKNfBUADRXnqucmHpQmFAW5NU0Anovt"
api = replicate.Client(api_token=os.environ["REPLICATE_API_TOKEN"])

In [None]:
import replicate
for i in serialized_chi_test[:3]:
    input = {
            "top_p": 1,
            "prompt": "what name rhyme with my name? my name is Allie.",# i['input'] + 'Answer with a float number. Answer:\n',
            "temperature": 0.2,
            "system_prompt": "my name is Allie. tell me a name that rhyme with it",# i['instruction'] + " Predict the concentration of the bacteria using the information.",
            "max_new_tokens": 10,
            "repetition_penalty":1.25
        }
    output = ''.join(api.run("meta/meta-llama-3-8b",input=input))
    print(output)

In [45]:
serialized_chi_test[0]

{'instruction': 'We want to predict the log10 of bacteria concentration level in the beaches. you will receive environmental information and you should answer with a precise float number between -0.200 and 5.000',
 'input': 'Day of year: 0.0385. Wave Height (meters): 0.6579. Water Temperature (degree Celcius): 0.5805. Tide: 0.6233. Solar radiance (watts/meter squared): 0.0186. Did we have high rain fall the past 3 days? No. Did we have high rain fall the past 7 days? No. Log10 of cummulative rain in the last 3 days in inches: 0.6758. Log10 of cummulative rain in the last 3 days in inches: 0.5200. Is tide value more than the mean tide? Yes. Alongshire wind speed (meters/second): 0.6136. offshore wind speed (meters/second): 0.7862. ',
 'output': '3.3709'}

In [None]:

\begin{table*}[t]
\centering
        \caption{Regression results measured by rRMSE for transfer learning and domain adaptation between Chicago groups of beaches. An improvement over the transfer learning model is marked with \increase. Best domain adaptation method for each test set is showed using \textbf{bold} font. The lower rRMSE scores show a better performance. Timeline split using 2018 and prior for train/development and post 2018 for test.}
    \resizebox{1.8\columnwidth}!{
    \begin{tabular}{c|c|c|*{2}{c}|*{2}{c}|*{2}{c}||c|c}
        \toprule
        \multicolumn{3}{c}{\bf{Trained on}} & \multicolumn{2}{|c}{\bf{NB}} & \multicolumn{2}{|c}{\bf{CB}} & \multicolumn{2}{|c||}{\bf{SB}}& \multicolumn{1}{c}{\bf{Chicago}}& \multicolumn{1}{|c}{\bf{San Diego}}\\

        \midrule
        \bf{Model} & \multicolumn{2}{c|}{\bf{Approaches}} &  \multicolumn{1}{|c|}{\bf{CB}} & \bf{SB} & \multicolumn{1}{|c|}{\bf{NB}} & \multicolumn{1}{|c|}{\bf{SB}} & \bf{NB} & \multicolumn{1}{|c||}{\bf{CB}}& \multicolumn{1}{c}{\bf{San Diego}}& \multicolumn{1}{|c}{\bf{Chicago}}\\

        \midrule
        \multirow{6}{*}{RF} & \multicolumn{2}{c|}{Supervised} & \multicolumn{2}{c|}{NB--NB: 0.336} & \multicolumn{2}{c|}{CB--CB: 0.358}&\multicolumn{2}{c||}{SB--SB: 0.312}&\multicolumn{1}{c|}{Chi--Chi: 0.374}&\multicolumn{1}{c}{San--San: 0.597}\\

        \cmidrule{2-11}
        & \multicolumn{2}{c|}{TL} & 0.426&0.322&0.325&0.316&0.369&0.460&0.763&0.526\\
        
        \cmidrule{2-11}
        
        & \multirow{4}{*}{DA} & FA & 
        \dec{100}{\bf{0.343}}&\dec{40}{0.318}&\dec{0}{0.332}&\dec{0}{0.319}&\dec{100}{0.339}&\dec{100}{0.344}&\dec{100}{\bf{0.587}}&\dec{100}{0.373}\\

        & & BWT & 
        \dec{100}{0.383}&\dec{70}{0.315}&\dec{100}{\bf{0.314}}&\dec{100}{\bf{0.300}}&\dec{100}{0.343}&\dec{100}{0.401}&\dec{100}{\bf{0.587}}&\dec{100}{0.374}\\

        % \cline{3-11}
        & & CORAL &\dec{0}{0.473}&\dec{0}{0.328}&\dec{0}{0.326}&\dec{0}{0.323}&\dec{0}{0.369}&\dec{0}{0.477}&\dec{0}{0.771}&\dec{30}{0.523}\\

        % \cline{3-11}
        & & SA & \dec{0}{0.456}&\dec{0}{0.333}&\dec{0}{0.326}&\dec{80}{0.308}&\dec{100}{0.354}&\dec{100}{0.443}&\dec{0}{0.807}&\dec{100}{0.458}\\

        \midrule
        \multirow{6}{*}{XGBoost} & \multicolumn{2}{c|}{Supervised} & \multicolumn{2}{c|}{NB--NB: 0.341} & \multicolumn{2}{c|}{CB--CB: 0.402}&\multicolumn{2}{c||}{SB--SB: 0.353}&\multicolumn{1}{c|}{Chi--Chi: 0.379}&\multicolumn{1}{c}{San--San: 0.593}\\

        \cmidrule{2-11}
        
        & \multicolumn{2}{c|}{TL} &0.436&0.361&0.35&0.353&0.419&0.510&0.771&0.447\\
        \cmidrule{2-11}
        & \multirow{4}{*}{DA} & FA & 
        \dec{100}{0.394}&\dec{100}{0.340}&\dec{100}{0.332}&\dec{70}{0.346}&\dec{100}{0.336}&\dec{100}{0.368}&\dec{100}{0.621}&\dec{100}{\bf{0.362}}\\

        & & BWT & 
        \dec{100}{0.404}&\dec{100}{0.336}&\dec{69}{0.343}&\dec{100}{0.323}&\dec{100}{0.355}&\dec{100}{0.445}&\dec{100}{0.623}&\dec{100}{0.364}\\

        & & CORAL & 
        \dec{0}{0.468}&\dec{30}{0.358}&\dec{0}{0.359}&\dec{20}{0.352}&\dec{0}{0.424}&\dec{0}{0.513}&\dec{0}{0.806}&\dec{0}{0.450}\\

        % \cline{3-11}
        & & SA & 
        \dec{0}{0.456}&\dec{0}{0.362}&\dec{0}{0.366}&\dec{90}{0.344}&\dec{100}{0.372}&\dec{100}{0.473}&\dec{0}{0.820}&\dec{0}{0.487}\\
        \midrule

        \multirow{6}{*}{TabNet} & \multicolumn{2}{c|}{Supervised} & \multicolumn{2}{c|}{NB--NB: 0.364} & \multicolumn{2}{c|}{CB--CB: 0.358}&\multicolumn{2}{c||}{SB--SB: 0.471}&\multicolumn{1}{c|}{Chi--Chi: 0.424}&\multicolumn{1}{c}{San--San: 0.625}\\
        \cmidrule{2-11}
        & \multicolumn{2}{c|}{TL} & 0.443&0.325&0.334&0.319&0.386&0.471&1.177&0.693\\

        \cmidrule{2-11}
        & \multirow{4}{*}{DA} & FA & 
        \dec{50}{0.438}&\dec{20}{0.323}&\dec{0}{0.337}&\dec{60}{0.313}&\dec{100}{0.362}&\dec{100}{0.400}&\dec{100}{0.593}&\dec{100}{0.410}\\

        & & BWT & 
        \dec{100}{0.392}&\dec{0}{0.343}&\dec{0}{0.363}&\dec{0}{0.321}&\dec{0}{0.391}&\dec{100}{0.432}&\dec{100}{0.663}&\dec{100}{0.434}\\

        % \cline{3-11}
        & & CORAL & 
        \dec{0}{0.451}&\dec{50}{0.320}&\dec{0}{0.348}&\dec{0}{0.323}&\dec{0}{0.386}&\dec{59}{0.465}&\dec{100}{0.770}&\dec{100}{0.562}\\
 	 	 	 	 	 	 	 	
        % \cline{3-11}
        & & SA & 
        \dec{100}{0.421}&\dec{0}{0.326}&\dec{20}{0.333}&\dec{0}{0.332}&\dec{100}{0.368}&\dec{100}{0.425}&\dec{100}{0.820}&\dec{100}{0.532}\\
        \midrule

        \multirow{3}{*}{LLaMA-3 8B} & \multicolumn{2}{c|}{Supervised} & \multicolumn{2}{c|}{NB--NB: 0.360} & \multicolumn{2}{c|}{CB--CB: 0.383}&\multicolumn{2}{c||}{SB--SB: 0.360}&\multicolumn{1}{c|}{Chi--Chi: 0.397}&\multicolumn{1}{c}{San--San: 0.960}\\
        \cmidrule{2-11}
        
        & \multicolumn{2}{c|}{TL} & 0.390&0.350&0.353&0.344&0.364&0.386&0.785&0.860\\

        \cmidrule{2-11}
        & \multirow{1}{*}{DA} & FA &\dec{60}{0.384}&\dec{100}{0.319}&\dec{100}{0.340}&\dec{29}{0.341}&\dec{100}{0.347}&\dec{0}{0.391}&\dec{100}{0.771}&\dec{100}{0.742}\\
        \bottomrule
    \end{tabular}
    }

    \label{tab:reg2}
\end{table*}

