# Setup

In [29]:
import warnings
warnings.filterwarnings('ignore')
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import root_mean_squared_error, r2_score
import numpy as np
from pandas.api.types import CategoricalDtype

# Reduced model

In [20]:
# Load the two prediction tables scored in the next cell.
# `test_rf` is read there through its `*_true`, `*_outdoor` and `*_all` columns,
# `reduced_rf` through its `*_true` and `*_red` columns.
test_rf, reduced_rf = (
    pd.read_csv(csv_path)
    for csv_path in (
        'output/regression_tuning/test_results_rf.csv',
        'output/regression_reduced/test_results_reduced.csv',
    )
)

In [22]:
def score_predictions(frame, pred_suffix, label, outcomes=('assets', 'exp', 'inc')):
    """Score one set of predictions against the observed values.

    For every prefix in ``outcomes`` the column ``<outcome>_true`` is compared
    with the column ``<outcome>_<pred_suffix>`` of ``frame``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table holding the observed and the predicted columns.
    pred_suffix : str
        Suffix identifying the prediction columns (e.g. ``'outdoor'``).
    label : str
        Value stored under the ``'model'`` key of the returned record.
    outcomes : iterable of str
        Column prefixes to score; the order fixes the key order of the record.

    Returns
    -------
    dict
        ``'model'`` plus ``<outcome>_r2``, ``<outcome>_p`` and
        ``<outcome>_rmse`` for each outcome, each rounded to two decimals.
    """
    record = {'model': label}
    for outcome in outcomes:
        observed = frame[f'{outcome}_true']
        predicted = frame[f'{outcome}_{pred_suffix}']
        record[f'{outcome}_r2'] = round(r2_score(observed, predicted), 2)
        # The '_p' key holds the Spearman rank correlation coefficient
        # (first element of the spearmanr result), not a p-value.
        record[f'{outcome}_p'] = round(spearmanr(observed, predicted)[0], 2)
        record[f'{outcome}_rmse'] = round(root_mean_squared_error(observed, predicted), 2)
    return record


# One record per feature set; the 'Reduced' predictions live in their own file.
r1 = score_predictions(test_rf, 'outdoor', 'Outdoor')
r2 = score_predictions(reduced_rf, 'red', 'Reduced')
r3 = score_predictions(test_rf, 'all', 'Complete')

# Build the table in one go: one row per model, with a unique 0..2 index
# instead of the repeated 0 that concatenating one-row frames produced.
results = pd.DataFrame([r1, r2, r3])
results

Unnamed: 0,model,assets_r2,assets_p,assets_rmse,exp_r2,exp_p,exp_rmse,inc_r2,inc_p,inc_rmse
0,Outdoor,0.5,0.7,0.37,0.24,0.45,5162.62,0.1,0.41,11735.23
0,Reduced,0.62,0.78,0.32,0.28,0.51,5032.7,0.14,0.43,11427.49
0,Complete,0.7,0.85,0.28,0.38,0.57,4684.25,0.16,0.5,11323.67


In [39]:
# Reshape the one-row-per-model score table into one row per (Outcome, Features)
# pair with one column per metric, then wrap it in a Styler for display/export.

# Display labels for the two halves of a '<outcome>_<metric>' column name.
outcome_labels = {'assets': 'Assets', 'exp': 'Expenditure', 'inc': 'Income'}
metric_labels = {'p': 'p', 'r2': 'R2', 'rmse': 'RMSE'}

# Row order of the feature sets inside each outcome.
custom_order = ['Outdoor', 'Reduced', 'Complete']
cat_type = CategoricalDtype(categories=custom_order, ordered=True)

# Long format: one row per (model, score column).
scores_long = pd.melt(results, ['model'])

# Split each column name on its last underscore rather than relying on
# order-dependent substring matches; unknown pieces still fall back to 'other'.
name_parts = scores_long['variable'].str.rsplit('_', n=1, expand=True)
scores_long['Outcome'] = name_parts[0].map(outcome_labels).fillna('other')
scores_long['Metric'] = name_parts[1].map(metric_labels).fillna('other')
# The model label already is the feature-set name.
scores_long['Features'] = scores_long['model']

scores_table = (
    scores_long
    .pivot(index=['Outcome', 'Features'], columns='Metric', values='value')
    .reset_index()
    .loc[:, ['Outcome', 'Features', 'p', 'R2', 'RMSE']]
    # Ordered categorical so sorting follows custom_order, not the alphabet.
    .astype({'Features': cat_type})
    .sort_values(by=['Outcome', 'Features'])
)

# `results2` is bound once, to the Styler that the LaTeX export cell consumes.
results2 = scores_table.style.format(decimal='.', thousands=',', precision=2).hide(axis="index")

results2

Outcome,Features,p,R2,RMSE
Assets,Outdoor,0.7,0.5,0.37
Assets,Reduced,0.78,0.62,0.32
Assets,Complete,0.85,0.7,0.28
Expenditure,Outdoor,0.45,0.24,5162.62
Expenditure,Reduced,0.51,0.28,5032.7
Expenditure,Complete,0.57,0.38,4684.25
Income,Outdoor,0.41,0.1,11735.23
Income,Reduced,0.43,0.14,11427.49
Income,Complete,0.5,0.16,11323.67


In [40]:
# Render the styled table as LaTeX source and print it as plain text so it can
# be copied verbatim.
latex_table = results2.to_latex()
print(latex_table)

\begin{tabular}{llrrr}
Outcome & Features & p & R2 & RMSE \\
Assets & Outdoor & 0.70 & 0.50 & 0.37 \\
Assets & Reduced & 0.78 & 0.62 & 0.32 \\
Assets & Complete & 0.85 & 0.70 & 0.28 \\
Expenditure & Outdoor & 0.45 & 0.24 & 5,162.62 \\
Expenditure & Reduced & 0.51 & 0.28 & 5,032.70 \\
Expenditure & Complete & 0.57 & 0.38 & 4,684.25 \\
Income & Outdoor & 0.41 & 0.10 & 11,735.23 \\
Income & Reduced & 0.43 & 0.14 & 11,427.49 \\
Income & Complete & 0.50 & 0.16 & 11,323.67 \\
\end{tabular}

