# Experimental Results for ECE

In [1]:
import pandas as pd
import os
from utils import *

In [2]:
# Column order used whenever a result table is displayed or concatenated.
res_columns = [
    'Calibrated on',
    'Art', 'Clipart', 'Product', 'RealWorld',
    'Average',
]

## Load the ECE Dataframe when source domain is included in calibration domains.

### KFold results of best Rho based on both Error and ECE

In [3]:
# Best-rho lookups for the 'in' setting, selected once by Error and once by ECE
# (best_rho_calib is presumably provided by utils — verify).
best_rho_er_in, best_rho_ece_in = [
    best_rho_calib(metric, 'in', 'crossentropy') for metric in ('Error', 'ECE')
]

In [4]:
# Display the Error-based best-rho lookup for the 'in' setting.
best_rho_er_in

{'Art': {'Product_RealWorld': 0.2,
  'Clipart_RealWorld': 0.2,
  'Clipart_Product': 0.2},
 'Clipart': {'Product_RealWorld': 0.3,
  'Art_RealWorld': 0.3,
  'Art_Product': 0.3},
 'Product': {'Clipart_RealWorld': 0.2,
  'Art_RealWorld': 0.2,
  'Art_Clipart': 0.3},
 'RealWorld': {'Clipart_Product': 0.2, 'Art_Product': 0.3, 'Art_Clipart': 0.2}}

In [5]:
# Read the stored ECE table for the 'in' setting and post-process it with the
# two best-rho lookups via get_res_df.
results_dir = os.path.join('..', '..', 'results', 'OfficeHome', 'crossentropy')
ece_file_in = os.path.join(results_dir, 'in_ECE_mean.csv')
ece_in = get_res_df(pd.read_csv(ece_file_in), best_rho_er_in, best_rho_ece_in)

### Average the domain wise results for target domains
We also have stored results for the standard deviation. Here, however, we are abusing the statistics by taking the standard deviation over columns: we ran 20 iterations over 500 randomly selected samples, and by "abusing" we mean that we treat these columns as single-iteration results.

In [6]:
# Keep the rows flagged as neither Train nor Valid, restricted to the method
# columns plus 'Domain'; aggregate per domain and transpose so that each
# method becomes a row and each domain a column.
target_rows_in = ece_in.loc[
    ece_in['Valid'].eq(False) & ece_in['Train'].eq(False),
    MAIN_COLUMNS + ['Domain'],
]
res_mean_in = target_rows_in.groupby(['Domain']).mean().T
res_std_in = target_rows_in.groupby(['Domain']).std().T

In [7]:
# Row-wise mean over the four domain columns.
domain_cols = ['Art', 'Clipart', 'Product', 'RealWorld']
res_mean_in['Average'] = res_mean_in.loc[:, domain_cols].mean(axis=1)

Compute the standard deviation for the `Average` column. The helper function `get_std` is defined in utils.py.

In [8]:
# Standard deviation that accompanies the 'Average' column, computed by get_std.
average_std_in = get_std(res_mean_in, res_std_in)
res_std_in['std'] = average_std_in

In [9]:
# Round both tables to two decimals before they are rendered as strings.
res_mean_in, res_std_in = (
    frame.round(decimals=2) for frame in (res_mean_in, res_std_in)
)

In [10]:
# Render every cell as "<mean>+<std>". The 'Average' column is paired with the
# 'std' column of res_std_in; all other columns share their name in both frames.
for mean_col, std_col in [
    ('Art', 'Art'),
    ('Clipart', 'Clipart'),
    ('Product', 'Product'),
    ('RealWorld', 'RealWorld'),
    ('Average', 'std'),
]:
    res_mean_in[mean_col] = (
        res_mean_in[mean_col].astype(str) + '+' + res_std_in[std_col].astype(str)
    )

#### Show the results

In [11]:
# Tag every row with the calibration setting and name the column axis 'Method'.
res_mean_in['Calibrated on'] = 'in'
res_mean_in = res_mean_in.rename_axis('Method', axis='columns')

In [12]:
# Show the formatted table in the canonical column order.
res_mean_in.loc[:, res_columns]

Method,Calibrated on,Art,Clipart,Product,RealWorld,Average
Uncalibrated,in,37.61+5.21,40.32+0.28,29.64+4.81,26.05+7.48,33.41+7.75
TS Source,in,18.12+5.42,24.44+5.38,11.64+5.34,11.25+0.74,16.36+7.15
TS Oracle,in,4.77+0.51,6.06+0.88,6.6+0.9,6.59+1.13,6.0+1.16
HB,in,17.13+1.33,20.42+5.33,13.08+2.48,9.76+1.12,15.1+5.07
Isotonic,in,12.82+2.11,18.77+5.59,9.41+2.15,7.81+0.83,12.2+5.28
Beta abm,in,9.48+1.41,15.44+5.17,6.99+1.05,7.51+1.67,9.86+4.4
Beta am,in,7.92+1.98,13.8+3.88,6.83+1.51,7.33+1.52,8.97+3.72
Beta ab,in,33.71+3.41,33.86+1.83,23.84+4.51,20.71+5.83,28.03+7.19
TS,in,8.24+3.15,15.87+1.4,7.73+3.09,8.77+2.91,10.15+4.3
Cluster NN,in,7.93+2.87,17.11+1.64,8.08+3.08,7.83+1.77,10.24+4.65


## Load the ECE Dataframe when source domain is not included in calibration domains.

### KFold results of best Rho based on both Error and ECE

In [15]:
# Best-rho lookups for the 'out' setting, selected once by Error and once by ECE
# (best_rho_calib is presumably provided by utils — verify).
best_rho_er_out, best_rho_ece_out = [
    best_rho_calib(metric, 'out', 'crossentropy') for metric in ('Error', 'ECE')
]

In [16]:
# Read the stored ECE table for the 'out' setting and post-process it with the
# two best-rho lookups via get_res_df.
results_dir = os.path.join('..', '..', 'results', 'OfficeHome', 'crossentropy')
ece_file_out = os.path.join(results_dir, 'out_ECE_mean.csv')
ece_out = get_res_df(pd.read_csv(ece_file_out), best_rho_er_out, best_rho_ece_out)

### Average the domain wise results for target domains when source domain not included in calibration domain
We also have stored results for the standard deviation. Here, however, we are abusing the statistics by taking the standard deviation over columns: we ran 20 iterations over 500 randomly selected samples, and by "abusing" we mean that we treat these columns as single-iteration results.

In [17]:
# Keep the rows flagged as neither Train nor Valid, restricted to the method
# columns plus 'Domain'; aggregate per domain and transpose so that each
# method becomes a row and each domain a column.
target_rows_out = ece_out.loc[
    ece_out['Valid'].eq(False) & ece_out['Train'].eq(False),
    MAIN_COLUMNS + ['Domain'],
]
res_mean_out = target_rows_out.groupby(['Domain']).mean().T
res_std_out = target_rows_out.groupby(['Domain']).std().T

Compute the `Average` column and its standard deviation. The helper function `get_std` is defined in utils.py.

In [18]:
# Row-wise mean over the four domain columns, then the matching standard
# deviation from get_std (which receives the mean table including 'Average').
domain_cols = ['Art', 'Clipart', 'Product', 'RealWorld']
res_mean_out['Average'] = res_mean_out.loc[:, domain_cols].mean(axis=1)
average_std_out = get_std(res_mean_out, res_std_out)
res_std_out['std'] = average_std_out

In [19]:
# Round both tables to two decimals before they are rendered as strings.
res_mean_out, res_std_out = (
    frame.round(decimals=2) for frame in (res_mean_out, res_std_out)
)

In [20]:
# Render every cell as "<mean>+<std>". The 'Average' column is paired with the
# 'std' column of res_std_out; all other columns share their name in both frames.
for mean_col, std_col in [
    ('Art', 'Art'),
    ('Clipart', 'Clipart'),
    ('Product', 'Product'),
    ('RealWorld', 'RealWorld'),
    ('Average', 'std'),
]:
    res_mean_out[mean_col] = (
        res_mean_out[mean_col].astype(str) + '+' + res_std_out[std_col].astype(str)
    )

#### Show the results

In [21]:
# Tag every row with the calibration setting and name the column axis 'Method'.
res_mean_out['Calibrated on'] = 'out'
res_mean_out = res_mean_out.rename_axis('Method', axis='columns')

In [22]:
# Show the formatted table in the canonical column order.
res_mean_out.loc[:, res_columns]

Method,Calibrated on,Art,Clipart,Product,RealWorld,Average
Uncalibrated,out,37.61+5.21,40.32+0.28,29.64+4.81,26.05+7.48,33.41+7.75
TS Source,out,18.12+5.42,24.44+5.38,11.64+5.34,11.25+0.74,16.36+7.15
TS Oracle,out,4.77+0.51,6.06+0.88,6.6+0.9,6.59+1.13,6.0+1.16
HB,out,15.7+1.88,17.72+1.61,12.38+2.52,8.88+0.42,13.67+3.8
Isotonic,out,12.56+3.62,15.87+3.89,8.49+1.47,8.71+1.09,11.41+4.14
Beta abm,out,8.98+2.63,12.76+3.55,8.1+2.43,10.34+3.26,10.04+3.48
Beta am,out,6.31+1.49,12.0+3.01,8.02+3.58,10.68+4.14,9.25+3.91
Beta ab,out,33.13+2.53,31.8+0.63,23.17+4.35,19.27+5.34,26.84+6.88
TS,out,6.32+2.18,12.99+1.58,10.71+5.88,12.89+5.06,10.73+4.92
Cluster NN,out,6.16+1.46,14.65+1.2,9.54+5.72,10.88+3.81,10.31+4.68


## Combined target-domain results when source domain included (in) and not included (out) in calibration domains

In [23]:
# Stack the 'in' table on top of the 'out' table, both in the canonical column order.
res = pd.concat([frame[res_columns] for frame in (res_mean_in, res_mean_out)])

In [24]:
# Display the combined 'in' + 'out' table.
res

Method,Calibrated on,Art,Clipart,Product,RealWorld,Average
Uncalibrated,in,37.61+5.21,40.32+0.28,29.64+4.81,26.05+7.48,33.41+7.75
TS Source,in,18.12+5.42,24.44+5.38,11.64+5.34,11.25+0.74,16.36+7.15
TS Oracle,in,4.77+0.51,6.06+0.88,6.6+0.9,6.59+1.13,6.0+1.16
HB,in,17.13+1.33,20.42+5.33,13.08+2.48,9.76+1.12,15.1+5.07
Isotonic,in,12.82+2.11,18.77+5.59,9.41+2.15,7.81+0.83,12.2+5.28
Beta abm,in,9.48+1.41,15.44+5.17,6.99+1.05,7.51+1.67,9.86+4.4
Beta am,in,7.92+1.98,13.8+3.88,6.83+1.51,7.33+1.52,8.97+3.72
Beta ab,in,33.71+3.41,33.86+1.83,23.84+4.51,20.71+5.83,28.03+7.19
TS,in,8.24+3.15,15.87+1.4,7.73+3.09,8.77+2.91,10.15+4.3
Cluster NN,in,7.93+2.87,17.11+1.64,8.08+3.08,7.83+1.77,10.24+4.65


### Average the domain wise results for source domains when source domain is included in calibration domain
We also have stored results for the standard deviation. Here, however, we are abusing the statistics by taking the standard deviation over columns: we ran 20 iterations over 500 randomly selected samples, and by "abusing" we mean that we treat these columns as single-iteration results.

In [75]:
# Keep only the rows flagged as Train, restricted to the method columns plus
# 'Domain'; aggregate per domain and transpose so that each method becomes a row.
# NOTE(review): this rebinds res_mean_in / res_std_in, replacing the tables built
# from the non-Train, non-Valid rows earlier in the notebook.
source_rows_in = ece_in.loc[ece_in['Train'].eq(True), MAIN_COLUMNS + ['Domain']]
res_mean_in = source_rows_in.groupby(['Domain']).mean().T
res_std_in = source_rows_in.groupby(['Domain']).std().T

In [76]:
# Row-wise mean over the four domain columns.
domain_cols = ['Art', 'Clipart', 'Product', 'RealWorld']
res_mean_in['Average'] = res_mean_in.loc[:, domain_cols].mean(axis=1)

Compute the standard deviation for the `Average` column. The helper function `get_std` is defined in utils.py.

In [77]:
# Standard deviation that accompanies the 'Average' column, computed by get_std.
average_std_in = get_std(res_mean_in, res_std_in)
res_std_in['std'] = average_std_in

In [78]:
# Round both tables to two decimals before they are rendered as strings.
res_mean_in, res_std_in = (
    frame.round(decimals=2) for frame in (res_mean_in, res_std_in)
)

In [79]:
# Render every cell as "<mean>+<std>". The 'Average' column is paired with the
# 'std' column of res_std_in; all other columns share their name in both frames.
for mean_col, std_col in [
    ('Art', 'Art'),
    ('Clipart', 'Clipart'),
    ('Product', 'Product'),
    ('RealWorld', 'RealWorld'),
    ('Average', 'std'),
]:
    res_mean_in[mean_col] = (
        res_mean_in[mean_col].astype(str) + '+' + res_std_in[std_col].astype(str)
    )

In [80]:
# Tag every row with the calibration setting and name the column axis 'Method'.
res_mean_in['Calibrated on'] = 'in'
res_mean_in = res_mean_in.rename_axis('Method', axis='columns')

In [81]:
# Show the formatted table in the canonical column order.
res_mean_in.loc[:, res_columns]

Method,Calibrated on,Art,Clipart,Product,RealWorld,Average
Uncalibrated,in,0.11+0.0,0.63+0.0,0.1+0.0,2.42+0.0,0.82+0.95
TS Source,in,0.23+0.0,2.74+0.0,0.18+0.0,13.55+0.0,4.17+5.51
TS Oracle,in,0.15+0.0,0.58+0.0,0.08+0.0,0.71+0.0,0.38+0.27
HB,in,21.98+1.44,15.72+1.08,10.98+2.41,18.29+1.8,16.74+4.37
Isotonic,in,1.55+0.65,3.15+0.19,0.58+0.15,9.7+1.21,3.74+3.63
Beta abm,in,13.14+3.49,6.22+0.25,2.65+0.67,16.48+1.7,9.62+5.81
Beta am,in,4.79+2.18,7.6+0.84,2.77+1.39,20.49+4.41,8.91+7.37
Beta ab,in,14.41+2.17,7.69+0.42,4.96+1.44,13.19+0.97,10.06+4.13
TS,in,2.21+1.21,7.45+0.85,2.11+1.15,23.48+5.16,8.81+9.16
Cluster NN,in,2.16+0.99,6.61+0.59,2.11+1.11,22.86+5.38,8.44+8.97


### Average the domain wise results for source domains when source domain not included in calibration domain
We also have stored results for the standard deviation. Here, however, we are abusing the statistics by taking the standard deviation over columns: we ran 20 iterations over 500 randomly selected samples, and by "abusing" we mean that we treat these columns as single-iteration results.

In [82]:
# Keep only the rows flagged as Train, restricted to the method columns plus
# 'Domain'; aggregate per domain and transpose so that each method becomes a row.
# NOTE(review): this rebinds res_mean_out / res_std_out, replacing the tables built
# from the non-Train, non-Valid rows earlier in the notebook.
source_rows_out = ece_out.loc[ece_out['Train'].eq(True), MAIN_COLUMNS + ['Domain']]
res_mean_out = source_rows_out.groupby(['Domain']).mean().T
res_std_out = source_rows_out.groupby(['Domain']).std().T

Compute the `Average` column and its standard deviation. The helper function `get_std` is defined in utils.py.

In [83]:
# Row-wise mean over the four domain columns, then the matching standard
# deviation from get_std (which receives the mean table including 'Average').
domain_cols = ['Art', 'Clipart', 'Product', 'RealWorld']
res_mean_out['Average'] = res_mean_out.loc[:, domain_cols].mean(axis=1)
average_std_out = get_std(res_mean_out, res_std_out)
res_std_out['std'] = average_std_out

In [84]:
# Round both tables to two decimals, then render every cell as "<mean>+<std>".
# The 'Average' column is paired with the 'std' column of res_std_out.
res_mean_out, res_std_out = (
    frame.round(decimals=2) for frame in (res_mean_out, res_std_out)
)
for mean_col, std_col in [
    ('Art', 'Art'),
    ('Clipart', 'Clipart'),
    ('Product', 'Product'),
    ('RealWorld', 'RealWorld'),
    ('Average', 'std'),
]:
    res_mean_out[mean_col] = (
        res_mean_out[mean_col].astype(str) + '+' + res_std_out[std_col].astype(str)
    )

#### Show the results

In [85]:
# Tag every row with the calibration setting and name the column axis 'Method'.
res_mean_out['Calibrated on'] = 'out'
res_mean_out = res_mean_out.rename_axis('Method', axis='columns')

In [86]:
# Show the formatted table in the canonical column order.
res_mean_out.loc[:, res_columns]

Method,Calibrated on,Art,Clipart,Product,RealWorld,Average
Uncalibrated,out,0.11+0.0,0.63+0.0,0.1+0.0,2.42+0.0,0.82+0.95
TS Source,out,0.23+0.0,2.74+0.0,0.18+0.0,13.55+0.0,4.17+5.51
TS Oracle,out,0.15+0.0,0.58+0.0,0.08+0.0,0.71+0.0,0.38+0.27
HB,out,25.11+1.32,17.8+2.26,18.2+5.49,16.45+1.3,19.39+4.58
Isotonic,out,1.74+0.97,2.25+0.29,0.96+0.68,9.92+1.36,3.72+3.72
Beta abm,out,21.49+7.18,17.65+3.32,16.07+3.8,23.72+6.84,19.73+6.33
Beta am,out,7.69+3.87,13.92+3.46,10.69+7.25,27.44+8.28,14.93+9.69
Beta ab,out,22.44+5.36,17.55+1.24,14.2+1.33,15.37+1.72,17.39+4.32
TS,out,3.7+2.27,11.53+2.42,5.3+4.22,30.11+9.26,12.66+11.78
Cluster NN,out,3.05+1.61,9.66+1.49,4.54+3.24,28.47+8.25,11.43+11.12


## Combined source-domain results when source domain included (in) and not included (out) in calibration domains

In [87]:
# Stack the 'in' table on top of the 'out' table, both in the canonical column order.
res = pd.concat([frame[res_columns] for frame in (res_mean_in, res_mean_out)])

In [88]:
# Display the combined 'in' + 'out' table.
res

Method,Calibrated on,Art,Clipart,Product,RealWorld,Average
Uncalibrated,in,0.11+0.0,0.63+0.0,0.1+0.0,2.42+0.0,0.82+0.95
TS Source,in,0.23+0.0,2.74+0.0,0.18+0.0,13.55+0.0,4.17+5.51
TS Oracle,in,0.15+0.0,0.58+0.0,0.08+0.0,0.71+0.0,0.38+0.27
HB,in,21.98+1.44,15.72+1.08,10.98+2.41,18.29+1.8,16.74+4.37
Isotonic,in,1.55+0.65,3.15+0.19,0.58+0.15,9.7+1.21,3.74+3.63
Beta abm,in,13.14+3.49,6.22+0.25,2.65+0.67,16.48+1.7,9.62+5.81
Beta am,in,4.79+2.18,7.6+0.84,2.77+1.39,20.49+4.41,8.91+7.37
Beta ab,in,14.41+2.17,7.69+0.42,4.96+1.44,13.19+0.97,10.06+4.13
TS,in,2.21+1.21,7.45+0.85,2.11+1.15,23.48+5.16,8.81+9.16
Cluster NN,in,2.16+0.99,6.61+0.59,2.11+1.11,22.86+5.38,8.44+8.97
