# Comparison of algorithm performance

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
sns.set(color_codes=True)
sns.set_style("whitegrid")
sns.set(rc={'figure.figsize':(13,10)})

pd.set_option('display.max_columns', 999)

In [None]:
# Load the model-comparison table. Later cells rely on the columns
# 'Name', 'Algorithm' and 'Rsqu' and append rows with 8 values each.
algorithm_csv = '~/EDA_notebooks/Input/algorithm_comparison.csv'
df = pd.read_csv(algorithm_csv)

In [None]:
# Add the RF-7-MLI result as one extra row.
# DataFrame.append was removed in pandas 2.0, so build a one-row frame with the
# same columns and concatenate it instead.
# NOTE: re-running this cell appends the row again; run it once per fresh `df`.
new_row = pd.DataFrame(
    [['RF-7-MLI', 'RF', 7, 'HD, LD', 'MLI', -2.52, '', 0.569]],
    columns=df.columns,
)
df = pd.concat([df, new_row], ignore_index=True)

In [None]:
# Add the NNSL-7-MLI-HL10 result as one extra row.
# DataFrame.append was removed in pandas 2.0, so build a one-row frame with the
# same columns and concatenate it instead.
# NOTE: re-running this cell appends the row again; run it once per fresh `df`.
new_row = pd.DataFrame(
    [['NNSL-7-MLI-HL10', 'NN-SL', 7, 'HD, LD', 'MLI', -2.673, '', 0.581]],
    columns=df.columns,
)
df = pd.concat([df, new_row], ignore_index=True)

In [None]:
# Add the NNSL-7-10k-Chr1-HL10 result as one extra row, then display the table.
# DataFrame.append was removed in pandas 2.0, so build a one-row frame with the
# same columns and concatenate it instead.
# NOTE: re-running this cell appends the row again; run it once per fresh `df`.
new_row = pd.DataFrame(
    [['NNSL-7-10k-Chr1-HL10', 'NN-SL', 7, 'HD, LD', 'Chr1', 0.522, '', 0.088]],
    columns=df.columns,
)
df = pd.concat([df, new_row], ignore_index=True)
df

In [None]:
# Keep only the random-forest rows.
# .copy() makes `rf` an independent frame, so adding a column to it later does
# not raise SettingWithCopyWarning or depend on view-vs-copy behaviour.
rf = df[df['Algorithm'] == 'RF'].copy()
rf

In [None]:
# Tag every RF model with its cross-validation group, one label per row in the
# current row order of `rf` (14 rows expected; a length mismatch raises ValueError).
# Presumably 1 = train-test split, 2 = one location, 3 = chromosome 1, matching
# the captions used in the plots - TODO confirm.
# assign() returns a new frame instead of writing into a filtered slice of `df`,
# which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
rf = rf.assign(CVgroup=[1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2])
rf

In [None]:
# Order the RF models by ascending 'Rsqu' inside each CV group; the groups
# themselves come out in ascending 'CVgroup' order.
rf_sort = rf.groupby('CVgroup').apply(lambda group: group.sort_values('Rsqu'))
rf_sort

In [None]:
# Broken-y-axis bar chart of R² for the random-forest models, in rf_sort order.
# The same bars are drawn on two stacked panels with different y-ranges
# (upper: -3 .. 1.5, lower: -7.4 .. -6), so the range in between is cut out.
# Layout recipe: https://gist.github.com/pfandzelter/0ae861f0dee1fb4fd1d11344e3f85c9e
sns.set(rc={'figure.figsize':(13,6)})
sns.set_style("whitegrid")

f, (ax1, ax2) = plt.subplots(ncols=1, nrows=2, sharex=True)

# Identical bars on both panels; only the y-limits differ.
for panel in (ax1, ax2):
    sns.barplot(x='Name', y='Rsqu', data=rf_sort, palette='viridis', ax=panel)
    panel.set_ylabel("")  # a single shared label is added with f.text below

ax1.set_ylim(-3, 1.5)
ax2.set_ylim(-7.4, -6)
ax1.get_xaxis().set_visible(False)

# One y-label for both panels, positioned in figure coordinates.
f.text(0.08, 0.55, 'R² values', va='center', rotation='vertical', size=16)

ax1.xaxis.tick_top()
ax2.xaxis.tick_bottom()

f.subplots_adjust(left=0.15, right=0.85, bottom=0.15, top=0.85)

plt.setp(ax2.get_xticklabels(), rotation=30, ha="right",
         rotation_mode="anchor")

# Label the bottom panel explicitly instead of relying on pyplot's current axes.
ax2.set_xlabel("RF models", size=16)
ax1.set_title('CV by test-train split | location | chromosome', size = 16, pad=10)

# Short diagonal marks at the facing edges of the two panels indicate the break.
d = .01  # half-length of each mark, in axes coordinates
kwargs = dict(transform=ax1.transAxes, color='k', clip_on=False)
ax1.plot((-d, +d), (-d, +d), **kwargs)        # lower-left corner of the upper panel
ax1.plot((1 - d, 1 + d), (-d, +d), **kwargs)  # lower-right corner of the upper panel

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)        # upper-left corner of the lower panel
ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # upper-right corner of the lower panel

# Dashed separators between the CV groups. The CVgroup labels assigned to `rf`
# put 6 models in group 1 and 6 in group 2, so the boundaries fall after the
# 6th and the 12th bar (x = 5.5 and x = 11.5). The second one was previously
# drawn at 10.5, i.e. inside group 2.
for boundary in (5.5, 11.5):
    ax1.vlines(x=boundary, ymin=-5, ymax=1.5, ls='--', lw=2)
    ax2.vlines(x=boundary, ymin=-33.5, ymax=-1, ls='--', lw=2)

f.suptitle('Random forest models comparison', size = 20)
#f.savefig('RF_models.png', bbox_inches='tight')

# plt.show() rather than f.show(): Figure.show() needs a GUI backend and only
# emits a warning under the inline backend.
plt.show()

In [None]:
# Replace negative R² values with zero.
# A single .loc assignment is used instead of chained indexing
# (rf_sort['Rsqu'][mask] = 0), which raises SettingWithCopyWarning and is not
# guaranteed to write back into rf_sort.
# NOTE: this overwrites rf_sort in place; re-running the broken-axis plot after
# this cell would show the clipped values.
rf_sort.loc[rf_sort['Rsqu'] < 0, 'Rsqu'] = 0
rf_sort

In [None]:
# Single-panel bar chart of the RF models' R² values from rf_sort, with a
# caption above each CV group and dashed separators between the groups.
sns.set(rc={'figure.figsize':(13,6)})
sns.set_style("whitegrid")

# Explicit Figure/Axes pair: sns.barplot returns an Axes, so binding its result
# to a name like `fig` (as before) was misleading.
fig, ax = plt.subplots()
sns.barplot(x='Name', y='Rsqu', data=rf_sort, palette='viridis', ax=ax)

ax.set_ylabel("")  # remove the default y-label ...
# ... and place a custom one by hand (position is in data coordinates).
ax.text(-2, 0.6, 'R² values', va='center', rotation='vertical', size=16)

ax.xaxis.tick_bottom()
plt.setp(ax.get_xticklabels(), rotation=30, ha="right",
         rotation_mode="anchor")
ax.set_xlabel("RF models", size=16)

# Captions for the three CV groups (x, y in data coordinates).
for x_pos, caption in ((1.2, 'CV | train-test-split'),
                       (7, 'CV | one location'),
                       (11.6, 'CV | chromosome 1')):
    ax.text(x_pos, 1.1, caption, va='center', size=12)

# Separators between the groups; drawn up to y = 1.2 so they extend past the
# captions placed at y = 1.1.
for boundary in (5.5, 11.5):
    ax.vlines(x=boundary, ymin=0, ymax=1.2, ls='--', lw=2)

fig.suptitle('Random forest models comparison', size = 20)
#fig.savefig('RF_models_v2.png', bbox_inches='tight')
plt.show()

In [None]:
# Keep every row whose algorithm is not 'RF' (the neural-network models).
# .copy() makes `nn` an independent frame, so adding a column to it later does
# not raise SettingWithCopyWarning or depend on view-vs-copy behaviour.
nn = df[df['Algorithm'] != 'RF'].copy()
nn

In [None]:
# Tag every NN model with its cross-validation group, one label per row in the
# current row order of `nn` (13 rows expected; a length mismatch raises ValueError).
# Presumably 1 = train-test split, 2 = one location, 3 = chromosome 1, matching
# the captions used in the plots - TODO confirm.
# assign() returns a new frame instead of writing into a filtered slice of `df`,
# which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
nn = nn.assign(CVgroup=[1, 1, 1, 1, 1, 1, 3, 2, 1, 3, 2, 2, 3])
nn

In [None]:
# Order the NN models by ascending 'Rsqu' inside each CV group; the groups
# themselves come out in ascending 'CVgroup' order.
nn_sort = nn.groupby('CVgroup').apply(lambda group: group.sort_values('Rsqu'))
nn_sort

In [None]:
# Broken-y-axis bar chart of R² for the neural-network models, in nn_sort order.
# The same bars are drawn on two stacked panels with different y-ranges
# (upper: -4.5 .. 1, lower: -33.5 .. -28), so the range in between is cut out.
# Layout recipe: https://gist.github.com/pfandzelter/0ae861f0dee1fb4fd1d11344e3f85c9e
sns.set(rc={'figure.figsize':(13,6)})
sns.set_style("whitegrid")

f, (ax1, ax2) = plt.subplots(ncols=1, nrows=2, sharex=True)

# Identical bars on both panels; only the y-limits differ.
for panel in (ax1, ax2):
    sns.barplot(x='Name', y='Rsqu', data=nn_sort, palette='viridis', ax=panel)
    panel.set_ylabel("")  # a single shared label is added with f.text below

ax1.set_ylim(-4.5, 1)
ax2.set_ylim(-33.5, -28)
ax1.get_xaxis().set_visible(False)

# One y-label for both panels, positioned in figure coordinates.
f.text(0.08, 0.55, 'R² values', va='center', rotation='vertical', size=16)

ax1.xaxis.tick_top()
ax2.xaxis.tick_bottom()

f.subplots_adjust(left=0.15, right=0.85, bottom=0.15, top=0.85)

plt.setp(ax2.get_xticklabels(), rotation=30, ha="right",
         rotation_mode="anchor")

# Label the bottom panel explicitly instead of relying on pyplot's current axes.
ax2.set_xlabel("NN models", size=16)
ax1.set_title('CV by test-train split | location | chromosome', size = 16, pad=10)

# Short diagonal marks at the facing edges of the two panels indicate the break.
d = .01  # half-length of each mark, in axes coordinates
kwargs = dict(transform=ax1.transAxes, color='k', clip_on=False)
ax1.plot((-d, +d), (-d, +d), **kwargs)        # lower-left corner of the upper panel
ax1.plot((1 - d, 1 + d), (-d, +d), **kwargs)  # lower-right corner of the upper panel

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)        # upper-left corner of the lower panel
ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # upper-right corner of the lower panel

# Dashed separators between the CV groups. The CVgroup labels assigned to `nn`
# put 7 models in group 1 and 3 in group 2, so the boundaries fall after the
# 7th and the 10th bar (x = 6.5 and x = 9.5). The second one was previously
# drawn at 8.5, i.e. inside group 2.
for boundary in (6.5, 9.5):
    ax1.vlines(x=boundary, ymin=-5, ymax=1, ls='--', lw=2)
    ax2.vlines(x=boundary, ymin=-33.5, ymax=-1, ls='--', lw=2)

f.suptitle('Neural network models comparison', size = 20)
#f.savefig('NN_models.png', bbox_inches='tight')

# plt.show() rather than f.show(): Figure.show() needs a GUI backend and only
# emits a warning under the inline backend.
plt.show()

In [None]:
# Replace negative R² values with zero.
# A single .loc assignment is used instead of chained indexing
# (nn_sort['Rsqu'][mask] = 0), which raises SettingWithCopyWarning and is not
# guaranteed to write back into nn_sort.
# NOTE: this overwrites nn_sort in place; re-running the broken-axis plot after
# this cell would show the clipped values.
nn_sort.loc[nn_sort['Rsqu'] < 0, 'Rsqu'] = 0
nn_sort

In [None]:
# Single-panel bar chart of the NN models' R² values from nn_sort, with a
# caption above each CV group and dashed separators between the groups.
# The figure is also written to 'NN_models_v2.png'.
sns.set(rc={'figure.figsize':(13,6)})
sns.set_style("whitegrid")

# Explicit Figure/Axes pair: sns.barplot returns an Axes, so binding its result
# to a name like `fig` (as before) was misleading.
fig, ax = plt.subplots()
sns.barplot(x='Name', y='Rsqu', data=nn_sort, palette='viridis', ax=ax)

ax.set_ylabel("")  # remove the default y-label ...
# ... and place a custom one by hand (position is in data coordinates).
ax.text(-2, 0.6, 'R² values', va='center', rotation='vertical', size=16)

ax.xaxis.tick_bottom()
plt.setp(ax.get_xticklabels(), rotation=30, ha="right",
         rotation_mode="anchor")
ax.set_xlabel("NN models", size=16)

# Captions for the three CV groups (x, y in data coordinates).
for x_pos, caption in ((1.4, 'CV | train-test-split'),
                       (7, 'CV | one location'),
                       (10.1, 'CV | chromosome 1')):
    ax.text(x_pos, 1.1, caption, va='center', size=12)

# Separators between the groups; drawn up to y = 1.2 so they extend past the
# captions placed at y = 1.1.
for boundary in (6.5, 9.5):
    ax.vlines(x=boundary, ymin=0, ymax=1.2, ls='--', lw=2)

fig.suptitle('Neural network models comparison', size = 20)
# Save before showing so the file is written from the fully drawn figure.
fig.savefig('NN_models_v2.png', bbox_inches='tight')
plt.show()