# Extended Data Figure 7

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as sp_stats
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import ndimage

## Extended Data Figure 7a-c

### Load Source Data

In [None]:
# Read the forecast-performance source table, then key its rows by NP_code.
data_figext7ac_prf = pd.read_csv(
    '../source-data/data-figext7ac-forecast-performance.csv'
)
data_figext7ac_prf = data_figext7ac_prf.set_index('NP_code')
# Bare expression so the notebook renders the table as the cell output.
data_figext7ac_prf

### Model Performance Comparisons

In [None]:
# Two-colour palette picked up by the violin plots below.
sns.set_palette(['blue', 'gold'])

# Row masks selecting each of the two models being compared.
is_fc_model = data_figext7ac_prf['Model'] == 'FC-Based Model'
is_iea_model = data_figext7ac_prf['Model'] == 'IEA Cycle-Based Model'

# One figure per metric. Each figure overlays, in this order: a violin per
# model, the individual values as black dots, one grey line per NP_code joining
# its values across models, and the paired t-test result as a text annotation.
for metric in ['PRECISION', 'RECALL', 'F1']:
    ### PRECISION / RECALL / F1
    plt.figure(figsize=(4,6), dpi=300)
    ax = plt.subplot(111)
    ax = sns.violinplot(x='Model', y=metric, cut=2, split=False, data=data_figext7ac_prf, ax=ax)
    ax = sns.stripplot(x='Model', y=metric, jitter=False, color='k', data=data_figext7ac_prf, ax=ax)
    # reset_index() turns the NP_code index back into a column so it can be used as hue.
    ax = sns.lineplot(x='Model', y=metric, hue='NP_code', palette='Greys', data=data_figext7ac_prf.reset_index(), ax=ax)

    # ttest_rel pairs the two samples by position, so order both by NP_code
    # (the frame index) first instead of relying on the CSV row order. The
    # stable sort keeps the original relative order of any repeated NP_code.
    fc_scores = data_figext7ac_prf.loc[is_fc_model, metric].sort_index(kind='mergesort')
    iea_scores = data_figext7ac_prf.loc[is_iea_model, metric].sort_index(kind='mergesort')
    t_stat, p_val = sp_stats.ttest_rel(fc_scores, iea_scores)
    ax.text(0.4, 1.1, 't={:0.2f}\np={:0.3f}'.format(t_stat, p_val))

    # The hue mapping adds one legend entry per NP_code; drop that legend.
    ax.legend_.remove()
    ax.set_xlim([-0.5, 1.5])
    # Limits extend past [0, 1] to leave room for the violin tails (cut=2) and
    # the annotation placed at y=1.1.
    ax.set_ylim([-0.25, 1.25])
    plt.show()

## Extended Data Figure 7d

### Load Source Data

In [None]:
# Read the forecast ROC source table, then key its rows by NP_code.
data_figext7d_roc = pd.read_csv(
    '../source-data/data-figext7d-forecast-roc.csv'
)
data_figext7d_roc = data_figext7d_roc.set_index('NP_code')
# Bare expression so the notebook renders the table as the cell output.
data_figext7d_roc

### ROC Curve

In [None]:
# One colour per curve, in the order the models are plotted below.
sns.set_palette(['gray', 'blue', 'gold'])

# Per-(Model, bins) mean and standard error of the mean across rows.
# Both frames come from the same groupby followed by reset_index(), so a given
# integer row label refers to the same (Model, bins) group in each of them.
roc_mu = data_figext7d_roc.groupby(['Model', 'bins']).mean().reset_index().dropna()
roc_sd = data_figext7d_roc.groupby(['Model', 'bins']).sem().reset_index().dropna()

plt.figure(figsize=(4,4), dpi=300)
ax = plt.subplot(111)
for typ in ['Shuffled Baseline', 'FC-Based Model', 'IEA Cycle-Based Model']:
    # Order this model's points once, by mean false-positive rate.
    mu_typ = roc_mu[roc_mu['Model'] == typ].sort_values(by='fpr')

    # Pull the SEM of tpr in exactly that row order. The 'fpr' column of roc_sd
    # holds the SEM of fpr, so it must not be used as a sort key, and the band
    # must not rely on index-aligned Series arithmetic, which can reorder the
    # result relative to the x values. Groups whose SEM row was removed by
    # dropna() become NaN here, which leaves a gap in the band.
    sem_tpr = roc_sd.loc[roc_sd['Model'] == typ, 'tpr'].reindex(mu_typ.index).to_numpy()

    # Plain arrays so x, the curve and the band all line up positionally.
    fpr = mu_typ['fpr'].to_numpy()
    tpr = mu_typ['tpr'].to_numpy()

    ax.plot(fpr, tpr, label=typ)
    # Shaded band: mean tpr +/- one standard error.
    ax.fill_between(fpr, tpr - sem_tpr, tpr + sem_tpr, alpha=0.2)

ax.set_xlim([-0.01, 1.01])
ax.set_ylim([-0.01, 1.01])
# Dashed diagonal reference line from (0, 0) to (1, 1).
ax.plot((0,1), (0,1), color='k', linestyle='--', linewidth=0.5)
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend()
plt.show()