In [None]:
%pylab inline
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import repsci
import scipy.stats as spstats

In [None]:
# Experiment handle for this notebook; exp.get_filename(...) is called further
# down to build the paths the figures are saved to.
experiment_name = '28_plot_prod_perf'
exp = repsci.Experiment(experiment_name)

In [None]:
# Combined per-project table written by an earlier step (path pins one
# specific timestamped output directory).
combined_csv_path = 'output/26_combine_data/2022-05-11 19:05:22 b813703/combined.csv'
df_combined = pd.read_csv(combined_csv_path)

In [None]:
# Per-stage ratio columns: Articles_<stage> divided by stage_N_<stage>.
# Rows where the denominator is 0 are filtered out by the cells that use
# these columns.
stage_ratio_columns = [
    ("a_life_eff", "Articles_a", "stage_N_a"),
    ("b_life_eff", "Articles_b", "stage_N_b"),
    ("c_life_eff", "Articles_c", "stage_N_c"),
]
for ratio_col, numerator_col, denominator_col in stage_ratio_columns:
    df_combined[ratio_col] = df_combined[numerator_col] / df_combined[denominator_col]

# comp_perf: (to_fa + to_ga) as a fraction of article_count.
perf_numerator = df_combined["to_fa"] + df_combined["to_ga"]
df_combined["comp_perf"] = perf_numerator / df_combined["article_count"]

In [None]:
def _plot_log10_hist(ax, values, bins, xlim, xlabel):
    """Draw a histogram of log10(values) on ``ax`` with the shared panel styling.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    values : strictly positive values; their base-10 logarithm is binned.
    bins : passed straight to ``Axes.hist`` (array of bin edges or a bin count).
    xlim : ``(low, high)`` integer x-axis limits, in log10 units.
    xlabel : x-axis label text.
    """
    ax.hist(np.log10(values), bins, zorder=2)
    ax.set_xlim(xlim)
    # One tick per decade, ending exactly at the upper limit. Matplotlib
    # expands the view limits to include every tick it is given, so a tick
    # past xlim[1] would silently undo the set_xlim call above.
    ax.set_xticks(range(xlim[0], xlim[1] + 1))
    ax.set_ylim([0, 300])
    ax.grid()
    ax.set_ylabel('Project Count')
    ax.set_xlabel(xlabel)


# 2x2 grid: three per-stage ratio histograms plus one performance histogram.
fig, axes = plt.subplots(2, 2, figsize=(6, 6))
panel_axes = axes.ravel()

# Panels 1-3: rows with a positive stage_N_<stage> and a positive ratio
# (log10 is only defined for values > 0). All three share the same bin edges.
stage_bins = np.linspace(-4, 2, 25)
for panel_ax, stage in zip(panel_axes, "abc"):
    stage_rows = df_combined[df_combined["stage_N_%s" % stage] > 0]
    stage_ratio = stage_rows["%s_life_eff" % stage]
    _plot_log10_hist(
        panel_ax,
        stage_ratio[stage_ratio > 0],
        stage_bins,
        (-4, 2),
        '$log_{10}$ %s-Productivity' % stage.upper(),
    )

# Panel 4: comp_perf, binned into 25 automatic bins over the data range.
performance = df_combined["comp_perf"]
_plot_log10_hist(
    panel_axes[3],
    performance[performance > 0],
    25,
    (-6, 0),
    '$log_{10}$ Performance',
)

fig.tight_layout()
fig.savefig(exp.get_filename('fig-eff-perf-hist.png'), dpi=600)
fig.savefig(exp.get_filename('fig-eff-perf-hist.eps'))

In [None]:
# Keep only rows usable on log axes: positive stage_N_b, positive comp_perf
# and positive b_life_eff.
usable_rows = (
    (df_combined["stage_N_b"] > 0)
    & (df_combined["comp_perf"] > 0)
    & (df_combined["b_life_eff"] > 0)
)
df = df_combined[usable_rows]

# Correlation and degree-1 least-squares fit, both in log10-log10 space.
# m is the slope and b the intercept of log10(comp_perf) vs log10(b_life_eff).
log_b_eff = np.log10(df["b_life_eff"])
log_perf = np.log10(df["comp_perf"])
r, p = spstats.pearsonr(log_b_eff, log_perf)
m, b = np.polyfit(log_b_eff, log_perf, 1)
r, p

In [None]:
def _fitted_perf(x):
    """Map x through the log10-space line (slope m, intercept b) back to linear units."""
    return np.power(10, np.log10(x) * m + b)


# Endpoints of the fitted line, spanning the observed b_life_eff range.
b_eff = df["b_life_eff"]
xmin, xmax = b_eff.min(), b_eff.max()
ymin, ymax = _fitted_perf(xmin), _fitted_perf(xmax)
xmin, xmax, ymin, ymax

In [None]:
# Log-log scatter of comp_perf against b_life_eff, with the line through
# (xmin, ymin)-(xmax, ymax) computed in the previous cell drawn on top.
fig, ax = plt.subplots(figsize=(3, 3))
ax.loglog(df["b_life_eff"], df["comp_perf"], '.')
ax.loglog([xmin, xmax], [ymin, ymax], 'g-', linewidth=2)
ax.set_ylabel("Performance")
# NOTE(review): the histogram figure labels this same column "B-Productivity";
# confirm which term the paper uses.
ax.set_xlabel("B-Efficiency")
# Derive the p-value text from the computed p instead of hard-coding it, so
# the title cannot claim p<0.001 when the input data no longer supports it.
p_text = "p<0.001" if p < 0.001 else "p = %0.3f" % p
ax.set_title("r = %0.2f, %s" % (r, p_text))
ax.tick_params(axis='both', which='major')

fig.tight_layout()
fig.savefig(exp.get_filename('fig-perf-prod.png'), dpi=600)
fig.savefig(exp.get_filename('fig-perf-prod.eps'))