In [None]:
# Enable IPython's autoreload extension so edits to imported modules
# are picked up live instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from sfma.api import SFMAModel

## Make Simulations

In [None]:
# Fix numpy's global random state so every simulated draw below is reproducible.
np.random.seed(seed=1)

In [None]:
# Number of simulated observations per data set; simulate() uses it as
# the size of the input and inefficiency draws.
n = 100

In [None]:
# --- True frontier: log(intercept + slope * x) ---
slope = 5
intercept = 1.5

# --- Inputs are drawn uniformly over [low, high] ---
x_domain = [0, 10]

# --- Scale of the exponential inefficiency subtracted from the frontier ---
ineff = 0.4

# --- Negative-binomial [n, p] settings for the per-observation sample sizes;
# --- simulate() draws half the observations from each setting.
sample_size_2 = [1000, 0.3]
sample_size_1 = [1000, 0.7]

In [None]:
def frontier(x):
    """Evaluate the true production frontier at ``x``.

    The frontier is the natural log of a straight line whose coefficients
    come from the module-level ``intercept`` and ``slope``.

    Parameters
    ----------
    x : scalar or array-like accepted by numpy arithmetic
        Input level(s) at which to evaluate the frontier.

    Returns
    -------
    Same shape as ``x``: ``log(intercept + slope * x)``.
    """
    linear_part = slope * x + intercept
    return np.log(linear_part)

In [None]:
def simulate():
    """Simulate one data set of noisy outputs lying below the true frontier.

    Reads the module-level settings ``n``, ``x_domain``, ``sample_size_1``,
    ``sample_size_2`` and ``ineff``, and the function ``frontier``.

    Steps:
      1. Draw ``n`` inputs uniformly over ``x_domain``.
      2. Draw a sample size per observation from a negative binomial:
         the first ``n // 2`` observations use ``sample_size_1``, the
         remaining ones use ``sample_size_2``.
      3. Set each true mean to ``frontier(x)`` minus an exponential
         inefficiency with scale ``ineff``.
      4. Draw that many normal samples (standard deviation 4) around each
         true mean and summarise them by their mean and standard error.

    Returns
    -------
    pandas.DataFrame
        One row per observation with columns ``output`` (estimated mean),
        ``se`` (standard error of that mean), ``input``, ``ones`` (a column
        of 1s), ``frontier`` (true frontier value), ``truth`` (true mean)
        and ``sample_size``.
    """
    x = np.random.uniform(low=x_domain[0], high=x_domain[1], size=n)

    # Split the observations so the two groups always add up to n. Using
    # int(n / 2) for both groups dropped one observation whenever n was odd,
    # which made the columns below have mismatched lengths.
    n_first = n // 2
    n_second = n - n_first
    sample_sizes_1 = np.random.negative_binomial(
        n=sample_size_1[0], p=sample_size_1[1], size=n_first
    )
    sample_sizes_2 = np.random.negative_binomial(
        n=sample_size_2[0], p=sample_size_2[1], size=n_second
    )
    sample_sizes = np.append(sample_sizes_1, sample_sizes_2)

    the_frontier = frontier(x)
    inefficiency = np.random.exponential(ineff, size=n)
    # Inefficiency is non-negative, so true means sit on or below the frontier.
    means = the_frontier - inefficiency

    samples = [np.random.normal(m, scale=4, size=s) for m, s in zip(means, sample_sizes)]
    est_means = np.array([np.mean(s) for s in samples])
    # Standard error of the mean: sample standard deviation (ddof=1) / sqrt(count).
    est_sterr = np.array([np.std(s, ddof=1) / np.sqrt(len(s)) for s in samples])

    df = pd.DataFrame({
        'output': est_means,
        'se': est_sterr,
        'input': x,
        'ones': np.ones(len(x)),
        'frontier': the_frontier,
        'truth': means,
        'sample_size': sample_sizes
    })
    return df

In [None]:
# Draw a single simulated data set to explore and fit in the cells that follow.
sim = simulate()

In [None]:
# True frontier value attached to each simulated observation.
the_frontier = sim.frontier

# Evenly spaced grid across the input domain, and the true frontier on that
# grid, used as the reference curve in the plots.
linspace = np.linspace(start=x_domain[0], stop=x_domain[1])
front = frontier(linspace)

In [None]:
# Quick look: true frontier curve with the simulated outputs scattered around it.
plt.plot(linspace, front, ls='solid')
plt.scatter(x=sim['input'], y=sim['output'])

In [None]:
# Larger view of the same data, adding each observation's standard error as an error bar.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))
ax.plot(linspace, front, ls='solid')
ax.scatter(sim['input'], sim['output'], color='orange')
ax.errorbar(sim['input'], sim['output'], yerr=sim['se'], linestyle='None')

In [None]:
# Base specification: outputs weighted by their reported standard errors.
model = SFMAModel(df=sim, col_input='input', col_output='output', col_se='se', include_gamma=True)

# Alternative specifications, currently disabled. Only `model` exists after
# this cell runs; the names below are not defined unless these are re-enabled.
# no_se_model = SFMAModel(
#     df=sim,
#     col_output='output',
#     col_se='ones',
#     col_input='input',
#     include_gamma=True
# )
# lin_tails = SFMAModel(
#     df=sim,
#     col_output='output',
#     col_se='se',
#     col_input='input',
#     r_linear=True,
#     include_gamma=True
# )
# concave = SFMAModel(
#     df=sim,
#     col_output='output',
#     col_se='se',
#     col_input='input',
#     r_linear=True,
#     concave=True,
#     include_gamma=True
# )

In [None]:
# Fit the base model, passing an empty set of solver options.
model.fit(options=dict(solver_options=dict()))
# Fits for the disabled alternative specifications:
# no_se_model.fit(options={'solver_options': {'method': 'trust-constr'}})
# lin_tails.fit(options={'solver_options': {}})
# concave.fit(options={'solver_options': {}})

In [None]:
# Attach the fitted values BEFORE sorting: the assignment is positional, so it
# presumably relies on predict() returning values in the row order the model
# was given (TODO confirm against sfma).
sim['base_predictions'] = model.predict()
#sim['no_se_predictions'] = no_se_model.predict()
#sim['linear_tail_predictions'] = lin_tails.predict()
#sim['concave_predictions'] = concave.predict()
# Sort by input so the prediction column can be drawn as a line. This mutates
# `sim` in place, so re-running this cell without re-running the cells above
# would assign predictions to already-reordered rows.
sim.sort_values('input', inplace=True)

In [None]:
# Display the solver's `x_opt` attribute for the base model
# (presumably the optimised parameter vector; confirm against sfma).
model.solver.x_opt

In [None]:
# Display the solver's `result` attribute for the base model
# (presumably the raw optimiser output; confirm against sfma).
model.solver.result

In [None]:
# Inspect the inefficiencies of the fitted base model. `lin_tails` is only
# defined in commented-out code at this point in the notebook, so referencing
# it here raised NameError on a fresh Restart & Run All.
model.inefficiencies

In [None]:
# Compare the fitted base model (red) with the true frontier (dashed black),
# over the simulated outputs and their standard errors (grey).
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))
axes.plot(linspace, front, color='black', linestyle='dashed')
axes.scatter(sim['input'], sim['output'], alpha=0.4, color='grey')
axes.errorbar(sim['input'], sim['output'], yerr=sim['se'], alpha=0.4, color='grey', linestyle='None')
axes.plot(sim['input'], sim['base_predictions'], color='red')
# Curves for the disabled alternative specifications:
# axes.plot(sim.input, sim.no_se_predictions, color='blue')
# axes.plot(sim.input, sim.linear_tail_predictions, color='purple')
# axes.plot(sim.input, sim.concave_predictions, color='green')

## Simulations

In [None]:
# Number of independent simulated data sets to generate and fit in the loop below.
num_simulations = 10

In [None]:
# For each fresh simulated data set, fit three model variants and keep their
# predictions next to the data:
#   base_predictions         - uses the reported standard errors
#   no_se_predictions        - uses the constant 'ones' column as the standard error
#   linear_tail_predictions  - as base, with r_linear=True
sim_dfs = []
for i in range(num_simulations):
    sim_i = simulate()

    # Build all three variants first, then fit, then predict (order preserved).
    model = SFMAModel(df=sim_i, col_output='output', col_se='se', col_input='input')
    no_se_model = SFMAModel(df=sim_i, col_output='output', col_se='ones', col_input='input')
    lin_tails = SFMAModel(df=sim_i, col_output='output', col_se='se', col_input='input', r_linear=True)

    fitted_by_column = {
        'base_predictions': model,
        'no_se_predictions': no_se_model,
        'linear_tail_predictions': lin_tails,
    }
    for variant in fitted_by_column.values():
        variant.fit()
    for column, variant in fitted_by_column.items():
        sim_i[column] = variant.predict()

    # Sort by input only after predictions are attached, so each prediction
    # column can later be drawn as a line.
    sim_i.sort_values(by='input', inplace=True)
    sim_dfs.append(sim_i)

In [None]:
# One panel per model variant: the true frontier (dashed black) with the fitted
# curve from every simulated data set overlaid in translucent red.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(24, 8))
for i, kind in enumerate(['no_se_predictions', 'base_predictions', 'linear_tail_predictions']):
    panel = axes[i]
    panel.plot(linspace, front, color='black', linestyle='dashed')
    for df in sim_dfs:
        panel.plot(df['input'], df[kind], alpha=0.3, color='red')