In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
sys.path.append('../src/')
from msfa import MSFA
from msfa_optimization import optimizeSFA

## Import data

In [None]:
# Read the input table from the repository's data directory (path is relative
# to the notebook's working directory).
csv_path = '../data/msfa_data.csv'
df = pd.read_csv(csv_path)

## Create object

In [None]:
# Number of observations (rows of the input table).
m = df.shape[0]
#
# Per-observation arrays pulled from the table.
x = df['physicians'].values
y = df['uhc'].values
# Square root of the reported variance column.
# NOTE(review): presumably the observation standard deviation expected by MSFA — confirm.
s = np.sqrt(df['uhc_variance'].values)
#
# All-ones (m, 1) columns passed as the z and d arguments of MSFA.
z = np.ones((m,1))
d = np.ones((m,1))
#
# Sort by x so the later line plots of fitted values against x are drawn left
# to right. Every per-observation array must be permuted with the same index:
# previously only x and y were reordered, which left each sorted (x, y) pair
# matched with the standard deviation of a different row.
ind = np.argsort(x)
x = x[ind]
y = y[ind]
s = s[ind]
#
msfa = MSFA(x.reshape(m,1), z, d, s, Y=y, vtype='half_normal', ftype='lower', add_intercept_to_x=True)

In [None]:
# Scatter of the sorted observations: point markers only, no connecting line.
plt.plot(x, y, linestyle='none', marker='.')

In [None]:
# Attach a cubic B-spline basis to the model.
# Knots: the observed range of x plus three interior knots at 20, 40 and 60.
knots = np.array([x.min(), 20.0, 40.0, 60.0, x.max()])
degree = 3
# NOTE(review): the keyword values presumably request a linear right tail and an
# increasing, concave curve — confirm against MSFA.addBSpline.
msfa.addBSpline(
    knots,
    degree,
    r_linear=True,
    bspline_mono='increasing',
    bspline_cvcv='concave',
)

## Fit data

In [None]:
# Replace gama_uprior with a (2, k_gama) array of zeros, then call
# updateUPrior() so the model picks up the new value.
# NOTE(review): presumably the two rows are lower/upper bounds, which would pin
# gama at zero — confirm against the MSFA implementation.
msfa.gama_uprior = np.zeros((2, msfa.k_gama))
msfa.updateUPrior()
# Run the optimizer on the configured model; the following cell reads
# msfa.beta_soln, so this must be executed first.
optimizeSFA(msfa)

In [None]:
# Observations as points, overlaid with the fitted values X.dot(beta_soln)
# drawn as a line against the sorted x.
y_fit = msfa.X.dot(msfa.beta_soln)
plt.plot(x, y, linestyle='none', marker='.')
plt.plot(x, y_fit)

## Trimming SFA

In [None]:
# NOTE(review): `sfa` is not defined in any cell visible in this notebook; the
# only model object created above is `msfa`. Unless `sfa` comes from elsewhere,
# this cell raises NameError on Restart & Run All. Presumably `msfa` was
# intended — TODO confirm that the trimming routine is available for an MSFA
# object (as a method or as a function in msfa_optimization) before renaming.
#
# Calls the trimming optimizer with int(0.9 * N) as its first argument
# (presumably the number of observations to keep — confirm), a step size of
# 100.0, at most 20 iterations, and verbose output.
sfa.optimizeSFAWithTrimming(int(0.9*sfa.N), stepsize=100.0, verbose=True, max_iter=20)

In [None]:
# NOTE(review): `sfa` is not defined in any cell visible in this notebook (the
# model built above is `msfa`); this cell fails with NameError on a fresh kernel
# unless `sfa` is created elsewhere — TODO confirm the intended object.
#
# Indices of observations whose weight `w` is exactly 0.0.
# Presumably these are the points removed by trimming — confirm.
id_outliers = np.where(sfa.w == 0.0)[0]
# All observations as points.
plt.plot(x, y, '.')
# Fitted values X.dot(beta_soln) as a line against x.
plt.plot(x, sfa.X.dot(sfa.beta_soln))
# Re-draw the zero-weight observations in red on top.
plt.plot(x[id_outliers], y[id_outliers], 'r.')

In [None]:
# NOTE(review): `sfa` is not defined in any cell visible in this notebook (the
# model built above is `msfa`) — TODO confirm the intended object.
#
# Display the element-wise square root of deta_soln as the cell output.
# Presumably deta_soln is a variance-type estimate, making this a standard
# deviation — confirm against the model class.
np.sqrt(sfa.deta_soln)