# Calculating Coefficients

### Data Sources
- Ventilator information

### Goal
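To fit ordinary least squares (OLS) models of `ending_deaths` on the ventilator terms `v1` and `v2`, and to calculate the following for each fit:
- Coefficients
- P-values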

### Outputs
The following data set is generated from this notebook:
- Coefficients and p-values per county in California

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from scipy.optimize import curve_fit
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import scipy

# Allow up to 200 rows to be shown when a DataFrame is displayed
pd.options.display.max_rows = 200
# Render matplotlib figures inline in the notebook output
%matplotlib inline

## Reading the data

In [86]:
# Loads the ventilator data, lower-cases every column header, and shortens
# "county_name" to "county" in a single chained expression
ventilator = (
    pd.read_csv("ventilator_model_data.csv")
    .rename(columns=str.lower)
    .rename(columns={"county_name": "county"})
)

# Shows the first five rows as a quick sanity check
ventilator.head()

Unnamed: 0,county,risk_index,week,rate,num_licensed_beds,num_staffed_beds,num_icu_beds,bed_utilization,avg_ventilator_usage,num_of_hospital,current_num_of_aval_ventilator,num_covid_cases_increase_from_last_week,max_num_ventilator,v1,v2,ending_deaths
0,Alameda,80.319119,5.0,0.0,3464,3564,331,0.619915,3.894737,19,148,0.0,3464,73.60244,0.0,0
1,Alameda,80.319119,6.0,0.0,3464,3564,331,0.619915,3.894737,19,148,0.0,3464,73.60244,0.0,0
2,Alameda,80.319119,7.0,0.0,3464,3564,331,0.619915,3.894737,19,148,0.0,3464,73.60244,0.0,0
3,Alameda,80.319119,8.0,0.0,3464,3564,331,0.619915,3.894737,19,148,0.0,3464,73.60244,0.0,0
4,Alameda,80.319119,9.0,0.0,3464,3564,331,0.619915,3.894737,19,148,0.0,3464,73.60244,0.0,0


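\begin{equation}
f(TV_i) = \omega_i + \gamma_i \cdot R_i \left(1-\frac{CV_i}{Vmax_i}\right)^2 + \nu_i \cdot IR_i \left(1-\frac{CV_i}{Vmax_i}\right)^2
\end{equation}

Here $\omega_i$ is the intercept, and $\gamma_i$ and $\nu_i$ are the coefficients estimated below for the regressors `v1` and `v2`, respectively.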

In [87]:
# Target variable: deaths at the end of the period
y = ventilator["ending_deaths"]

# Independent variables: v1 and v2, with an intercept column added in front
x = sm.add_constant(ventilator[["v1", "v2"]].values)

# Fits a single OLS model over all rows and keeps its in-sample predictions
ventilator_model = sm.OLS(y, x).fit()
ventilator_predictions = ventilator_model.predict(x)

# Model results; the raw coefficients and p-values are also available as
# ventilator_model.params.values and ventilator_model.pvalues.values
ventilator_model.summary()

0,1,2,3
Dep. Variable:,ending_deaths,R-squared:,0.139
Model:,OLS,Adj. R-squared:,0.136
Method:,Least Squares,F-statistic:,53.19
Date:,"Wed, 15 Apr 2020",Prob (F-statistic):,3.85e-22
Time:,08:05:19,Log-Likelihood:,-2665.8
No. Observations:,662,AIC:,5338.0
Df Residuals:,659,BIC:,5351.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-15.6657,4.485,-3.493,0.001,-24.472,-6.859
x1,0.2504,0.066,3.787,0.000,0.121,0.380
x2,0.1786,0.020,8.896,0.000,0.139,0.218

0,1,2,3
Omnibus:,1507.516,Durbin-Watson:,1.569
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5330311.534
Skew:,19.299,Prob(JB):,0.0
Kurtosis:,440.897,Cond. No.,579.0


In [91]:
def calculate_ventilator_coefficients(df):
    """Fit one OLS model per county and collect its coefficients and p-values.

    For every county in ``df`` except the placeholder 'Unknown', regresses
    ``ending_deaths`` on an intercept plus the ``v1`` and ``v2`` columns with
    ``sm.OLS``. Rows whose county is missing are not fitted.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``county``, ``v1``, ``v2`` and
        ``ending_deaths``.

    Returns
    -------
    pandas.DataFrame
        One row per fitted county, in order of first appearance in ``df``,
        with the columns ``county``, ``const``, ``x1``, ``x2``,
        ``const_pvalue``, ``x1_pvalue`` and ``x2_pvalue``. ``const`` is the
        intercept and ``x1``/``x2`` are the coefficients of ``v1``/``v2``.
        An empty frame with these columns is returned when no county is
        fitted.
    """
    coefficient_columns = ['const', 'x1', 'x2']
    pvalue_columns = ['const_pvalue', 'x1_pvalue', 'x2_pvalue']
    result_columns = ['county'] + coefficient_columns + pvalue_columns

    # Rows are collected as plain dicts and turned into a DataFrame once at
    # the end, instead of re-concatenating a growing frame for every county.
    records = []

    # sort=False keeps counties in order of first appearance; groupby slices
    # the frame once per county and leaves out rows with a missing county.
    for county, county_df in df.groupby("county", sort=False):

        # Removes unknown counties
        if county == 'Unknown':
            continue

        # Sets x and y
        y = county_df["ending_deaths"]  # target variable
        # has_constant='add' forces the intercept column even when v1 or v2
        # is itself constant within the county; the default ('skip') would
        # return the matrix without an intercept in that case, so the set of
        # fitted terms would differ from county to county.
        x = sm.add_constant(county_df[["v1", "v2"]].values, has_constant='add')

        ventilator_model = sm.OLS(y, x).fit()

        # The design matrix is always [intercept, v1, v2], so the fitted
        # values are labelled by position.
        record = {'county': county}
        record.update(zip(coefficient_columns, ventilator_model.params))
        record.update(zip(pvalue_columns, ventilator_model.pvalues))
        records.append(record)

    return pd.DataFrame(records, columns=result_columns)

In [97]:
# Fits the per-county models and displays the resulting table of coefficients and p-values
county_ventilator_model_results = calculate_ventilator_coefficients(ventilator)
county_ventilator_model_results

Unnamed: 0,county,const,x1,x2,const_pvalue,x1_pvalue,x2_pvalue
0,Alameda,-319.910174,4.344068,-0.072956,9.122421e-10,8.539312e-10,0.00594
1,Amador,0.0,0.0,0.0,,,
2,Butte,0.0,0.0,0.0,,,
3,Calaveras,0.0,0.0,0.0,,,
4,Contra Costa,-112.168241,1.555922,-0.043343,1.000379e-10,9.015241e-11,0.006839
5,Del Norte,0.0,0.0,0.0,,,
6,El Dorado,0.0,0.0,0.0,,,
7,Fresno,-139.886323,1.948304,-0.107155,2.306896e-10,2.236355e-10,6.7e-05
8,Glenn,0.0,0.0,0.0,,,
9,Humboldt,0.0,0.0,0.0,,,


In [98]:
# Saves the coefficients and p-values per county in California as a CSV file
# (index=False leaves the DataFrame's row index out of the file)
county_ventilator_model_results.to_csv("county_ventilator_model_results.csv", index=False)