# [Python Reference Link](http://www.data8.org/sp20/python-reference.html)
*Run the cell below to import and set up the modules we need.*

In [None]:
import numpy as np
from datascience import *

# These lines do some fancy plotting magic.
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import warnings
warnings.simplefilter('ignore', FutureWarning)

# Exploring the percentile function

In [None]:
# The integers 1 through 100: the 0-based range of length 100, shifted up by one.
an_array_1_to_100 = np.arange(100) + 1
an_array_1_to_100

In [None]:
# 25th percentile of the 1..100 array, using the datascience `percentile(p, values)` helper.
percentile(25,an_array_1_to_100)

# Constructing Confidence Intervals

In [None]:
# Load the covid records from the CSV file into a Table, then display it.
covid_data = Table.read_table("Covid Data_subset.csv")
covid_data

In [None]:
# Treat this data set as our population and compute the death rate, in percent,
# of patients with covid.
# Rows whose DATE_DIED is the placeholder '9999-99-99' (an impossible date) are
# counted as survivors, so the death rate is 100 * (1 - fraction of survivors).
100*(1 - (covid_data.where('DATE_DIED','9999-99-99').num_rows / covid_data.num_rows))

### Let's play out a scenario

Suppose that we are still in the early stages of the covid pandemic. You work for a hospital that has seen 100 patients so far. You want to know how deadly this illness is to the population. 

In [None]:
# Draw 100 rows from the covid table without replacement; we treat these rows
# as the 100 patients our hospital has seen so far.
covid_sample = covid_data.sample(100,with_replacement = False)
covid_sample

Then we can define the following functions to construct the lower and upper bounds of a confidence interval.

In [None]:
def confidence_interval_for_death_rate(level, repetitions=1000):
    """Return a sentence stating a bootstrap confidence interval for the death rate.

    Repeatedly resamples the global ``covid_sample`` table with
    ``covid_sample.sample()``, computes the death rate (in percent) of each
    resample, and reports the percentiles that cut off the middle ``level``
    percent of those simulated rates.

    A row counts as a survivor when its ``DATE_DIED`` value is the placeholder
    ``'9999-99-99'``; the death rate is ``100 * (1 - survivors / rows)``.

    Parameters
    ----------
    level : number
        Confidence level in percent, e.g. ``95``.
    repetitions : int, optional
        Number of resamples to simulate. Defaults to 1000.

    Returns
    -------
    str
        A sentence naming the lower and upper bounds of the interval.
    """
    # The row count of the original sample never changes, so read it once.
    sample_size = covid_sample.num_rows

    # Collect the simulated rates in a list and convert once; calling
    # np.append inside the loop would copy the whole array on every iteration.
    death_rates = np.array([
        100 * (1 - covid_sample.sample().where('DATE_DIED', '9999-99-99').num_rows / sample_size)
        for _ in range(repetitions)
    ])

    # Percentage of the distribution left out in each tail.
    tail = (100 - level) / 2
    lower_bound = str(percentile(tail, death_rates))
    upper_bound = str(percentile(100 - tail, death_rates))
    return f'We are {level!s}% confident that the true death rate is between {lower_bound} and {upper_bound}'

In [None]:
# Report a 95% confidence interval for the death rate, based on resampling covid_sample.
confidence_interval_for_death_rate(95)

In [None]:
def confidence_interval_lower_b(tbl, level, column_label, repetitions=10000):
    """Return the lower bound of a bootstrap confidence interval for a column mean.

    Repeatedly resamples ``tbl`` with ``tbl.sample()``, records the mean of
    ``column_label`` in each resample, and returns the ``(100 - level) / 2``
    percentile of those means.

    Parameters
    ----------
    tbl : Table
        Table to resample from.
    level : number
        Confidence level in percent, e.g. ``95``.
    column_label : str
        Label of the numeric column whose mean is being estimated.
    repetitions : int, optional
        Number of resamples to simulate. Defaults to 10000.

    Returns
    -------
    The resampled mean sitting at the lower end of the interval.
    """
    # Build the array of resampled means in one step; calling np.append inside
    # a loop would copy the whole array on every iteration.
    resampled_means = np.array([
        np.mean(tbl.sample().column(column_label)) for _ in range(repetitions)
    ])
    return percentile((100 - level) / 2, resampled_means)

def confidence_interval_upper_b(tbl, level, column_label, repetitions=10000):
    """Return the upper bound of a bootstrap confidence interval for a column mean.

    Repeatedly resamples ``tbl`` with ``tbl.sample()``, records the mean of
    ``column_label`` in each resample, and returns the
    ``100 - (100 - level) / 2`` percentile of those means.

    Parameters
    ----------
    tbl : Table
        Table to resample from.
    level : number
        Confidence level in percent, e.g. ``95``.
    column_label : str
        Label of the numeric column whose mean is being estimated.
    repetitions : int, optional
        Number of resamples to simulate. Defaults to 10000.

    Returns
    -------
    The resampled mean sitting at the upper end of the interval.
    """
    # Build the array of resampled means in one step; calling np.append inside
    # a loop would copy the whole array on every iteration.
    resampled_means = np.array([
        np.mean(tbl.sample().column(column_label)) for _ in range(repetitions)
    ])
    return percentile(100 - (100 - level) / 2, resampled_means)

In [None]:
# Choose the column whose mean we estimate and the confidence level in percent.
column_to_look_at = 'AGE'
level = 95
print('The confidence interval for',column_to_look_at) 
# NOTE: the two bounds come from two separate function calls, each of which
# runs its own independent set of resamples of covid_sample.
print('is from',confidence_interval_lower_b(covid_sample,level,column_to_look_at))
print('up to',confidence_interval_upper_b(covid_sample,level,column_to_look_at))

In [None]:
# Mean of the AGE column over the full covid_data table (the "population" in this
# scenario), which can be checked against the interval estimated from covid_sample.
np.mean(covid_data.column('AGE'))