## loading modules

In [2]:
import pandas as pd
import numpy as np
from scipy.stats import bernoulli
import matplotlib.pyplot as plt

## loading data and removing NaN values

In [3]:
# Read the poll results and, in the same expression, discard every row
# whose 'Number of Observations' entry is missing.
df = (
    pd.read_csv('content/2016-general-election-trump-vs-clinton.csv')
    .dropna(subset=['Number of Observations'])
)

## Question 1:
$$
\hat{p} = \frac{1}{N} \sum_{i=1}^{N} X_i
$$

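Here $\hat{p}$ is the sample proportion, i.e. the mean of the $N$ observations $X_i$. To find the confidence interval we need to calculate $\hat{p}$ and its standard error (SE):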

$$
SE = \sqrt{\frac{p(1-p)}{N}}
$$

Since $p$ is unknown, we use $\hat{p}$ to estimate it:

$$
SE \approx \sqrt{\frac{\hat{p}(1-\hat{p})}{N}}
$$


A 95% confidence interval for p is constructed as:

$$
\text{CI} = \hat{p} \pm z \times SE
$$


where $z$ is the critical value of the standard normal distribution; for a 95% interval, $z = 1.96$.<br>
Substituting, the formula for the CI is:
$$
\text{CI} = \hat{p} \pm 1.96 \times \sqrt{\frac{\hat{p}(1-\hat{p})}{N}}
$$





## Question 2:

In [None]:
SEED = 0      # fixed seed so the reported coverage is reproducible on re-run
Z_95 = 1.96   # standard normal critical value for a two-sided 95% interval


def simulate_ci_hits(p, n, iters, z=Z_95, seed=None):
    """Count how many simulated Wald intervals contain the true proportion.

    Draws ``iters`` independent samples of ``n`` Bernoulli(p) observations,
    builds the interval ``p_hat +/- z * sqrt(p_hat * (1 - p_hat) / n)`` for
    each sample, and counts the intervals that contain ``p``.

    Parameters
    ----------
    p : float
        True success probability, between 0 and 1 inclusive.
    n : int
        Number of observations per simulated sample.
    iters : int
        Number of simulated samples (and therefore intervals).
    z : float, optional
        Critical value multiplying the standard error (default 1.96).
    seed : int or None, optional
        Passed to ``bernoulli.rvs`` as ``random_state``; ``None`` uses
        NumPy's global random state.

    Returns
    -------
    int
        Number of intervals whose endpoints bracket ``p`` (inclusive).
    """
    # One row per simulated sample, one column per observation; a single
    # vectorised draw replaces `iters` separate calls to bernoulli.rvs.
    samples = bernoulli.rvs(p, size=(iters, n), random_state=seed)

    # Divide by the actual sample size `n` (not a hard-coded 30).
    p_hat = samples.mean(axis=1)

    # Margin of error = z * estimated standard error of p_hat.
    margin = z * np.sqrt(p_hat * (1 - p_hat) / n)

    covered = (p_hat - margin <= p) & (p <= p_hat + margin)
    return int(covered.sum())


p = 0.47
N = 30
iters = 10**5

# Number of simulated 95% intervals that contained the true p.
confidence_interval = simulate_ci_hits(p, N, iters, seed=SEED)

# Empirical coverage probability of the interval.
print(confidence_interval / iters)

    

0.93174


## Question 3:

In [None]:

# Narrow the frame to the six columns selected below.
df_cleaned = df[
    [
        'Trump',
        'Clinton',
        'Pollster',
        'Start Date',
        'Number of Observations',
        'Mode',
    ]
]

print(df_cleaned)

      Trump  Clinton          Pollster  Start Date  Number of Observations  \
0      41.0     45.0     Insights West  2016-11-04                   940.0   
4      43.0     41.0          IBD/TIPP  2016-11-04                  1107.0   
5      41.0     45.0  YouGov/Economist  2016-11-04                  3669.0   
6       6.0     90.0  YouGov/Economist  2016-11-04                  1392.0   
7      84.0      4.0  YouGov/Economist  2016-11-04                  1110.0   
...     ...      ...               ...         ...                     ...   
1516   65.0     12.0  YouGov/Economist  2015-06-20                   235.0   
1517   34.0     45.0  YouGov/Economist  2015-06-20                   266.0   
1518   32.0     50.0        Quinnipiac  2015-05-19                  1711.0   
1519    5.0     89.0        Quinnipiac  2015-05-19                   748.0   
1520   68.0      9.0        Quinnipiac  2015-05-19                   679.0   

            Mode  
0       Internet  
4     Live Phone  
5     