# Covid-19 Vaccines in California and their Impacts

### Hypotheses:
#### 1.	College towns had an impact on Covid-19 surges in 2020.
#### 2.	Vaccines have directly impacted California’s case and death rates.

In [None]:
#Dependencies
import pandas as pd
import scipy.stats as sts
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt

### Cleaning Data

In [2]:
# Cleaning Cases Data
# NOTE(review): pandas and datetime are also imported in the Dependencies cell at the
# top of the notebook; repeating them here is harmless but redundant.
import pandas as pd
import datetime as dt
# Relative path of the cases CSV; the following cell passes it to pd.read_csv.
file_path = "Data/covid19cases_test.csv"

In [22]:
# Load the cases CSV from file_path into a DataFrame and preview its first rows.
case_df = pd.read_csv(file_path)
case_df.head()

Unnamed: 0,date,area,area_type,population,cases,deaths,total_tests,positive_tests,reported_cases,reported_deaths,reported_tests
0,4/23/2021,Alameda,County,1685886.0,0.0,0.0,,,106.0,3.0,11251.0
1,4/23/2021,Alpine,County,1117.0,0.0,0.0,,,0.0,0.0,2.0
2,4/23/2021,Amador,County,38531.0,0.0,0.0,,,4.0,0.0,487.0
3,4/23/2021,Butte,County,217769.0,0.0,0.0,,,16.0,0.0,454.0
4,4/23/2021,Calaveras,County,44289.0,0.0,0.0,,,5.0,0.0,135.0


In [20]:
# Parse the month/day/year date strings into datetimes so rows can be compared by date.
# (This overwrites case_df's date column in place.)
case_df['date'] = pd.to_datetime(case_df['date'], format='%m/%d/%Y')

# Rows to keep: dated on or before 3-31-21, and not one of the area labels
# that carry no usable per-area data.
cutoff_date = dt.datetime(2021, 4, 1)
excluded_areas = ['Out of state', 'California', 'Unknown']
before_cutoff = case_df['date'] < cutoff_date
usable_area = ~case_df['area'].isin(excluded_areas)

# Columns needed for the analysis; everything else is left behind.
kept_columns = ['date', 'area', 'population', 'cases', 'deaths', 'total_tests', 'positive_tests']
clean_case = case_df.loc[before_cutoff & usable_area, kept_columns]

# Treat missing values as 0.
clean_case = clean_case.fillna(0)
clean_case

Unnamed: 0,date,area,population,cases,deaths,total_tests,positive_tests
1403,2021-03-31,Alameda,1685886.0,98.0,1.0,10553.0,123.0
1404,2021-03-31,Alpine,1117.0,0.0,0.0,3.0,0.0
1405,2021-03-31,Amador,38531.0,3.0,0.0,317.0,8.0
1406,2021-03-31,Butte,217769.0,11.0,0.0,632.0,11.0
1407,2021-03-31,Calaveras,44289.0,4.0,0.0,80.0,4.0
...,...,...,...,...,...,...,...
27322,2020-02-01,Tulare,484423.0,0.0,0.0,0.0,0.0
27323,2020-02-01,Tuolumne,52351.0,0.0,0.0,0.0,0.0
27325,2020-02-01,Ventura,852747.0,0.0,0.0,1.0,0.0
27326,2020-02-01,Yolo,223612.0,0.0,0.0,0.0,0.0


In [None]:
# Cleaning Vaccinations Data

In [2]:
import pandas as pd
import scipy.stats as sts
import matplotlib.pyplot as plt
import numpy as np

### Cleaning Data

In [None]:
# Cleaning Cases Data

In [5]:
# Cleaning Vaccinations Data
vaccine_csv_path = "Data/covid19vaccinesbycounty.csv"

# Columns removed from the vaccinations table before analysis.
unused_vaccine_columns = ['cumulative_total_doses', 'cumulative_pfizer_doses',
                          'cumulative_moderna_doses', 'cumulative_jj_doses',
                          'cumulative_at_least_one_dose', 'california_flag']

# Values of `county` whose rows are removed, leaving one row per named county and date.
excluded_counties = ['All CA Counties', 'All CA and Non-CA Counties',
                     'Outside California', 'Unknown']

# Read the CSV once (the cell previously loaded the same file twice) and drop the
# unused columns straight away.
vaccine_df = pd.read_csv(vaccine_csv_path).drop(columns=unused_vaccine_columns)

# Keep every row whose county is not in the exclusion list; the original row index is
# preserved, exactly as when the rows were dropped by index.
vaccine_df = vaccine_df[~vaccine_df['county'].isin(excluded_counties)]
vaccine_df

Unnamed: 0,county,administered_date,total_doses,pfizer_doses,moderna_doses,jj_doses,partially_vaccinated,total_partially_vaccinated,fully_vaccinated,cumulative_fully_vaccinated,at_least_one_dose
511,Alameda,12/15/2020,36,36,0,0,36,36,0,0,36
512,Alameda,12/16/2020,202,202,0,0,202,238,0,0,202
513,Alameda,12/17/2020,396,396,0,0,396,634,0,0,396
514,Alameda,12/18/2020,1766,1765,1,0,1766,2400,0,0,1766
515,Alameda,12/19/2020,1357,1357,0,0,1357,3757,0,0,1357
...,...,...,...,...,...,...,...,...,...,...,...
7761,Yuba,4/19/2021,244,102,142,0,103,5934,141,12839,103
7762,Yuba,4/20/2021,302,79,223,0,177,5987,125,12964,178
7763,Yuba,4/21/2021,694,283,411,0,299,5891,395,13359,299
7764,Yuba,4/22/2021,269,95,174,0,149,5920,120,13479,149


In [None]:
# Cleaning Hospital Data

In [6]:
# Cleaning Colleges Data
# Names assigned positionally to the columns of the colleges CSV.
college_column_names = ['School_ID', 'School_name', 'Year', 'Duplicate', 'Street_Address', 'City', 'Zip_code',
                        'County', 'Longitude', 'Latitude', 'Total_Enrollment', 'Full_time_enrollment',
                        'Part_time_enrollment']

# The only columns kept for the analysis, in their original order.
college_columns_kept = ['School_name', 'Longitude', 'Latitude', 'Total_Enrollment']

college_data = pd.read_csv('Data/ca_colleges.csv')
college_data.columns = college_column_names

# Keep the four columns above, then discard rows with no enrollment figure.
college_data = college_data[college_columns_kept].dropna(subset=['Total_Enrollment'])
college_data.head()

Unnamed: 0,School_name,Longitude,Latitude,Total_Enrollment
0,Academy of Art University,-122.400578,37.787943,9812.0
1,Academy of Chinese Culture and Health Sciences,-122.269839,37.805972,133.0
3,Avalon School of Cosmetology-Alameda,-122.243566,37.764203,47.0
4,College of Alameda,-122.279303,37.781017,5667.0
5,Allan Hancock College,-120.421144,34.943716,11894.0


### Connecting API

### Summary Statistics

## Research Analysis

### 1.	How has the introduction of the Covid vaccine affected case/death rate in California?

### 2.	Do “College” towns play a larger role in case surges?

### 3.	Are there “Vaccination Deserts”? What impact have Covid-19 and the vaccines had in these areas?

### 4.	Can we predict when California will return to “normal”?