In [10]:
import pandas as pd
import json
import numpy as np

In [10]:
# Per-state covid counts: keep only the columns used downstream
covid_df = pd.read_csv('csv/covid.csv')[['date', 'state', 'cases', 'deaths']]

# Vaccination counts: rename the key columns so they line up with covid_df,
# then keep only the join keys and the two vaccination measures
vacc_df = (
    pd.read_csv('csv/vaccination.csv')
    .rename(columns={'Date': 'date', 'Province_State': 'state'})
    [['date', 'state', 'People_at_least_one_dose', 'People_fully_vaccinated']]
)

# Left join keeps every covid row; (date, state) pairs without a vaccination
# record come back as NaN, which is then replaced with 0
covid_df = covid_df.merge(vacc_df, how='left', on=['date', 'state']).fillna(0)
covid_df

Unnamed: 0,date,state,cases,deaths,People_at_least_one_dose,People_fully_vaccinated
0,2020-01-21,Washington,1,0,0.0,0.0
1,2020-01-22,Washington,1,0,0.0,0.0
2,2020-01-23,Washington,1,0,0.0,0.0
3,2020-01-24,Illinois,1,0,0.0,0.0
4,2020-01-24,Washington,1,0,0.0,0.0
...,...,...,...,...,...,...
52249,2022-10-01,Virginia,2088708,21866,7529074.0,6391967.0
52250,2022-10-01,Washington,1816546,14376,6331990.0,5635814.0
52251,2022-10-01,West Virginia,600558,7416,1189745.0,1053708.0
52252,2022-10-01,Wisconsin,1866640,15263,4275404.0,3890833.0


In [9]:
# Display whatever `covid_df` is currently bound to in the kernel.
# NOTE(review): the saved output of this cell shows blank vaccination values
# where the load cell's output shows 0.0, so it looks like it was captured
# before the fillna step ran -- re-run the notebook top to bottom to confirm.
covid_df

Unnamed: 0,date,state,cases,deaths,People_at_least_one_dose,People_fully_vaccinated
0,2020-01-21,Washington,1,0,,
1,2020-01-22,Washington,1,0,,
2,2020-01-23,Washington,1,0,,
3,2020-01-24,Illinois,1,0,,
4,2020-01-24,Washington,1,0,,
...,...,...,...,...,...,...
52249,2022-10-01,Virginia,2088708,21866,7529074.0,6391967.0
52250,2022-10-01,Washington,1816546,14376,6331990.0,5635814.0
52251,2022-10-01,West Virginia,600558,7416,1189745.0,1053708.0
52252,2022-10-01,Wisconsin,1866640,15263,4275404.0,3890833.0


In [15]:
def fibonacci(n):
    """Return the n-th Fibonacci number, with F(0) = 0 and F(1) = 1.

    Args:
        n: Non-negative integer index into the sequence.

    Returns:
        The n-th Fibonacci number (e.g. ``fibonacci(5) == 5``).

    Raises:
        ValueError: If ``n`` is negative.

    Side effects:
        For every ``n`` other than 1, prints the number of loop iterations
        that were performed before returning.
    """
    if n < 0:
        raise ValueError("n must be a non-negative integer")
    if n == 1:
        # Early exit: this path returns without printing the iteration count.
        return 1

    counter = 0
    a, b = 0, 1
    for _ in range(1, n):
        a, b = b, a + b
        counter += 1

    print(counter)
    # For n == 0 the loop never runs, so `a` still holds F(0) = 0; returning
    # `b` here would wrongly give 1.
    return a if n == 0 else b


# Driver Program
print(fibonacci(5))

4
5


In [7]:
# Read both raw CSVs before doing any processing
covid_df = pd.read_csv('oriCSV/covidData.csv')
center_df = pd.read_csv('oriCSV/amazonCenters.csv')

# Covid side: keep the needed columns, rename the join key to `state_name`,
# normalise dates to 'YYYY-MM-DD' strings, then collapse to one summed row
# per (date, state_name), ordered by those keys
print("> Getting data we need from covid dataframe")
covid_columns = ['date', 'state', 'cases', 'deaths',
                 'People_at_least_one_dose', 'People_fully_vaccinated']
covid_df = (
    covid_df[covid_columns]
    .rename(columns={'state': 'state_name'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']).dt.strftime('%Y-%m-%d'))
    .groupby(['date', 'state_name'])
    .sum()
    .reset_index()
    .sort_values(by=['date', 'state_name'])
)

# Center side: keep the needed columns, one row per center_id, ordered by
# state, county and center
print("> Getting data we need from amazon centers dataframe")
center_columns = ['center_id', 'center_name', 'county_id', 'county_name', 'county_fips', 'state_id',
                  'state_name', 'state_abbr', 'population2020', 'population2021', 'zip_code', 'latitude', 'longitude']
center_df = (
    center_df[center_columns]
    .drop_duplicates(subset=['center_id'])
    .sort_values(by=['state_id', 'county_id', 'center_id'])
)

# Left join on state_name: each covid row is repeated once per center in its
# state, and states without any center keep one row with empty center columns
print("> Merging dataframes")
df = covid_df.merge(center_df, how='left', on=['state_name'])

> Getting data we need from covid dataframe
> Getting data we need from amazon centers dataframe
> Merging dataframes


In [12]:
# Build a nested dict keyed by state_id and dump it as JSON:
#   data[state_id] = {
#       state attributes...,
#       'centers': {center_id: {center attributes...}},
#       'dates':   {date: {covid / vaccination counts}},
#   }
# NOTE(review): the merged frame displays state_id / center_id as floats
# (3.0, 1.0), so the JSON keys are presumably written as "3.0", "1.0"; that
# is left unchanged here so existing consumers of the file keep working.
print("> Getting data we need into a form of json")
data = {}
for _, row in df.iterrows():
    state_id = row['state_id']
    # States with no matching center come out of the left merge with NaN in
    # every center column, including state_id. NaN never matches an existing
    # dict key, so each of those rows used to add its own NaN-keyed entry and
    # the dump ended up with many duplicate "NaN" keys. Skip them instead.
    if pd.isna(state_id):
        continue

    if state_id not in data:
        data[state_id] = {
            'state_name': row['state_name'],
            'state_abbr': row['state_abbr'],
            'population2020': row['population2020'],
            'population2021': row['population2021'],
            'centers': {},
            'dates': {}
        }
    state_entry = data[state_id]

    center_id = row['center_id']
    # Same NaN-key problem as above: never register a center without an id.
    if pd.notna(center_id) and center_id not in state_entry['centers']:
        state_entry['centers'][center_id] = {
            'center_name': row['center_name'],
            'county_id': row['county_id'],
            'county_name': row['county_name'],
            'county_fips': row['county_fips'],
            'zip_code': row['zip_code'],
            'latitude': row['latitude'],
            'longitude': row['longitude'],
        }

    # Written once per center row, so the last row seen for a
    # (state_id, date) pair wins.
    # NOTE(review): the frame was merged on state_name but is keyed here by
    # state_id; if one state_id shows up under more than one state_name, the
    # counts of the last state processed overwrite the others -- verify the
    # centers file.
    state_entry['dates'][row['date']] = {
        'cases': row['cases'],
        'deaths': row['deaths'],
        'People_at_least_one_dose': row['People_at_least_one_dose'],
        'People_fully_vaccinated': row['People_fully_vaccinated'],
    }

# save the data to json file
print("> Saving data to json file")
with open('amazonCenters.json', 'w') as f:
    json.dump(data, f)

> Getting data we need into a form of json
> Saving data to json file


In [8]:
# Inspect the merged covid/center frame `df`
df

Unnamed: 0,date,state_name,cases,deaths,People_at_least_one_dose,People_fully_vaccinated,center_id,center_name,county_id,county_name,county_fips,state_id,state_abbr,population2020,population2021,zip_code,latitude,longitude
0,2020-12-15,Alabama,301533,4124,0,0,,,,,,,,,,,,
1,2020-12-15,Alaska,41645,172,0,0,,,,,,,,,,,,
2,2020-12-15,Arizona,427121,7428,0,0,1.0,#AZA5,105.0,Maricopa County,4013.0,3.0,AZ,7151502.0,7276316.0,85043.0,33.42,-112.20
3,2020-12-15,Arizona,427121,7428,0,0,2.0,#PHX3,105.0,Maricopa County,4013.0,3.0,AZ,7151502.0,7276316.0,85043.0,33.42,-112.20
4,2020-12-15,Arizona,427121,7428,0,0,3.0,#PHX5,105.0,Maricopa County,4013.0,3.0,AZ,7151502.0,7276316.0,85338.0,33.37,-112.38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180724,2022-10-06,Wisconsin,1872550,15298,4287712,3903849,253.0,#MKE2,3089.0,Milwaukee County,55079.0,50.0,WI,5893718.0,5895908.0,53154.0,42.88,-87.89
180725,2022-10-06,Wisconsin,1872550,15298,4287712,3903849,254.0,#DML1,3089.0,Milwaukee County,55079.0,50.0,WI,5893718.0,5895908.0,53215.0,43.00,-87.94
180726,2022-10-06,Wyoming,177165,1899,344906,301951,89.0,#IND9,3130.0,Johnson County,56019.0,15.0,WY,576851.0,578803.0,46143.0,39.60,-86.12
180727,2022-10-06,Wyoming,177165,1899,344906,301951,96.0,#MKC4,3130.0,Johnson County,56019.0,17.0,WY,576851.0,578803.0,66021.0,38.76,-95.01


In [11]:
# Rows of the merged frame whose center_id is missing
df.loc[df['center_id'].isna()]

Unnamed: 0,date,state_name,cases,deaths,People_at_least_one_dose,People_fully_vaccinated,center_id,center_name,county_id,county_name,county_fips,state_id,state_abbr,population2020,population2021,zip_code,latitude,longitude
0,2020-12-15,Alabama,301533,4124,0,0,,,,,,,,,,,,
1,2020-12-15,Alaska,41645,172,0,0,,,,,,,,,,,,
13,2020-12-15,Arkansas,189198,3016,0,0,,,,,,,,,,,,
51,2020-12-15,District of Columbia,25339,720,0,0,,,,,,,,,,,,
54,2020-12-15,Guam,8118,120,0,0,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180561,2022-10-06,Northern Mariana Islands,13171,40,46137,43665,,,,,,,,,,,,
180588,2022-10-06,Puerto Rico,994339,5174,3102522,2730791,,,,,,,,,,,,
180592,2022-10-06,South Dakota,262213,3033,711304,569282,,,,,,,,,,,,
180638,2022-10-06,Vermont,143049,728,602707,521142,,,,,,,,,,,,
