In [None]:
import json
import os
from urllib.parse import quote_plus
from urllib.request import urlopen

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import psycopg2
import retrying
import us
from sqlalchemy import create_engine

# Database connection settings. Each value can be overridden through an
# environment variable so real credentials never have to be committed with the
# notebook; the defaults reproduce the values that used to be hardcoded here.
DB_NAME = os.environ.get('POSTGRES_DB', 'database')
DB_USER = os.environ.get('POSTGRES_USER', 'username')
DB_PASSWORD = os.environ.get('POSTGRES_PASSWORD', 'secret')
DB_HOST = os.environ.get('POSTGRES_HOST', 'db')
DB_PORT = os.environ.get('POSTGRES_PORT', '5432')

# SQLAlchemy engine used by every `pd.read_sql_query` call in the notebook.
# User and password are URL-quoted so special characters cannot break the URL.
engine = create_engine(
    f'postgresql+psycopg2://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}'
    f'@{DB_HOST}:{DB_PORT}/{DB_NAME}'
)

# Raw psycopg2 connection and cursor, kept for any cell that needs them.
conn = psycopg2.connect(database=DB_NAME, user=DB_USER, password=DB_PASSWORD,
                        host=DB_HOST, port=DB_PORT)
cur = conn.cursor()

# County boundaries (GeoJSON), read by `plot_states` for the county-level maps.
COUNTIES_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(COUNTIES_GEOJSON_URL) as response:
    counties = json.load(response)



In [2]:
def plot_states(df, col):
    """Show a choropleth of ``df[col]`` on a map of the USA.

    Rows are matched to shapes in the module-level ``counties`` GeoJSON through
    the ``'FIPS'`` column of ``df``. The figure is displayed with ``show()``;
    nothing is returned.

    Parameters
    ----------
    df : DataFrame containing a ``'FIPS'`` column and the column named by ``col``.
    col : name of the column that drives the colour scale.
    """
    choropleth_options = dict(
        geojson=counties,
        locations='FIPS',
        color=col,
        color_continuous_scale="Viridis",
        scope="usa",
        labels={col: col},
    )
    county_map = px.choropleth(df, **choropleth_options)

    # Remove all outer margins so the map fills the output area.
    no_margins = dict(r=0, t=0, l=0, b=0)
    county_map.update_layout(margin=no_margins)
    county_map.show()

# Covid cases by state per 100 000 inhabitants on the 1st of March 2021

In [45]:
def abbr(x):
    """Return the ``abbr`` attribute of the state that ``us.states.lookup`` finds for ``x``."""
    # NOTE(review): if the lookup finds nothing, the attribute access below
    # will fail - confirm every input value resolves to a state.
    state = us.states.lookup(x)
    return state.abbr
# Load the population table and tag every row with its state abbreviation.
population = pd.read_sql_query('select * from population', con=engine)
population['State_abbr'] = population['State_Code'].apply(abbr)

# State population totals, indexed by `State_abbr`. Only `TOT_POP` is summed:
# a blanket `.sum()` would also add up identifier columns such as `FIPS` and
# `State_Code`, which is meaningless and lets non-numeric columns leak into the
# merge that follows.
pop = population.groupby('State_abbr')[['TOT_POP']].sum()

In [76]:

# Date of the snapshot, in the integer form the `date` column is compared with.
snapshot_date = 20210301

# Keep only the snapshot day's rows and attach the state population totals.
daily = pd.read_sql_query('select * from daily', con=engine)
daily = daily.loc[daily['date'].eq(snapshot_date)]
plot = daily.merge(pop, left_on='state', right_on='State_abbr')

# New positive cases per 100 000 inhabitants, rounded to two decimals.
cases_per_100k = plot['positiveIncrease'].div(plot['TOT_POP']).mul(100000)
plot['Avg_cases'] = cases_per_100k.round(2)

In [80]:

# State-level map coloured by the `Avg_cases` column; written to an HTML file.
state_codes = plot['state']
case_rate = plot['Avg_cases']
fig1 = px.choropleth(
    plot,
    locations=state_codes,
    locationmode='USA-states',
    color=case_rate,
    color_continuous_scale='inferno',
    scope='usa',
    title='Covid cases in USA on 1st March',
)
# The top margin is left at its default so the title stays visible.
fig1.update_layout(margin=dict(r=0, l=0, b=0))
fig1.write_html('./plots/1st_march.html')

# Total cases per state per 1000 inhabitants

In [83]:
# Per-state totals over the whole `daily` table.
base = pd.read_sql_query('select * from daily', con=engine)

# `positiveIncrease` is a per-day increment, so the total is its sum.
# `death` (no `Increase` suffix) is treated as a running total: the overall
# figure is its maximum, whereas summing it would count every day's running
# total again. This matches the county cell, which takes the max of `cases`.
# NOTE(review): confirm `death` is cumulative in this table; if it holds
# per-day counts, switch the aggregation back to 'sum'.
states = (
    base.groupby('state')
    .agg({'positiveIncrease': 'sum', 'FIPS': 'first', 'death': 'max'})
    .reset_index()
)

# State population totals, then rates per 1000 inhabitants (two decimals).
pop = population.groupby('State_abbr')['TOT_POP'].sum()
plot = pd.merge(states, pop, left_on='state', right_on='State_abbr')
plot['Avg_cases'] = np.round((plot['positiveIncrease'] * 1000 / plot['TOT_POP']), 2)
plot['Death_avg'] = np.round((plot['death'] * 1000 / plot['TOT_POP']), 2)

In [103]:
# State-level map of the `Avg_cases` column; written to an HTML file.
choropleth_kwargs = dict(
    locations=plot['state'],
    locationmode='USA-states',
    color=plot['Avg_cases'],
    color_continuous_scale='inferno',
    scope='usa',
    title='Total covid cases per state per 1000 inhabitants',
)
fig1 = px.choropleth(plot, **choropleth_kwargs)
# The top margin is left at its default so the title stays visible.
fig1.update_layout(margin=dict(r=0, l=0, b=0))
fig1.write_html('./plots/cases_by_state.html')

In [84]:

# State-level map of the `Death_avg` column; written to an HTML file.
# The title now names deaths: it had been copied unchanged from the cases map.
fig1 = px.choropleth(plot, locations=plot['state'], locationmode='USA-states', color=plot['Death_avg'],
                     color_continuous_scale='inferno', scope='usa',
                     title='Total covid deaths per state per 1000 inhabitants')
# The top margin is left at its default so the title stays visible.
fig1.update_layout(margin={"r": 0, "l": 0, "b": 0})
fig1.write_html('./plots/deaths_by_state.html')

# Total number of cases per county (per inhabitant)

In [67]:
# County-level table; `base` is merged with the population table in the next cell.
base = pd.read_sql_query(sql='select * from base', con=engine)

# Sum of the `cases` column per FIPS code.
# NOTE(review): `cases` is not referenced by the later cells visible in this notebook.
cases_by_fips = base.groupby('FIPS')['cases']
cases = cases_by_fips.sum()

In [70]:
# Join case rows with population rows on FIPS, then collapse to one row per
# FIPS code: the highest `cases` value plus the first `TOT_POP` and `county`.
out = (
    base.merge(population, on='FIPS')
    .groupby('FIPS')
    .agg({'cases': 'max', 'TOT_POP': 'first', 'county': 'first'})
)


In [None]:
# Cases per inhabitant for every county, rounded to two decimals.
out = out.assign(Avg_cases=(out['cases'] / out['TOT_POP']).round(2))

# `plot_states` needs FIPS as a regular column. Resetting the index only while
# FIPS is still the index keeps this cell safe to re-run; an unconditional
# in-place reset would add a stray index column on every extra run.
if 'FIPS' not in out.columns:
    out = out.reset_index()

plot_states(out, 'Avg_cases')

# Covid day-by-day in TX and NY

In [3]:
# Only these three columns of the `base` table are kept for the time series.
wanted_columns = ['cases', 'state', 'date']
df = pd.read_sql_query('select * from base', con=engine)[wanted_columns]


In [6]:
def _daily_new_cases(frame, state_name):
    """Return the day-over-day change of the per-date `cases` total for one state.

    Parameters
    ----------
    frame : DataFrame with `cases`, `state` and `date` columns.
    state_name : value of the `state` column to keep (e.g. ``'Texas'``).

    Returns
    -------
    DataFrame indexed by `date` with a single `cases` column holding the
    difference to the previous date; the first date has no predecessor and is
    filled with 0.
    """
    state_rows = frame[frame['state'] == state_name]
    # Select `cases` before summing: the text `state` column must not take part
    # in the aggregation, because summed strings cannot be differenced and
    # `.diff()` would raise.
    totals_by_date = state_rows.groupby('date')[['cases']].sum()
    return totals_by_date.diff().fillna(0)


tx = _daily_new_cases(df, 'Texas')
ny = _daily_new_cases(df, 'New York')

In [85]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# One row, two panels: Texas on the left, New York on the right.
panel_titles = ("Cases in TX", "Cases in NY")
fig = make_subplots(rows=1, cols=2, subplot_titles=panel_titles)

# Add one line trace per state, each in its own column of the grid.
for panel_col, (legend_name, state_cases) in enumerate((('TX', tx), ('NY', ny)), start=1):
    trace = go.Scatter(x=state_cases.index, y=state_cases["cases"], name=legend_name)
    fig.add_trace(trace, row=1, col=panel_col)

# The top margin is left at its default so the panel titles stay visible.
fig.update_layout(margin=dict(r=0, l=0, b=0))
fig.write_html('./plots/TX_vs_NY.html')

# Poverty by county

In [None]:
# County-level poverty table.
df = pd.read_sql_query('select * from poverty', con=engine)

# Force the poverty rate of FIPS 35039 to 0 before plotting.
# NOTE(review): the reason for this override is not recorded here - presumably
# a bad source value; confirm and document it.
df.loc[df['FIPS'] == '35039', 'Pov_Rate'] = 0

plot_states(df, 'Pov_Rate')

# Education by county

In [None]:
# County-level education table, mapped by its `HS_plus` column.
df = pd.read_sql_query(sql='select * from education', con=engine)
plot_states(df, col='HS_plus')

# Population by county

## Black population

In [72]:

# Reload the population table and add `BA_PR`: (BA_MALE + BA_FEMALE) / TOT_POP.
df = pd.read_sql_query(sql='select * from population', con=engine)
ba_total = df['BA_MALE'] + df['BA_FEMALE']
df['BA_PR'] = ba_total / df['TOT_POP']

In [None]:
# County map of the `BA_PR` share computed in the previous cell.
plot_states(df, col='BA_PR')

## Hispanic population

In [131]:
# Add `H_PR`: (H_MALE + H_FEMALE) / TOT_POP, on the population frame loaded above.
h_total = df['H_MALE'] + df['H_FEMALE']
df['H_PR'] = h_total / df['TOT_POP']

In [None]:
# County map of the `H_PR` share computed in the previous cell.
plot_states(df, col='H_PR')