## CP101 - Assignment 3: COVID-19 & Race

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.io as pio
import seaborn as sns
import statsmodels.api as sm

In [None]:
# Load the three input tables (COVID by zip, hospital list, jobs by zip) in one pass.
nyc, nyc_hospitals, nyc_jobs = (
    pd.read_csv(csv_name)
    for csv_name in ('nyc-covid-by-zip.csv', 'hospitals-2011.csv', 'nyc-jobs-by-zip.csv')
)

## Data Cleaning

### Hospital Data
This dataset contains public hospitals in NYC I think.
- grouped the hospitals by zip code

In [None]:
## Number of hospital rows per zip code.
## count() tallies non-null cells per column; the 'Facility Type' tally is kept
## as the per-zip hospital count and the key is renamed to MODZCTA.
nyc_hospitals_zip = (
    nyc_hospitals
    .groupby(['Postcode'], as_index=False)
    .count()
    .loc[:, ['Postcode', 'Facility Type']]
    .rename(columns={"Postcode": "MODZCTA", "Facility Type": "num_hospitals"})
)
nyc_hospitals_zip.head()


### NYC Covid Data 
This dataset contains # of positive COVID cases, # of COVID tests, and racial breakdown by zip code
- changed the zip codes to be integers 
- renamed MODZCTA column to ZCTA

In [None]:
# Rename the zip-code key to ZCTA first, then cast its values to integers.
nyc.rename(columns={'MODZCTA': 'ZCTA'}, inplace=True)
nyc['ZCTA'] = nyc['ZCTA'].astype(int)
nyc

### Job Data
This dataset contains job types by zip codes. (Source: LEHD) 
- renamed zip code column to be ZCTA
- converted the percentage strings into fractional floats (e.g. "25%" becomes 0.25)

In [None]:
## renaming columns
nyc_jobs.rename(columns={"Unnamed: 0": 'ZCTA'}, inplace=True)

## Forward-fill blanks so every row carries the ZCTA label of the row above it.
## (DataFrame.ffill replaces the deprecated fillna(method='ffill'); like the
## original call it fills every column, not only ZCTA.)
nyc_jobs.ffill(inplace=True)

## Drop the raw-count rows and the two partial ZCTAs; .copy() gives an
## independent frame so the column assignments below never touch nyc_jobs.
is_count_row = nyc_jobs['Unnamed: 1'] == 'Count'
is_partial_zcta = nyc_jobs['ZCTA'].isin(['11001 (Partial)', '11040 (Partial)'])
nyc_jobs_clean = nyc_jobs[~(is_count_row | is_partial_zcta)].copy()

## making zcta into integer
nyc_jobs_clean['ZCTA'] = nyc_jobs_clean['ZCTA'].astype(int)

## Every column after the first two holds a job-type share stored as text.
job_types = nyc_jobs_clean.columns.tolist()[2:]

## Convert each share from text to a fraction: drop the trailing character,
## parse as a number (unparseable cells become NaN), then divide by 100.
## The .str accessor passes NaN through instead of raising on a missing cell.
for job_col in job_types:
    share_text = nyc_jobs_clean[job_col].str[:-1]
    nyc_jobs_clean[job_col] = pd.to_numeric(share_text, errors="coerce") / 100

nyc_jobs_clean.head()
    

### Final Dataset (woo)
This dataset includes # of positive COVID cases, racial composition, and job type by zip code.

- merged the nyc dataset w/ the jobs dataset
- replaced blanks w/ nan

In [None]:
### Left-join the job shares onto the COVID table (every COVID row is kept),
### then turn empty / whitespace-only string cells into NaN.
nyc_final = (
    nyc
    .merge(nyc_jobs_clean, how='left', on='ZCTA')
    .replace(to_replace=r'^\s*$', value=np.nan, regex=True)
)
nyc_final

## Forming Dataset w/ Categorical Data

### Race
- "majority" group = white = 1
- "minority" groups = non white = 0 

In [None]:
## Smaller working table: zip code, positivity rate and median income only.
zcta, percent_pos, median_inc = (
    nyc_final[field] for field in ('ZCTA', 'percent_pos', 'median_inc')
)

# Side-by-side concat keeps the shared row index and the original column names.
nyc_cat = pd.concat([zcta, percent_pos, median_inc], axis=1)

nyc_cat.head()

In [None]:
# Race-share columns are picked by position (columns 8 through 13 of nyc_final).
# NOTE(review): presumably these are the six non-white share columns — confirm
# against nyc_final.columns, since a reordered CSV would silently change this.
minority = nyc_final.columns[8:14].tolist()

# Row-wise total share across those columns.
perc_minority = nyc_final[minority].sum(axis=1)

# 0/1 flag: 1 when the summed share is at most one half, otherwise 0.
col = np.where(perc_minority <= 0.5, 1, 0).tolist()

nyc_cat['perc_minority'] = perc_minority
nyc_cat['perc_majority'] = 1 - perc_minority
nyc_cat['majority'] = col

nyc_cat.head()

### Job Type
- white collar jobs (in office): 1
- blue collar jobs: 0

In [None]:
# Display the list of job-type column names built during the jobs cleaning step.
job_types

In [None]:
# Job-type columns grouped into three categories.
wfh = [
    'Information',
    'Finance and Insurance',
    'Real Estate and Rental and Leasing',
    'Professional, Scientific, and Technical Services',
    'Management of Companies and Enterprises',
    'Educational Services',
]
non_ess = [
    'Retail Trade',
    'Administration & Support, Waste Management and Remediation',
    'Arts, Entertainment, and Recreation',
    'Other Services (excluding Public Administration)',
]
ess = [
    'Agriculture, Forestry, Fishing and Hunting',
    'Mining, Quarrying, and Oil and Gas Extraction',
    'Utilities',
    'Construction',
    'Manufacturing',
    'Wholesale Trade',
    'Transportation and Warehousing',
    'Health Care and Social Assistance',
    'Accommodation and Food Services',
    'Public Administration',
]

# One summed-share column per category, added in the same order as before.
for share_col, member_cols in (('perc_wfh', wfh), ('perc_non_ess', non_ess), ('perc_ess', ess)):
    nyc_final[share_col] = nyc_final[member_cols].sum(axis=1)
nyc_final.head()

In [None]:
# Job-type columns treated as blue collar.
blue = [
    'Agriculture, Forestry, Fishing and Hunting',
    'Mining, Quarrying, and Oil and Gas Extraction',
    'Utilities',
    'Construction',
    'Manufacturing',
    'Wholesale Trade',
    'Retail Trade',
    'Transportation and Warehousing',
    'Administration & Support, Waste Management and Remediation',
    'Health Care and Social Assistance',
    'Accommodation and Food Services',
]

# Row-wise total share of blue-collar jobs.
perc_blue = nyc_final[blue].sum(axis=1)

# 0/1 flag: 1 (white collar) when the blue-collar share is at most one half.
col = np.where(perc_blue <= 0.5, 1, 0).tolist()

nyc_cat['perc_blue'] = perc_blue
nyc_cat['perc_white'] = 1 - perc_blue
nyc_cat['white_collar'] = col

nyc_cat


## Exploratory Data Analysis

### Percent Positive as a function of Median Income

In [None]:
# Keep only rows that have both a positivity rate and a median income.
nyc_cat = nyc_cat.dropna(subset=['percent_pos', 'median_inc'])

# The log is undefined for non-positive rates, so drop those rows *before*
# taking it (the original logged first, producing -inf plus a RuntimeWarning,
# and then filtered). .copy() makes nyc_cat an independent frame, so adding
# columns to it does not raise SettingWithCopyWarning.
nyc_cat = nyc_cat[nyc_cat['percent_pos'] > 0].copy()
nyc_cat['log_percent_pos'] = np.log(nyc_cat['percent_pos'].astype(float))

In [None]:
from sklearn.linear_model import LinearRegression

# Single predictor, shaped as an (n, 1) column for scikit-learn.
income_column = nyc_cat['median_inc'].to_numpy().reshape(-1, 1)

# Positivity rate regressed on median income; fitted values stored for plotting.
reg = LinearRegression()
reg.fit(income_column, nyc_cat['percent_pos'])
nyc_cat['best_fit_lin'] = reg.predict(income_column)

# Same predictor against the log of the positivity rate.
reg2 = LinearRegression()
reg2.fit(income_column, nyc_cat['log_percent_pos'])
nyc_cat['best_fit_log'] = reg2.predict(income_column)

In [None]:
# Correlation coefficient r for each fit, shown as "r= ..." in the plot hover text.
# With a single predictor R^2 equals r^2, but sqrt(R^2) alone is always >= 0;
# the sign of r is the sign of the fitted slope, so it is restored here.
# max(..., 0.0) guards the square root against a tiny negative R^2 from rounding.
income_column = np.vstack(nyc_cat['median_inc'])
score1 = np.sign(reg.coef_[0]) * np.sqrt(
    max(reg.score(income_column, nyc_cat['percent_pos']), 0.0))
score2 = np.sign(reg2.coef_[0]) * np.sqrt(
    max(reg2.score(income_column, nyc_cat['log_percent_pos']), 0.0))


In [None]:
import plotly
import plotly.graph_objects as go

# Interactive scatter of positivity rate vs. median income with two views
# (linear y and log y), switched by a pair of buttons above the plot.
fig = go.Figure()

# Traces 0-1: linear view (points + fitted line); visible on load.
fig.add_trace(go.Scatter(x=nyc_cat['median_inc'],
                y=nyc_cat['percent_pos'],
                text=nyc_cat['ZCTA'],
                name='lin_y',
                mode='markers'))

fig.add_trace(go.Scatter(name='linear line of best fit',
                        x=nyc_cat['median_inc'],
                        y=nyc_cat['best_fit_lin'],
                        mode='lines',
                        text='r= ' + str(score1)))

# Traces 2-3: log view (points + fitted line); hidden until "log" is clicked.
fig.add_trace(go.Scatter(x=nyc_cat['median_inc'],
                y=nyc_cat['log_percent_pos'],
                text=nyc_cat['ZCTA'],
                name='log_y',
                mode='markers',
                visible=False))

fig.add_trace(go.Scatter(name='log line of best fit',
                        x=nyc_cat['median_inc'],
                        y=nyc_cat['best_fit_log'],
                        mode='lines',
                        text='r= ' + str(score2),
                        visible=False))

# Each button toggles trace visibility (one flag per trace, in the order added)
# and swaps the titles. Fixes: the "linear" button used to show the *log* title,
# and both y-axis titles misspelled "Positive".
fig.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="right",
            active=0,
            xanchor='left',
            yanchor='top',
            x=0,
            y=1.1,
            buttons=list([
                dict(label="linear",
                     method="update",
                     args=[{"visible": [True, True, False, False]},
                           {"title": "Median Income and Percentage of Positive Tests",
                            "xaxis": {"title": "Median Income"},
                            "yaxis": {"title": "Percentage of Positive Tests"}}]),

                dict(label="log",
                     method="update",
                     args=[{"visible": [False, False, True, True]},
                           {"title": "Median Income and Log Percentage of Positive Tests",
                            "xaxis": {"title": "Median Income"},
                            "yaxis": {"title": "log(Percentage of Positive Tests)"}}])
            ]),
        )
    ])

# Initial titles, matching the linear view that is shown first.
fig.update_layout(title='Median Income and Percentage of Positive Tests',
                 xaxis_title='Median Income',
                 yaxis_title='Percentage of Positive Tests',
                 showlegend=False)

fig.show()

In [None]:
# Save the figure above as a standalone HTML page for hosting (not opened in a browser).
pio.write_html(
    fig=fig,
    file='income-percentage.html',
    auto_open=False,
)

In [None]:
## Plot percent positive against median income with a fitted regression line.
## x and y are passed by keyword: seaborn >= 0.12 only accepts `data`
## positionally, so the old positional form raises a TypeError there.
sns.regplot(x='median_inc', y='percent_pos', data=nyc_final);

### Percent Positive as a function of Race

In [None]:
# Display the column names of the merged table (used to look up the race-share columns).
nyc_final.columns

#### Subplots

In [None]:
# Drop rows with no positivity rate. .copy() makes the result an independent
# frame, so the best-fit columns added to nyc_final in later cells do not
# raise SettingWithCopyWarning.
nyc_final = nyc_final[nyc_final['percent_pos'].notna()].copy()

In [None]:
from sklearn.linear_model import LinearRegression


def _fit_line(frame, x_col, y_col='percent_pos'):
    """Fit y_col on the single predictor x_col with ordinary least squares.

    Returns a tuple (model, fitted_values, r):
      * model         -- the fitted LinearRegression
      * fitted_values -- model predictions for every row of `frame`
      * r             -- signed correlation coefficient. With one predictor
                         R^2 equals r^2; sqrt(R^2) alone drops the sign, so
                         the sign of the fitted slope is re-applied.
    """
    predictor = frame[[x_col]].to_numpy()  # (n, 1) column for scikit-learn
    target = frame[y_col]
    model = LinearRegression().fit(predictor, target)
    # Guard the square root against a tiny negative R^2 caused by rounding.
    r_squared = max(model.score(predictor, target), 0.0)
    signed_r = np.sign(model.coef_[0]) * np.sqrt(r_squared)
    return model, model.predict(predictor), signed_r


# One fit per race-share column; the fitted values are stored on nyc_final for
# plotting and the r values are shown in the plots' hover text.
reg_white, nyc_final['best_fit_white'], white_score = _fit_line(nyc_final, 'perc_white')
reg_black, nyc_final['best_fit_black'], black_score = _fit_line(nyc_final, 'perc_black')
reg_native, nyc_final['best_fit_native'], native_score = _fit_line(nyc_final, 'perc_native')
reg_asian, nyc_final['best_fit_asian'], asian_score = _fit_line(nyc_final, 'perc_asian')
reg_hawaiian, nyc_final['best_fit_hawaiian'], hawaiian_score = _fit_line(nyc_final, 'perc_hawaiian')
reg_other, nyc_final['best_fit_other'], other_score = _fit_line(nyc_final, 'perc_other')
reg_two, nyc_final['best_fit_two'], two_score = _fit_line(nyc_final, 'perc_two')

In [None]:
from plotly.subplots import make_subplots

# One panel per race: (column suffix, panel label, r value, grid row, grid column).
race_panels = [
    ('white', 'White', white_score, 1, 1),
    ('black', 'Black', black_score, 1, 2),
    ('native', 'Native', native_score, 1, 3),
    ('asian', 'Asian', asian_score, 2, 1),
    ('hawaiian', 'Hawaiian', hawaiian_score, 2, 2),
    ('other', 'Other', other_score, 2, 3),
    ('two', 'Two Races', two_score, 3, 1),
]

fig = make_subplots(rows=3, cols=3,
                    subplot_titles=[panel[1] for panel in race_panels],
                    shared_yaxes=False)

small_font = {"size": 10}

for suffix, label, r_value, grid_row, grid_col in race_panels:
    share = nyc_final['perc_' + suffix]
    # Scatter of the raw points, hover text = zip code.
    fig.add_trace(go.Scatter(x=share, y=nyc_final['percent_pos'], mode="markers",
                             opacity=0.5, text=nyc_final['ZCTA']),
                  row=grid_row, col=grid_col)
    # Fitted line, hover text = r. Built-in str() replaces np.str, an alias
    # that was removed from NumPy (AttributeError on current releases).
    fig.add_trace(go.Scatter(x=share, y=nyc_final['best_fit_' + suffix], mode='lines',
                             text='r= ' + str(r_value)),
                  row=grid_row, col=grid_col)
    # title_font replaces the deprecated `titlefont` axis attribute.
    fig.update_xaxes(title_text="Percentage " + label, row=grid_row, col=grid_col,
                     tickfont=small_font, title_font=small_font, title_standoff=0)
    fig.update_yaxes(title_text="Percent Positive", row=grid_row, col=grid_col,
                     tickfont=small_font, title_font=small_font, title_standoff=0)

# The old layout-level yaxis_title was always overwritten by the per-panel
# y-axis title for row 1 / col 1, so it is not set here.
fig.update_layout(title='Prevalence of Race and Percentage of Positive Tests',
                  showlegend=False)

# Shrink the subplot titles (make_subplots stores them as layout annotations).
for annotation in fig['layout']['annotations']:
    annotation['font'] = dict(size=12)

fig.show()

In [None]:
# Save the figure above as a standalone HTML page for hosting (not opened in a browser).
pio.write_html(
    fig=fig,
    file='race-cases.html',
    auto_open=False,
)

#### All-in-One

In [None]:
# Single-axes version of the race plots: each race is a legend group
# (points + fitted line) that can be toggled from the legend.
fig = go.Figure()

# (column suffix, legend label, r value) in display order.
race_groups = [
    ('white', 'White', white_score),
    ('black', 'Black', black_score),
    ('native', 'Native', native_score),
    ('asian', 'Asian', asian_score),
    ('hawaiian', 'Hawaiian', hawaiian_score),
    ('other', 'Other', other_score),
    ('two', 'Two Races', two_score),
]

for position, (suffix, label, r_value) in enumerate(race_groups):
    # Only the first group is drawn on load; the rest start as legend-only.
    visibility = {} if position == 0 else {'visible': 'legendonly'}
    share = nyc_final['perc_' + suffix]
    fig.add_trace(go.Scatter(x=share, y=nyc_final['percent_pos'], mode="markers",
                             opacity=0.5, legendgroup=label, name=label,
                             text=nyc_final['ZCTA'], **visibility))
    # Built-in str() replaces np.str, an alias that was removed from NumPy.
    fig.add_trace(go.Scatter(x=share, y=nyc_final['best_fit_' + suffix], mode='lines',
                             legendgroup=label, text='r= ' + str(r_value),
                             showlegend=False, **visibility))

fig.update_layout(title='Prevalence of Race and Percentage of Positive Tests',
                  yaxis_title='Percentage of Positive Tests',
                  xaxis_title='Prevalence of Race',
                  showlegend=True,
                  legend_title_text='Race')

fig.show()

In [None]:
# Save the figure above as a standalone HTML page for hosting (not opened in a browser).
pio.write_html(
    fig=fig,
    file='race-cases-individual.html',
    auto_open=False,
)

In [None]:
## Percent positive against percent black and percent white, on shared axes.
## x and y are passed by keyword: seaborn >= 0.12 only accepts `data`
## positionally, so the old positional form raises a TypeError there.
ax = sns.regplot(x='perc_black', y='percent_pos', label="perc black", data=nyc_final, scatter_kws={"s": 20})
ax = sns.regplot(x='perc_white', y='percent_pos', label="perc white", data=nyc_final, scatter_kws={"s": 20})
ax.set(xlabel='percent black/white');
ax.legend();

In [None]:
## Percent positive against the minority and majority shares, on shared axes.
## x and y are passed by keyword: seaborn >= 0.12 only accepts `data`
## positionally, so the old positional form raises a TypeError there.
ax = sns.regplot(x='perc_minority', y='percent_pos', label='minority group', data=nyc_cat, scatter_kws={"s": 20})
ax = sns.regplot(x='perc_majority', y='percent_pos', label='majority group', data=nyc_cat, scatter_kws={"s": 20})
ax.legend();


### Job type comparison

#### Essential Workers vs. Non-Essential Workers

In [None]:
from sklearn.linear_model import LinearRegression


def _fit_line(frame, x_col, y_col='percent_pos'):
    """Fit y_col on the single predictor x_col with ordinary least squares.

    Returns a tuple (model, fitted_values, r):
      * model         -- the fitted LinearRegression
      * fitted_values -- model predictions for every row of `frame`
      * r             -- signed correlation coefficient. With one predictor
                         R^2 equals r^2; sqrt(R^2) alone drops the sign, so
                         the sign of the fitted slope is re-applied.
    """
    predictor = frame[[x_col]].to_numpy()  # (n, 1) column for scikit-learn
    target = frame[y_col]
    model = LinearRegression().fit(predictor, target)
    # Guard the square root against a tiny negative R^2 caused by rounding.
    r_squared = max(model.score(predictor, target), 0.0)
    signed_r = np.sign(model.coef_[0]) * np.sqrt(r_squared)
    return model, model.predict(predictor), signed_r


# One fit per job-category share; the fitted values are stored on nyc_final for
# plotting and the r values are shown in the plot's hover text.
reg_wfh, nyc_final['best_fit_wfh'], wfh_score = _fit_line(nyc_final, 'perc_wfh')
reg_ess, nyc_final['best_fit_ess'], ess_score = _fit_line(nyc_final, 'perc_ess')
reg_non_ess, nyc_final['best_fit_non_ess'], non_ess_score = _fit_line(nyc_final, 'perc_non_ess')


In [None]:
from plotly.subplots import make_subplots

# One panel per job category:
# (column suffix, subplot title, x-axis title, r value, grid column).
job_panels = [
    ('wfh', "Work from Home-enabled", "Percentage WFH", wfh_score, 1),
    ('ess', "Non-WFH, essential", "Percentage Non-WFH, Essential", ess_score, 2),
    ('non_ess', "Non-WFH, non-essential", "Percentage Non-WFH, Non-Essential", non_ess_score, 3),
]

fig = make_subplots(rows=1, cols=3,
                    subplot_titles=[panel[1] for panel in job_panels],
                    shared_yaxes=False)

small_font = {"size": 10}

for suffix, _panel_title, x_title, r_value, grid_col in job_panels:
    share = nyc_final['perc_' + suffix]
    # Scatter of the raw points, hover text = zip code.
    fig.add_trace(go.Scatter(x=share, y=nyc_final['percent_pos'], mode="markers",
                             opacity=0.5, text=nyc_final['ZCTA']),
                  row=1, col=grid_col)
    # Fitted line, hover text = r. Built-in str() replaces np.str, an alias
    # that was removed from NumPy (AttributeError on current releases).
    fig.add_trace(go.Scatter(x=share, y=nyc_final['best_fit_' + suffix], mode='lines',
                             text='r= ' + str(r_value)),
                  row=1, col=grid_col)
    # title_font replaces the deprecated `titlefont` axis attribute.
    fig.update_xaxes(title_text=x_title, row=1, col=grid_col,
                     tickfont=small_font, title_font=small_font, title_standoff=0)
    fig.update_yaxes(title_text="Percent Positive", row=1, col=grid_col,
                     tickfont=small_font, title_font=small_font, title_standoff=0)

fig.update_layout(title='Prevalence of Job Type and Percentage of Positive Tests',
                  showlegend=False)

# Shrink the subplot titles (make_subplots stores them as layout annotations).
for annotation in fig['layout']['annotations']:
    annotation['font'] = dict(size=12)

fig.show()

In [None]:
# Save the figure above as a standalone HTML page for hosting (not opened in a browser).
pio.write_html(
    fig=fig,
    file='job-percentage.html',
    auto_open=False,
)

In [None]:
## Percent positive against the share of finance jobs and the share of
## healthcare jobs, on shared axes (the old comment said "retail workers").
## x and y are passed by keyword: seaborn >= 0.12 only accepts `data`
## positionally, so the old positional form raises a TypeError there.
ax = sns.regplot(x='Finance and Insurance', y='percent_pos', label="finance", data=nyc_final, scatter_kws={"s": 20})
ax = sns.regplot(x='Health Care and Social Assistance', y='percent_pos', label="healthcare", data=nyc_final, scatter_kws={"s": 20})
ax.legend();


In [None]:
## Percent positive against the blue-collar and white-collar job shares.
## (In nyc_cat, 'perc_white' is the white-collar share, i.e. 1 - perc_blue.)
## x and y are passed by keyword: seaborn >= 0.12 only accepts `data`
## positionally, so the old positional form raises a TypeError there.
ax = sns.regplot(x='perc_blue', y='percent_pos', label='blue collar', data=nyc_cat, scatter_kws={"s": 20})
ax = sns.regplot(x='perc_white', y='percent_pos', label='white collar', data=nyc_cat, scatter_kws={"s": 20})
ax.legend();


### Correlation Matrix

In [None]:
# Pairwise correlations between the outcome, income, race shares and job shares.
corr_columns = ['percent_pos', 'median_inc',
                'perc_white', 'perc_black', 'perc_native', 'perc_asian',
                'perc_hawaiian', 'perc_other', 'perc_two',
                'perc_wfh', 'perc_ess', 'perc_non_ess']
nyc_corr_matrix = nyc_final.loc[:, corr_columns].corr()

# Keep the lower triangle (diagonal included) and blank the mirrored upper half.
# Built-in bool replaces np.bool, an alias that was removed from NumPy
# (AttributeError on current releases).
lower_triangle = np.tril(np.ones(nyc_corr_matrix.shape)).astype(bool)
nyc_corr_matrix = nyc_corr_matrix.where(lower_triangle)
nyc_corr_matrix

In [None]:
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio

# Heatmap of the correlation matrix; both axes are labelled with its column names.
axis_labels = nyc_corr_matrix.columns.values
heat = go.Heatmap(
    z=nyc_corr_matrix,
    x=axis_labels,
    y=axis_labels,
    xgap=1,
    ygap=1,
    colorscale='RdBu',
    colorbar={'thickness': 20, 'ticklen': 3},
)

title = 'Correlation Matrix'

# Square white canvas, no grid lines, y axis reversed so the first row is on top.
layout = go.Layout(
    title={'text': title, 'x': 0},
    width=600,
    height=600,
    xaxis={'showgrid': False},
    yaxis={'showgrid': False, 'autorange': 'reversed'},
    plot_bgcolor='white',
)

fig = go.Figure(data=[heat], layout=layout)
fig.show()

In [None]:
# Save the figure above as a standalone HTML page for hosting (not opened in a browser).
pio.write_html(
    fig=fig,
    file='corr-matrix.html',
    auto_open=False,
)

### Hospital Analysis

In [None]:
# Remove hospital rows that have no postcode, then convert the remaining
# postcodes to integers.
nyc_hospitals.dropna(subset=['Postcode'], inplace=True)
nyc_hospitals['Postcode'] = [int(code) for code in nyc_hospitals['Postcode']]

In [None]:
# 0/1 flag per row of nyc_cat: 1 when the row's ZCTA appears among the
# hospital postcodes, otherwise 0.
vals = nyc_cat['ZCTA'].isin(nyc_hospitals['Postcode'].values).astype(int).tolist()

nyc_cat['hospital'] = vals
nyc_cat.head()

In [None]:
# Mean positivity rate for rows flagged with a hospital and for rows without one.
hospital_avg = nyc_cat.loc[nyc_cat['hospital'] == 1, 'percent_pos'].mean()
no_hospital_avg = nyc_cat.loc[nyc_cat['hospital'] == 0, 'percent_pos'].mean()

# Give every row the mean of the group it belongs to.
vals2 = [hospital_avg if flag == 1 else no_hospital_avg for flag in nyc_cat['hospital']]

nyc_cat['avg_for_plotting'] = vals2
nyc_cat.head()

In [None]:
import plotly
import plotly.graph_objects as go

# Scatter of positivity rate vs. minority share, split by whether the zip has
# a health center, with a horizontal line at each group's mean positivity rate.
fig = go.Figure()

# (rows of the group, group mean, legend label, colour), in trace order.
hospital_groups = (
    (nyc_cat[nyc_cat['hospital'] == 1], hospital_avg, 'Health Center Present', 'blue'),
    (nyc_cat[nyc_cat['hospital'] == 0], no_hospital_avg, 'No Health Center Present', 'red'),
)

for group_rows, group_avg, label, color in hospital_groups:
    # Hover text is taken from the same filtered rows as x and y. The original
    # passed the full nyc_cat['ZCTA'] column, so labels were matched to points
    # by position and showed the wrong zip code.
    fig.add_trace(go.Scatter(x=group_rows['perc_minority'],
                             y=group_rows['percent_pos'],
                             text=group_rows['ZCTA'],
                             name=label,
                             mode='markers',
                             marker=dict(size=10, opacity=0.5, color=color),
                             legendgroup=label))

    # Group mean drawn across the full 0-1 x range; it shares the group's
    # legend entry, so it has no legend item of its own.
    fig.add_trace(go.Scatter(x=[0, 1],
                             y=[group_avg, group_avg],
                             mode='lines',
                             name=label + ' Avg',
                             text=label + ' Avg',
                             legendgroup=label,
                             line=dict(color=color),
                             showlegend=False))

fig.update_layout(title='Percent Minority, Percentage of Positive Tests, Presence of a Health Center',
                  xaxis_title='Percent Minority',
                  yaxis_title='Percentage of Positive Tests',
                  showlegend=True)

fig.show()

In [None]:
# Save the figure above as a standalone HTML page for hosting (not opened in a browser).
pio.write_html(
    fig=fig,
    file='hospital-percentage.html',
    auto_open=False,
)