In [1]:
import pandas as pd
import numpy as np
import os
import geopandas
import random
from bokeh.palettes import Spectral11
from sklearn import preprocessing

In [2]:
# Notebook-wide IPython setup.
# lab_black is presumably the Black auto-formatter extension for JupyterLab
# cells -- TODO confirm it is installed in the target environment.
%load_ext lab_black
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
# Both inputs live under the directory named by the SCRATCH environment
# variable; os.environ[...] raises KeyError if it is not set.
basepath = os.environ["SCRATCH"]
input_csv = "covid-map/covid-19-count/us-counties.csv"
input_csv_path = os.path.join(basepath, input_csv)
input_geojson = "twitter-action/depression/county-map/20200410.geojson"
input_geojson_path = os.path.join(basepath, input_geojson)

# df:  county-level COVID-19 counts table (has a "date" column).
# gdf: county GeoJSON for the same day, loaded as a GeoDataFrame.
df = pd.read_csv(input_csv_path)
gdf = geopandas.read_file(input_geojson_path)

# Keep only the rows for the day that matches the GeoJSON snapshot.
# A boolean mask selects the rows directly instead of looping over every
# date group, and .copy() gives df_t its own data so later column edits do
# not write into a slice of df (SettingWithCopyWarning).
SNAPSHOT_DATE = "2020-04-10"
df_t = df[df["date"] == SNAPSHOT_DATE].copy()

# Fail fast with a clear message instead of a NameError further down when
# the CSV has no rows for the requested day.
if df_t.empty:
    raise ValueError(f"no rows with date == {SNAPSHOT_DATE!r} in {input_csv_path}")

def write_geojson(filename, gdf_t):
    """Write ``gdf_t`` to ``filename`` using the GeoJSON driver.

    The frame is first wrapped in a ``geopandas.GeoDataFrame`` whose geometry
    is taken from the frame's own ``geometry`` attribute.
    """
    geometry_column = gdf_t.geometry
    geo_frame = geopandas.GeoDataFrame(gdf_t, geometry=geometry_column)
    geo_frame.to_file(filename, driver="GeoJSON")

# Give the join key the same name as in the GeoJSON ("cnty_fips").
# rename() returns a new frame, so nothing is assigned into df_t in place
# (the old add-column-then-drop sequence wrote into a slice of df).
df_t = df_t.rename(columns={"fips": "cnty_fips"})

# pd.merge defaults to an inner join: only counties present in both inputs
# are kept.
gdf_t = pd.merge(gdf, df_t, on=["cnty_fips"])
print(gdf_t.shape)

# Variance of the case counts (np.var uses ddof=0 by default), computed
# before the null filter below.
cases_var = np.var(gdf_t["cases"])
print(cases_var)

# Drop rows that are missing either quantity used by the analysis.
gdf_t = gdf_t.dropna(subset=["avg_stress", "cases"])
print(gdf_t.shape)

def foo(x, y, total_cases=None):
    """Return a log2-scaled stress value weighted by the county's case share.

    The result is ``log2((bias + share) * x + 1)`` where ``bias`` is drawn
    uniformly from [1.2, 1.3] on every call and ``share`` is
    ``y / total_cases`` rounded to four decimals.

    Parameters
    ----------
    x : float
        Stress score of one county.
    y : float
        Case count of the same county.
    total_cases : float, optional
        Denominator for the case share.  When omitted, the module-level
        ``sum_cases`` is used, which must be defined before the first call.

    Returns
    -------
    float
        The transformed value.  It is not deterministic unless the ``random``
        module has been seeded, because of the random bias.
    """
    if total_cases is None:
        # Backward-compatible default: read the notebook-level total.
        total_cases = sum_cases

    bias = random.uniform(1.2, 1.3)
    case_share = round(y / total_cases, 4)

    # The former `if result_log is not None ... else: return x` check was
    # always true (np.log2 never returns None), so the fallback branch was
    # unreachable and has been removed.
    return np.log2((bias + case_share) * x + 1)

# foo() draws a random bias on every call; seed the stdlib RNG first so the
# log_stress column is the same after every Restart & Run All.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Total case count over the rows kept above; foo() reads this global.
# Series.sum() is the vectorised equivalent of the built-in sum().
sum_cases = gdf_t["cases"].sum()
gdf_t["log_stress"] = gdf_t[["avg_stress", "cases"]].apply(lambda x: foo(*x), axis=1)

(2191, 14)
983471.0666027009
(2191, 14)


In [17]:
# Standardised copies of the three quantities via preprocessing.scale.
# The "stadnardization_*" spelling is kept as-is because the plotting
# functions read the columns under exactly these names.
scaled_columns = {
    "stadnardization_avg_stress": "avg_stress",
    "stadnardization_cases": "cases",
    "stadnardization_log_stress": "log_stress",
}
for scaled_name, source_name in scaled_columns.items():
    gdf_t[scaled_name] = preprocessing.scale(gdf_t[source_name])

# L2-normalised copies: each column is reshaped to an (n, 1) array, normalised
# along axis 0, then flattened back to one value per row.
normalized_columns = {
    "normalize_log_stress": "log_stress",
    "normalize_cases": "cases",
}
for normalized_name, source_name in normalized_columns.items():
    column_vector = gdf_t[source_name].to_numpy().reshape(-1, 1)
    gdf_t[normalized_name] = preprocessing.normalize(
        column_vector, norm="l2", axis=0
    ).ravel()

In [19]:
from bokeh.models import FactorRange
from bokeh.transform import factor_cmap

from bokeh.palettes import Spectral11
from numpy import histogram, linspace
from bokeh.models import Legend, LegendItem
from bokeh.plotting import figure, output_file, show

def plot_line():
    """Draw standardised stress and case counts as two lines over county FIPS.

    Reads the notebook-level ``gdf_t``, targets ``line.html`` and shows the
    figure with a two-entry legend.
    """
    output_file("line.html")

    county_ids = gdf_t.cnty_fips
    fig = figure(
        plot_width=1000,
        plot_height=500,
        title="avg_stress level county",
        x_axis_label="cnty",
        y_axis_label="avg_stress",
    )

    # One multi_line glyph holds both series; each legend entry selects its
    # own line through `index`.
    lines = fig.multi_line(
        xs=[county_ids, county_ids],
        ys=[gdf_t.stadnardization_avg_stress, gdf_t.stadnardization_cases],
        color=["orange", "blue"],
        line_width=1,
    )

    entries = [
        LegendItem(label=text, renderers=[lines], index=position)
        for position, text in enumerate(("stress", "cases"))
    ]
    fig.add_layout(Legend(items=entries))

    show(fig)

def plot_hist():
    """Overlay standardised case bars with the standardised log-stress line.

    Reads the notebook-level ``gdf_t``, targets ``hist.html`` and shows the
    figure.
    """
    output_file("hist.html")

    county_ids = gdf_t.cnty_fips
    fig = figure(
        plot_width=1000,
        plot_height=500,
        title="stress level county",
        x_axis_label="cnty",
        y_axis_label="cases",
    )

    # Bars: standardised case counts per county.
    bar_style = dict(width=0.9, alpha=0.5, color="firebrick")
    fig.vbar(x=county_ids, top=gdf_t.stadnardization_cases, **bar_style)

    # Line: standardised log-stress per county.
    fig.line(
        x=county_ids,
        y=gdf_t.stadnardization_log_stress,
        line_width=1,
        color="blue",
    )

    # Disabled tweaks kept for reference:
    #   fig.y_range.start = 0
    #   fig.x_range.range_padding = 0.1
    fig.xgrid.grid_line_color = None
    fig.xaxis.major_label_orientation = 1.2

    show(fig)


def plot_point():
    """Overlay L2-normalised case bars with L2-normalised log-stress points.

    Reads the notebook-level ``gdf_t``, targets ``point.html`` and shows the
    figure.
    """
    output_file("point.html")

    county_ids = gdf_t.cnty_fips
    fig = figure(
        plot_width=1200,
        plot_height=500,
        title="stress level county",
        x_axis_label="cnty",
        y_axis_label="cases",
    )

    # Bars: normalised case counts per county.
    bar_style = dict(width=0.9, alpha=0.5, color="firebrick")
    fig.vbar(x=county_ids, top=gdf_t.normalize_cases, **bar_style)

    # Points: normalised log-stress per county.
    marker_style = dict(size=1, color="navy", alpha=0.5)
    fig.circle(x=county_ids, y=gdf_t.normalize_log_stress, **marker_style)

    # Disabled tweaks kept for reference:
    #   fig.y_range.start = 0
    #   fig.x_range.range_padding = 0.1
    fig.xgrid.grid_line_color = None
    fig.xaxis.major_label_orientation = 1.2

    show(fig)


# Render the bar-plus-points figure; plot_point() targets point.html.
plot_point()

In [156]:
# Summary statistics of gdf_t, shown as the cell output.
gdf_t.describe()

Unnamed: 0,cnty_fips,avg_stress,cases,deaths,log_stress,stadnardization_avg_stress,stadnardization_cases,stadnardization_log_cases
count,2191.0,2191.0,2191.0,2191.0,2191.0,2191.0,2191.0,2191.0
mean,30341.008672,0.159236,179.060703,5.770881,0.253861,6.080637e-18,9.72902e-18,1.61745e-16
std,15358.630736,0.108955,991.927487,34.722139,0.151766,1.000228,1.000228,1.000228
min,1001.0,0.001852,1.0,0.0,0.003469,-1.444819,-0.1795508,-1.650232
25%,18096.0,0.093184,4.0,0.0,0.158713,-0.6063721,-0.1765257,-0.6270817
50%,29119.0,0.148331,14.0,0.0,0.245169,-0.1001131,-0.166442,-0.0572866
75%,45039.0,0.197637,55.0,2.0,0.318752,0.3525304,-0.1250989,0.4276749
max,56041.0,0.974001,21512.0,890.0,1.147588,7.479725,21.51146,5.890207


983471.0666027009

In [18]:
# Preview the first rows of gdf_t.
gdf_t.head()

Unnamed: 0,FID,cnty_fips,state_name,state_fips,cnty_name,state_abbr,avg_stress,date_x,geometry,date_y,county,state,cases,deaths,log_stress,stadnardization_avg_stress,stadnardization_cases,stadnardization_log_stress,normalize_log_stress,normalize_cases
0,1,1001.0,Alabama,1,Autauga,AL,0.210742,2020-04-10T00:00:00,"POLYGON ((-86.82067 32.34731, -86.81446 32.370...",2020-04-10,Autauga,Alabama,17,1,0.331091,0.472833,-0.163417,0.509448,0.023922,0.00036
1,2,1003.0,Alabama,1,Baldwin,AL,0.148518,2020-04-10T00:00:00,"POLYGON ((-87.97309 31.16482, -87.93710 31.173...",2020-04-10,Baldwin,Alabama,59,1,0.250103,-0.098396,-0.121065,-0.024681,0.018071,0.001251
2,3,1005.0,Alabama,1,Barbour,AL,0.345123,2020-04-10T00:00:00,"POLYGON ((-85.74337 31.62624, -85.71720 31.679...",2020-04-10,Barbour,Alabama,9,0,0.52502,1.706486,-0.171484,1.788425,0.037934,0.000191
3,4,1007.0,Alabama,1,Bibb,AL,0.125892,2020-04-10T00:00:00,"POLYGON ((-87.41986 33.01177, -87.31532 33.012...",2020-04-10,Bibb,Alabama,11,0,0.206323,-0.306108,-0.169467,-0.313415,0.014907,0.000233
4,5,1009.0,Alabama,1,Blount,AL,0.007057,2020-04-10T00:00:00,"POLYGON ((-86.96799 33.86045, -86.92667 33.872...",2020-04-10,Blount,Alabama,12,0,0.012344,-1.397034,-0.168459,-1.592723,0.000892,0.000254


In [93]:
# Correlation between the raw stress score and the case count
# (Series.corr uses Pearson by default).
gdf_t['avg_stress'].corr(gdf_t['cases'])

-0.007441484788031607

In [94]:
# Preview the first rows of gdf_t.
# NOTE(review): the output saved with this cell has no derived columns and
# shows different geometry coordinates than the In [18] preview, so it was
# captured from another kernel state -- re-run in order or drop the cell.
gdf_t.head()

Unnamed: 0,cnty_fips,state_name,state_fips,cnty_name,state_abbr,avg_stress,date_x,geometry,date_y,county,state,cases,deaths
0,1001.0,Alabama,1,Autauga,AL,0.210742,2020-04-10T00:00:00,"POLYGON ((-9664832.760 3808987.250, -9664141.4...",2020-04-10,Autauga,Alabama,17,1
1,1003.0,Alabama,1,Baldwin,AL,0.148518,2020-04-10T00:00:00,"POLYGON ((-9793119.558 3654173.035, -9789113.3...",2020-04-10,Baldwin,Alabama,59,1
2,1005.0,Alabama,1,Barbour,AL,0.345123,2020-04-10T00:00:00,"POLYGON ((-9544908.599 3714348.264, -9541995.5...",2020-04-10,Barbour,Alabama,9,0
3,1007.0,Alabama,1,Bibb,AL,0.125892,2020-04-10T00:00:00,"POLYGON ((-9731533.815 3896866.747, -9719897.2...",2020-04-10,Bibb,Alabama,11,0
4,1009.0,Alabama,1,Blount,AL,0.007057,2020-04-10T00:00:00,"POLYGON ((-9681232.862 4010079.853, -9676632.1...",2020-04-10,Blount,Alabama,12,0


In [124]:
# Preview the first rows of gdf_t.
# NOTE(review): the output saved with this cell lacks the normalize_* columns
# and shows different log_stress values than the In [18] preview, so it was
# captured from another kernel state -- re-run in order or drop the cell.
gdf_t.head()

Unnamed: 0,cnty_fips,state_name,state_fips,cnty_name,state_abbr,avg_stress,date_x,geometry,date_y,county,state,cases,deaths,stadnardization_avg_stress,stadnardization_cases,log_stress
0,1001.0,Alabama,1,Autauga,AL,0.210742,2020-04-10T00:00:00,"POLYGON ((-9664832.760 3808987.250, -9664141.4...",2020-04-10,Autauga,Alabama,17,1,0.472833,-0.163417,0.335861
1,1003.0,Alabama,1,Baldwin,AL,0.148518,2020-04-10T00:00:00,"POLYGON ((-9793119.558 3654173.035, -9789113.3...",2020-04-10,Baldwin,Alabama,59,1,-0.098396,-0.121065,0.247232
2,1005.0,Alabama,1,Barbour,AL,0.345123,2020-04-10T00:00:00,"POLYGON ((-9544908.599 3714348.264, -9541995.5...",2020-04-10,Barbour,Alabama,9,0,1.706486,-0.171484,0.52669
3,1007.0,Alabama,1,Bibb,AL,0.125892,2020-04-10T00:00:00,"POLYGON ((-9731533.815 3896866.747, -9719897.2...",2020-04-10,Bibb,Alabama,11,0,-0.306108,-0.169467,0.216307
4,1009.0,Alabama,1,Blount,AL,0.007057,2020-04-10T00:00:00,"POLYGON ((-9681232.862 4010079.853, -9676632.1...",2020-04-10,Blount,Alabama,12,0,-1.397034,-0.168459,0.01243


In [125]:
# Correlation between the log-transformed stress value and the case count
# (Series.corr uses Pearson by default).
gdf_t["log_stress"].corr(gdf_t["cases"])

0.0024642084020587503

In [14]:
# NOTE(review): `x` is not assigned in any cell visible in this notebook, so
# this cell presumably relies on leftover kernel state -- confirm or remove.
# It also prints the whole array; a short slice such as x[:5] would be enough.
x[:]

array([[0.41451399],
       [0.44613434],
       [0.34188328],
       [0.4978512 ],
       [0.25342732],
       [0.38356432],
       [0.18046443],
       [0.07807096],
       [0.90059624],
       [0.69691831],
       [0.02349653],
       [0.3818002 ],
       [0.96715255],
       [0.17304226],
       [0.54639816],
       [0.74730387],
       [0.56725884],
       [0.62663772],
       [0.90470805],
       [0.72162941],
       [0.47296815],
       [0.36311922],
       [0.40999962],
       [0.57020272],
       [0.88440567],
       [0.97205121],
       [0.24270569],
       [0.85359125],
       [0.6403671 ],
       [0.13431122],
       [0.535225  ],
       [0.15309023],
       [0.5993694 ],
       [0.1629321 ],
       [0.19973918],
       [0.20833715],
       [0.77422395],
       [0.13776578],
       [0.85073541],
       [0.00113163],
       [0.84385155],
       [0.20336585],
       [0.90476601],
       [0.11553901],
       [0.86178968],
       [0.14858109],
       [0.39472922],
       [0.322