# Testing your environment

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Smoke test: build a small random two-column frame and scatter-plot it to
# confirm that numpy, pandas and matplotlib are importable and work together.
demo_points = pd.DataFrame(np.random.rand(10, 2), columns=['A', 'B'])
plt.scatter(x='A', y='B', data=demo_points)

# Data download

In [None]:
from tools import download_file, unzip, ResourceMonitor
# Download and unpack the data archive while a ResourceMonitor is running.
# Each phase is tagged ('download', 'unzip', 'done') so the phases can be
# told apart in the plot produced at the end of the cell.
mon = ResourceMonitor()
mon.start()
try:
    mon.tag('download')
    url = 'https://uconn-my.sharepoint.com/:u:/g/personal/anton_babkin_uconn_edu/EcVkrfE6Y_FDnmaYziKfjscBa2Rd0KGrXR_dV5vlm2Ql5Q?e=gTBcYJ&download=1'
    file = download_file(url, 'data', 'synig.zip', overwrite=True)
    mon.tag('unzip')
    unzip(file, 'data')
    mon.tag('done')
finally:
    # Always stop the monitor, even if the download or unzip step raises, so
    # a failed run does not leave a started monitor behind in the kernel.
    # NOTE(review): presumably the monitor samples in the background until
    # stop() is called - confirm against tools.ResourceMonitor.
    mon.stop()
mon.plot()

# Data preview

In [None]:
import pandas as pd
# Load only the first 100,000 rows of the 2020 file to keep the preview fast,
# then display five randomly chosen rows as the cell output.
df = pd.read_csv(filepath_or_buffer='data/synig/2020.csv', nrows=100_000)
df.sample(n=5)

# GIS

Plot a sample of businesses over Census tracts in Wisconsin.

*Some businesses may appear out of state, or even off land, because noise has been added to the coordinates.*

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from tools import download_file, unzip, state_aa_00

# Plot a random sample of one state's businesses on top of that state's
# Census tract boundaries.
state = 'WI'

# Business records: keep only the chosen state and turn the LONGITUDE/LATITUDE
# columns into a point geometry column named 'LONLAT'.
df = pd.read_csv('data/synig/2020.csv')
df = df[df['STATE'] == state]
df = gpd.GeoDataFrame(df)
df['LONLAT'] = gpd.points_from_xy(df['LONGITUDE'], df['LATITUDE'])
# CRS values are given as 'EPSG:<code>' authority strings; the older
# {'init': 'epsg:<code>'} dict form is deprecated by pyproj/geopandas.
# NOTE(review): confirm that EPSG:6579 is the intended projected CRS for
# Wisconsin; the code is kept exactly as in the original.
df = df.set_geometry('LONLAT', crs='EPSG:4326').to_crs('EPSG:6579')

# Census tract boundaries (2019 cartographic boundary file) for the state,
# reprojected once to the same CRS as the business points.
state_code = state_aa_00[state]
url = f'https://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_{state_code}_tract_500k.zip'
f = download_file(url, f'data/tracts/{state_code}', overwrite=True)
unzip(f, f'data/tracts/{state_code}', overwrite=True)
tracts = gpd.read_file(f'data/tracts/{state_code}/cb_2019_{state_code}_tract_500k.shp').to_crs('EPSG:6579')

fig, ax = plt.subplots(figsize=(10, 10))
# tracts is already in the target CRS, so no second reprojection is needed.
tracts.plot(ax=ax, alpha=0.6, edgecolor='gray')
# Cap the sample size at the number of available rows: DataFrame.sample
# raises if asked for more rows than exist (without replacement).
df.sample(min(1000, len(df))).plot(ax=ax, markersize=2, color='red')
ax.axis('off');