In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import pyproj
import shapely
from typing import List

In [2]:
# Notebook display options: let pandas render up to 500 columns and 500 rows
# of a frame before it starts truncating the output.
pd.set_option('display.max_columns', 500,
              'display.max_rows', 500)

In [3]:
def geod2utm(row, zone='16T', ellps='WGS84'):
    '''Convert one geodetic (lon/lat) coordinate pair to UTM coordinates.

    Parameters
    ----------
    row : mapping-like (e.g. ``pd.Series`` or ``dict``)
        Must provide ``'lat'`` and ``'lon'`` entries. They are handed to
        pyproj unchanged (presumably decimal degrees -- TODO confirm against
        the data source).
    zone : str or int, default '16T'
        UTM zone passed to ``pyproj.Proj(proj='utm', zone=...)``.
        NOTE(review): the default carries a latitude-band letter ('T');
        confirm the installed pyproj/PROJ accepts it as zone 16.
    ellps : str, default 'WGS84'
        Ellipsoid name passed to ``pyproj.Proj(..., ellps=...)``.

    Returns
    -------
    pd.Series
        Two entries, ``'UTMx'`` and ``'UTMy'``, holding the projected values.
    '''
    projection = pyproj.Proj(proj='utm', zone=zone, ellps=ellps)

    # The projection object is called with x (longitude) before y (latitude).
    utm_x, utm_y = projection(row['lon'], row['lat'])

    return pd.Series({'UTMx': utm_x, 'UTMy': utm_y})

In [4]:
def make_utm_points(row):
    '''Build a shapely ``Point`` from the ``'UTMx'`` and ``'UTMy'`` entries of ``row``.'''
    return shapely.geometry.Point(row['UTMx'], row['UTMy'])

# Read in summary data from the 2010 census

In [5]:
# Load the 2010 census summary table; the path is relative to the notebook's
# working directory.
census_summary_df = pd.read_csv('data/2010-census-summary.csv')

In [6]:
# Remove the columns named 'Unnamed: 67' through 'Unnamed: 128'.
# NOTE(review): presumably these are empty trailing columns that read_csv
# auto-named -- confirm against the CSV.
# A single drop() with errors='ignore' replaces the per-column `del` loop so
# this cell can be re-run without a KeyError once the columns are already gone.
census_summary_df = census_summary_df.drop(
    labels=['Unnamed: {0}'.format(column_number) for column_number in range(67, 129)],
    axis='columns',
    errors='ignore',
)

# Read in shapes of community areas

In [7]:
# Load the community-area shapes with geopandas.
# NOTE(review): `communities` is not referenced again in the visible cells; the
# polygons built later come from `communities_df` (a pd.read_json of the same
# file) -- confirm whether this read is still needed.
communities = gpd.read_file('data/community-areas.geojson')

In [8]:
# Persist the census summary (after the unnamed-column removal above) as a pickle.
census_summary_df.to_pickle('data/census_summary-cleaned.pkl')

In [9]:
# Read the same GeoJSON file again, this time as plain JSON through pandas.
# A later cell iterates communities_df['features'] and indexes every entry as a
# nested dict ('properties', 'geometry').
# NOTE(review): this relies on pd.read_json turning the top-level 'features'
# array into a column of dicts -- confirm that holds for this file's layout.
communities_df = pd.read_json('data/community-areas.geojson')

In [10]:
# Empty accumulators, one entry per community area once they are filled.
community_names: List[str] = list()
community_area_numbers: List[int] = list()
community_geometries: List[shapely.geometry.Polygon] = list()

# One-row placeholder frame; the real frame is assembled from the accumulators
# in a later cell and replaces this one.
community_modeled: pd.DataFrame = pd.DataFrame(
    [
        {
            'community name': 'DELETE THIS',
            'community number': 0,
            'UTMGeometry': 'DELETE THIS',
        },
    ]
)

In [11]:
# Rebuild the per-community accumulators from the raw GeoJSON features.
# The lists are reset first so that re-running this cell never appends duplicates.
# There is deliberately no `del community_modeled` here: the frame is reassigned
# wholesale when it is rebuilt, and deleting it made this cell raise NameError
# when it was run twice in a row.
community_names: list = []
community_area_numbers: list = []
community_geometries: list = []
for feature in communities_df['features']:
    community_poly_points: list = []
    community_names.append(feature['properties']['community'])
    community_area_numbers.append(feature['properties']['area_numbe'])
    # NOTE(review): [0][0] keeps only the first ring of the first polygon of the
    # geometry; any further polygons or rings in the feature are ignored --
    # confirm that this is intended.
    for point in feature['geometry']['coordinates'][0][0]:
        utm_xy = geod2utm(pd.Series({'lon': point[0], 'lat': point[1]}))
        # Give shapely plain (x, y) tuples instead of label-indexed Series so the
        # vertex order does not depend on positional Series indexing.
        community_poly_points.append((utm_xy['UTMx'], utm_xy['UTMy']))
    community_geometries.append(shapely.geometry.Polygon(community_poly_points))

In [12]:
# Assemble the modeled community table: one row per community area, with its
# name, its area number and its polygon in UTM coordinates.
community_modeled = pd.DataFrame(
    data={
        'community name': community_names,
        'community number': community_area_numbers,
        'UTMGeometry': community_geometries,
    },
    index=range(len(community_names)),
)

In [15]:
# Store the community number as a 64-bit integer column.
# NOTE(review): presumably 'area_numbe' arrives from the GeoJSON as strings -- TODO confirm.
community_modeled['community number'] = (
    community_modeled['community number'].astype(np.int64)
)

In [16]:
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
community_modeled.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 3 columns):
community name      77 non-null object
community number    77 non-null int64
UTMGeometry         77 non-null object
dtypes: int64(1), object(2)
memory usage: 1.9+ KB
None


In [17]:
# Persist the assembled community table (names, area numbers, UTM polygons) as a pickle.
community_modeled.to_pickle('data/community_modeled.pkl')