# **Adding Data Based Solely on Charger Addresses (Deprecated)**
### Dan Rabinovich

In [None]:
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np

1. Read in addresses, census data, EV cars by zipcode
2. Sort census data by tract, EV cars by zipcode
3. Map correctly

In [None]:
# Source tables: charger addresses and aggregated census data from the project
# repository, plus California's vehicle counts by fuel type and zip code.
stations_url = 'https://github.com/d-rabinovich/EV-Charger-Project/blob/main/addresses.csv?raw=true'
census_url = 'https://github.com/d-rabinovich/EV-Charger-Project/blob/main/aggregated_census_data.csv?raw=true'
ev_cars_url = 'https://data.ca.gov/dataset/15179472-adeb-4df6-920a-20640d02b08c/resource/d304108a-06c1-462f-a144-981dd0109900/download/vehicle-fuel-type-count-by-zip-code.csv'

df_stations = pd.read_csv(stations_url, encoding="ISO-8859-1")
df_census = pd.read_csv(census_url, encoding="ISO-8859-1")
df_cars_raw = pd.read_csv(ev_cars_url, low_memory=False)

# Keep only the plug-in fuel types, then total the vehicles per zip code.
ev_types = ['Battery Electric', 'Plug-in Hybrid']
is_ev = df_cars_raw['Fuel'].isin(ev_types)
df_cars_almost = df_cars_raw[is_ev]
vehicles_by_zip = df_cars_almost.groupby('Zip Code')['Vehicles'].sum()
# 'OOS' and 'Other' are removed from the grouped index (presumably non-zip buckets).
df_cars = vehicles_by_zip.drop(labels=['OOS', 'Other']).reset_index(name='Vehicles')

In [None]:
# Spot-check the row with index label 1 in each frame.
print(df_stations.loc[1,:])
print(df_cars.loc[1,:])


Unnamed: 0                                  1
Station                             AUI-10263
Address       1231 S Hill St, Los Angeles, CA
lat                                   34.0385
lon                                  -118.262
zipcode                            90015-3018
Geo_FIPS                           6037207900
Name: 1, dtype: object
Zip Code    90001
Vehicles       56
Name: 1, dtype: object


In [None]:
# Census columns copied onto each station. They are split into two groups so
# the resulting column order is unchanged: socio-economic columns, then
# ev_car_zip_count, then the population columns.
socioeconomic_cols = ['concentrated_disadvantage', 'commute_time',
                      'cars_per_home', 'median_household_income']
population_cols = ['total_population', 'pop_below_18', 'pop_between_18_34',
                   'pop_between_34_65', 'pop_over_65']

FIPS_List = np.array(df_census['Geo_FIPS'])
zip_List = np.array(df_cars['Zip Code'])
vehicle_counts = df_cars['Vehicles'].to_numpy()

census_positions = []  # positional row of df_census matched to each station
ev_cars = []           # EV count of each station's zip code (0 when the zip is absent)
for ind, row in tqdm(df_stations.iterrows(), total=df_stations.shape[0]):
  # First census row whose tract id equals the station's tract id.
  f_matches = np.where(FIPS_List == row['Geo_FIPS'])[0]
  if f_matches.size == 0:
    raise IndexError(f"Geo_FIPS {row['Geo_FIPS']!r} (station row {ind}) not found in df_census")
  census_positions.append(f_matches[0])

  # Station zip codes may be ZIP+4 ("90015-3018"); only the part before the
  # dash is compared against the vehicle table.
  z_matches = np.where(zip_List == row['zipcode'].split("-")[0])[0]
  ev_cars.append(vehicle_counts[z_matches[0]] if z_matches.size else 0)

# np.where returns positions, so the census values are taken positionally
# rather than through index labels.
for col in socioeconomic_cols:
  df_stations[col] = df_census[col].to_numpy()[census_positions]
df_stations['ev_car_zip_count'] = ev_cars
for col in population_cols:
  df_stations[col] = df_census[col].to_numpy()[census_positions]

# errors='ignore' keeps the cell re-runnable once the column is already gone.
df_stations = df_stations.drop(columns='Unnamed: 0', errors='ignore')

HBox(children=(FloatProgress(value=0.0, max=177.0), HTML(value='')))


56708.0
0.14936149341820282


In [None]:
# Inspect the enriched station table: full frame, column names, and the row labelled 1.
print(df_stations)
print(list(df_stations))
print(df_stations.loc[1,:])

       Station  ... pop_over_65
0    AUI-10372  ...         959
1    AUI-10263  ...         335
2    AUI-10254  ...          80
3    AUI-10265  ...         506
4    AUI-10218  ...         952
..         ...  ...         ...
172  AUI-10358  ...         588
173  AUI-10361  ...         256
174  AUI-10362  ...         570
175  AUI-10359  ...         519
176  AUI-10261  ...         799

[177 rows x 16 columns]
['Station', 'Address', 'lat', 'lon', 'zipcode', 'Geo_FIPS', 'concentrated_disadvantage', 'commute_time', 'cars_per_home', 'median_household_income', 'ev_car_zip_count', 'total_population', 'pop_below_18', 'pop_between_18_34', 'pop_between_34_65', 'pop_over_65']
Station                                            AUI-10263
Address                      1231 S Hill St, Los Angeles, CA
lat                                                  34.0385
lon                                                 -118.262
zipcode                                           90015-3018
Geo_FIPS                

In [None]:
# Colab-only: write the enriched station table to CSV and hand it to the browser for download.
from google.colab import files
df_stations.to_csv('stations_with_features.csv') 
files.download('stations_with_features.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Combine Features and Labels

In [None]:
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np

# Hosted copies of the station feature table and of the utilization report used for the outcome columns.
features_url = 'https://github.com/d-rabinovich/EV-Charger-Project/blob/main/stations_with_features.csv?raw=true'
labels_url = 'https://raw.githubusercontent.com/amberguo/ev-charger/main/Flo_Parks_Utilization_Report_addresses.csv?raw=true'

In [None]:
features_raw = pd.read_csv(features_url, encoding = "ISO-8859-1")
labels_raw = pd.read_csv(labels_url, encoding = "ISO-8859-1")

features_cols = ['Station', 'lat', 'lon', 'zipcode', 'Geo_FIPS', 
                 'concentrated_disadvantage', 'commute_time', 
                 'cars_per_home', 'median_household_income',
                 'ev_car_zip_count']
labels_cols = ['Station', 'Total Sessions', 'Total Connection Time', 
               'Total Amount', 'Total kWh']

features = features_raw[features_cols]
# .copy() gives labels its own data, so the column assignment below writes to
# a real frame instead of a slice of labels_raw (avoids SettingWithCopyWarning).
labels = labels_raw[labels_cols].copy()

# clean up station id: keep the text between the first pair of double quotes
labels['Station'] = labels['Station'].str.split("\"").str[1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
# Rebind instead of dropping in place: no write into a slice of labels_raw, and
# errors='ignore' lets the cell be re-run after the column is already gone.
labels = labels.drop(columns='Station', errors='ignore')

ur good


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
# Combine the two
# Column-wise concat aligns rows by index label, so features and labels must list the stations in the same order.
df = pd.concat([features, labels], axis = 1)
print(list(df))
print(df.loc[1,:])

['Station', 'lat', 'lon', 'zipcode', 'Geo_FIPS', 'concentrated_disadvantage', 'commute_time', 'cars_per_home', 'median_household_income', 'ev_car_zip_count', 'Total Sessions', 'Total Connection Time', 'Total Amount', 'Total kWh']
Station                       AUI-10263
lat                             34.0385
lon                            -118.262
zipcode                      90015-3018
Geo_FIPS                     6037207900
concentrated_disadvantage     -0.116582
commute_time                         29
cars_per_home                 0.0944039
median_household_income           88800
ev_car_zip_count                    224
Total Sessions                      159
Total Connection Time            542:59
Total Amount                      67.38
Total kWh                       2846.12
Name: 1, dtype: object


In [None]:
# Colab-only: write the combined features + outcomes table to CSV and download it.
from google.colab import files
df.to_csv('stations_with_features_and_outcomes.csv') 
files.download('stations_with_features_and_outcomes.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **Circular Geo-Analysis**
### Dan Rabinovich



Below is the code Parthik put into the census slides

*   **swifter**: Applies functions to pandas dfs quickly
*   **functools.partial**: Creates a "callable" that freezes some of the arguments of a function in place. 
  * e.g. You can create a partial of a function w/ 2 args and you only have to supply one arg to the partial. 
*   **pyproj**: Python interface with PROJ, a cartography API
*   **lambda row**: Defines an *unnamed* (anonymous) function inline. `df.apply(lambda row: ..., axis=1)` calls it once per row of the dataframe, instead of passing a named function such as `np.sqrt`
  * The expression after the colon is what the function returns

## Making Polygons of Coordinates

The code in this subsection is from the Census Tutorial slides with the dataframe names changed

In [None]:
%%time

!pip install swifter
!pip install pyproj

import swifter 
from functools import partial
import pyproj 
from shapely.ops import transform
from shapely.geometry import Point
import pandas as pd
import numpy as np
import geopy

In [None]:
# Re-read the raw charger addresses; this replaces any df_stations built earlier in the notebook.
stations_url = 'https://github.com/d-rabinovich/EV-Charger-Project/blob/main/addresses.csv?raw=true'

df_stations = pd.read_csv(stations_url, encoding = "ISO-8859-1")

In [None]:
# WGS84 geographic CRS that the buffer is projected back into.
proj_wgs84 = pyproj.CRS.from_epsg(4326)

radius_km = 1 # Change this to what we want

def geodesic_point_buffer(lat, lon, km):
  """Return the vertices of a circle of radius `km` centred on (lat, lon).

  The circle is drawn in metres in an azimuthal equidistant projection
  centred on the point, then projected back to WGS84.

  Args:
    lat: latitude of the centre, in degrees.
    lon: longitude of the centre, in degrees.
    km: buffer radius in kilometres.

  Returns:
    List of (lon, lat) tuples tracing the buffer's exterior ring.
  """
  # Azimuthal equidistant projection
  aeqd_proj = '+proj=aeqd +lat_0={lat} +lon_0={lon} +x_0=0 +y_0=0'
  # Transformer replaces the deprecated pyproj.transform / Proj(init=...) API.
  # always_xy=True keeps the (lon, lat) output order the old call produced.
  to_wgs84 = pyproj.Transformer.from_crs(
      pyproj.CRS.from_proj4(aeqd_proj.format(lat=lat, lon=lon)),
      proj_wgs84,
      always_xy=True)
  buf = Point(0,0).buffer(km*1000) # Buffer takes meters
  return transform(to_wgs84.transform, buf).exterior.coords[:]

# One buffer polygon per station row.
# (Station list presumably derives from Flo_parks_utilization_report.csv - confirm.)
df_stations['polygon'] =\
  df_stations.swifter.apply(lambda row: geodesic_point_buffer(row['lat'], row['lon'], radius_km), 
                                                            axis=1)



In [None]:
%%time 
from shapely.geometry import Polygon

def check_point_in_polygon(lons_lats_vect, x, y):
  """Tell whether a point lies inside, or on the boundary of, a polygon.

  Args:
    lons_lats_vect: vertex sequence handed directly to shapely's Polygon.
    x, y: point coordinates. Note the point is built as Point(y, x), so `y`
      becomes the first coordinate and `x` the second.

  Returns:
    True if the polygon contains or touches the point, otherwise False.
  """
  region = Polygon(lons_lats_vect)
  candidate = Point(y, x)
  if region.contains(candidate):
    return True
  return region.touches(candidate)

CPU times: user 13 µs, sys: 2 µs, total: 15 µs
Wall time: 18.4 µs


* Convert polygons to lists of tracts and zipcodes
* Get counts for each tract and zipcode
* Take weighted mean of features


## Convert to Census Tracts



In [None]:
import pandas as pd # this is so we can run bookmark alone
import json
from urllib.request import urlopen
import time
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.geometry import MultiPolygon

In [None]:
# https://geohub.lacity.org/datasets/census-tracts-2010-population/data?geometry=-120.029%2C33.595%2C-116.560%2C34.392
la_tracts_url = 'http://s3-us-west-2.amazonaws.com/boundaries.latimes.com/archive/1.0/boundary-set/census-tracts-2012.geojson'
# The with-block closes the HTTP response once the GeoJSON has been parsed.
with urlopen(la_tracts_url) as response:
  la_tracts = json.loads(response.read())

In [None]:
import time
from tqdm.notebook import tqdm

# Adds a column as a feature
# (tract id, overlap as a proportion of tract, overlap as a proportion of charger's circle)
start_time = time.time()

# Build every tract's polygon parts and total area once; they do not depend on
# the station, so rebuilding them inside the station loop was wasted work.
tract_shapes = []
for obj in la_tracts["features"]:
  tract_id = float("06037" + str(obj['properties']['metadata']['TRACTCE']))
  if obj['geometry']['type'] == "MultiPolygon":
    # Only the outer ring (index 0) of each part is used.
    parts = [Polygon(polyg[0]) for polyg in obj['geometry']['coordinates']]
  else:
    parts = [Polygon(obj['geometry']['coordinates'][0])]
  tract_shapes.append((tract_id, parts, sum(part.area for part in parts)))

tract_hits_list = []
for ind, row in tqdm(df_stations.iterrows(), total=df_stations.shape[0]):
  Poly = Polygon(row['polygon'])  # the station's buffer circle
  p_area = Poly.area
  tract_hits = []
  for tract_id, parts, t_area in tract_shapes:
    o_area = sum(part.intersection(Poly).area for part in parts)
    if o_area != 0:
      tract_hits.append((tract_id, (o_area / t_area), (o_area / p_area)))
  tract_hits_list.append(tract_hits)
df_stations['tract_hits'] = tract_hits_list

total_time = time.time() - start_time

print(total_time)

  

HBox(children=(FloatProgress(value=0.0, max=177.0), HTML(value='')))


23.421643257141113


## Visualizing radial geoanalysis



First I will find a charger with only a few overlaps to make it simpler

In [None]:
# Number of overlapping tracts per station, used to pick a simple example.
tract_vis = df_stations['tract_hits']
print(type(tract_vis))
# Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
for ind, val in tract_vis.items():
  print(f"{ind}: {len(val)}")

<class 'pandas.core.series.Series'>
0: 10
1: 7
2: 7
3: 13
4: 9
5: 8
6: 7
7: 11
8: 14
9: 6
10: 4
11: 5
12: 8
13: 4
14: 8
15: 9
16: 7
17: 8
18: 6
19: 6
20: 4
21: 6
22: 7
23: 9
24: 9
25: 12
26: 12
27: 7
28: 6
29: 9
30: 7
31: 9
32: 14
33: 23
34: 6
35: 4
36: 6
37: 5
38: 12
39: 23
40: 10
41: 10
42: 13
43: 8
44: 6
45: 12
46: 10
47: 6
48: 13
49: 8
50: 7
51: 9
52: 14
53: 12
54: 6
55: 10
56: 8
57: 9
58: 13
59: 8
60: 10
61: 9
62: 7
63: 6
64: 5
65: 6
66: 8
67: 3
68: 8
69: 10
70: 8
71: 15
72: 12
73: 6
74: 9
75: 7
76: 4
77: 11
78: 7
79: 8
80: 5
81: 12
82: 7
83: 7
84: 11
85: 12
86: 9
87: 5
88: 8
89: 18
90: 8
91: 7
92: 11
93: 12
94: 5
95: 7
96: 3
97: 5
98: 7
99: 6
100: 6
101: 9
102: 4
103: 9
104: 8
105: 4
106: 14
107: 7
108: 8
109: 11
110: 5
111: 11
112: 8
113: 12
114: 10
115: 10
116: 12
117: 9
118: 6
119: 6
120: 10
121: 11
122: 14
123: 10
124: 9
125: 15
126: 20
127: 20
128: 18
129: 13
130: 10
131: 15
132: 10
133: 11
134: 23
135: 6
136: 8
137: 7
138: 6
139: 10
140: 6
141: 8
142: 9
143: 8
144: 3
145: 9

In [None]:
# List every station address next to its index label.
# Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
for ind, val in df_stations['Address'].items():
  print(ind, ": ", val)

0 :  2316 W Martin Luther King Jr Blvd, Los Angeles, CA
1 :  1231 S Hill St, Los Angeles, CA
2 :  942 S Crocker St, Los Angeles, CA
3 :  5764 S Vermont Ave, Los Angeles, CA
4 :  7150 N Shoup Ave, Los Angeles, CA
5 :  20771 W Sherman Way, Los Angeles, CA
6 :  145 E Olympic Blvd, Los Angeles, CA
7 :  1222 S Grand Ave, Los Angeles, CA
8 :  1676 N Hobart Blvd, Los Angeles, CA
9 :  5617 N Laurel Canyon Blvd, Los Angeles, CA
10 :  4977 N Genesta Ave, Los Angeles, CA
11 :  4882 N Libbit Ave, Los Angeles, CA
12 :  5093 Alonzo Ave, Los Angeles, CA
13 :  20217 Prairie St, Los Angeles, CA
14 :  25825 Normandie Ave, Los Angeles, CA
15 :  25820 S Western Ave, Los Angeles, CA
16 :  1557 W 19th St, Los Angeles, CA
17 :  12412 W Moorpark St, Los Angeles, CA
18 :  23743 W Roscoe Blvd, Los Angeles, CA
19 :  17729 Chatsworth Ave, Los Angeles, CA
20 :  6710 Platt Ave, Los Angeles, CA
21 :  290 N Mission Road, Los Angeles, CA
22 :  17832 Plummer Street, Los Angeles, CA
23 :  20321 W Saticoy St, Los Angeles

In [None]:
# Show which columns df_stations carries at this point.
print(df_stations.columns)

Index(['Station', 'Address', 'lat', 'lon', 'zipcode', 'Geo_FIPS', 'polygon',
       'tract_hits'],
      dtype='object')


In [None]:
import folium
import json
from folium import plugins

# Alternate tract-boundary endpoint; in the visible notebook it is only referenced by the commented-out folium.GeoJson call.
la_tracts_url = 'http://boundaries.latimes.com/1.0/boundary-set/census-tracts-2012/?format=geojson'

In [None]:
# Station whose buffer is visualised below, selected by row position.
# 6710 Platt Ave, Los Angeles, CA
charger = df_stations.iloc[20]
# charger = df_stations.iloc[5]
# [lat, lon] pair used as the map centre and circle location.
coordinates = [charger['lat'], charger['lon']]

In [None]:
# NOTE(review): la_tracts is the parsed GeoJSON object, so len() counts its top-level keys,
# not the tracts; len(la_tracts["features"]) may have been intended - confirm.
print(len(la_tracts))
print(charger['Address'])

2
20771 W Sherman Way, Los Angeles, CA


In [None]:
# Collect, for the selected charger, the outline of every piece of a census
# tract that falls inside the charger's buffer circle.
poly = pd.Series(charger['polygon'])
Poly = Polygon(poly)
overlap_list = []
for obj in la_tracts["features"]:
  tract_id = float("06037" + str(obj['properties']['metadata']['TRACTCE']))

  # Only the outer ring (index 0) of each polygon part is used.
  if obj['geometry']['type'] == "MultiPolygon":
    tracts = [Polygon(polyg[0]) for polyg in obj['geometry']['coordinates']]
  else:
    tracts = [Polygon(obj['geometry']['coordinates'][0])]

  for tract in tracts:
    overlap_poly = tract.intersection(Poly)
    if overlap_poly.area == 0:
      continue
    # An intersection can come back as a multi-part geometry, which has no
    # .exterior; record each polygonal part separately instead of crashing.
    for part in getattr(overlap_poly, 'geoms', [overlap_poly]):
      if part.geom_type == 'Polygon' and part.area != 0:
        # Drop the closing vertex, which repeats the first one.
        overlap_list.append((tract_id, list(part.exterior.coords[:-1])))

print(overlap_list)
print(len(overlap_list))

[(6037127102.0, [(-118.44876, 34.203015), (-118.44870344472112, 34.209393994766934), (-118.44808339312604, 34.20915032336036), (-118.44717006231593, 34.20869545995472), (-118.4463148040251, 34.20816840525995), (-118.44552585580422, 34.207574236379436), (-118.44481081597463, 34.20691867681391), (-118.44417657044012, 34.206208041289834), (-118.44362922638423, 34.20544917489406), (-118.44317405349119, 34.20464938710275), (-118.4428154332553, 34.203816381341895), (-118.44255681686417, 34.20295818075938), (-118.442400692059, 34.2020830509252), (-118.44234855928666, 34.201199420205334), (-118.44234878237172, 34.20119565530833), (-118.448758, 34.201192)]), (6037127210.0, [(-118.44874025315963, 34.20940845995669), (-118.448764, 34.208458), (-118.457465, 34.208452), (-118.457459, 34.201069), (-118.46404044489688, 34.20106447874177), (-118.46404844071334, 34.201199420205334), (-118.463996307941, 34.202083050925204), (-118.46384018313583, 34.20295818075938), (-118.4635815667447, 34.20381638134189

In [None]:
def switch_lat_lon(coords_list):
  """Return a new list in which the first two entries of every pair are swapped.

  Args:
    coords_list: iterable of indexable pairs, e.g. (lon, lat) tuples.

  Returns:
    List of tuples (pair[1], pair[0]) in the original order.
  """
  return [(pair[1], pair[0]) for pair in coords_list]

In [None]:
# Map centred on the selected charger.
la_map = folium.Map(location=coordinates, tiles='Stamen Toner', zoom_start=14.5)

# folium.GeoJson(
#     la_tracts_url,
# ).add_to(la_map)

# Small filled marker at the charger itself.
folium.Circle(
    radius=20,
    location=coordinates,
    popup='Charger',
    color='crimson',
    fill=True,
).add_to(la_map)

# Outline of the analysis buffer (radius in metres).
folium.Circle(
    radius=1000,
    location=coordinates,
    color='black',
    # fill=True,
).add_to(la_map)

color_list = ['#3cb44b', '#42d4f4', '#e6194B', '#dcbeff', '#4363d8', '#f58231', '#800000', '#ffe119']
for i, (tract_id, overlap_coords) in enumerate(overlap_list):
  # Cycle through the palette so more overlaps than colours cannot raise IndexError.
  c_color = color_list[i % len(color_list)]
  folium.Polygon(
      # overlap vertices are (lon, lat); folium takes them swapped
      locations=switch_lat_lon(overlap_coords),
      color=c_color,
      fill=True,
    ).add_to(la_map)

In [None]:
# Display the folium map as the cell output.
la_map

In [None]:
# Inspect the first station row, including its polygon and tract_hits entries.
print(df_stations.iloc[0])

Station                                               AUI-10372
Address       2316 W Martin Luther King Jr Blvd, Los Angeles...
lat                                                     34.0107
lon                                                    -118.319
zipcode                                                   90008
Geo_FIPS                                             6037234000
polygon       [(-118.30805574691972, 34.010719223608945), (-...
tract_hits    [(6037219020.0, 0.0909026365382895, 0.02093580...
Name: 0, dtype: object


## Convert to Zipcodes

In [None]:
la_zips_url = 'https://raw.githubusercontent.com/ritvikmath/StarbucksStoreScraping/master/laZips.geojson'
# The with-block closes the HTTP response once the GeoJSON has been parsed.
with urlopen(la_zips_url) as response:
  la_zips = json.loads(response.read())

In [None]:
# Start a fresh timer so the printed duration covers this cell only, not
# everything since the tract cell set start_time.
start_time = time.time()

# Build (and, where invalid, repair) each zip code's polygon parts once; they
# do not depend on the station. The loop variable is no longer called `zip`,
# which used to overwrite the builtin for the rest of the kernel session.
zip_shapes = []
for obj in la_zips["features"]:
  if obj['geometry']['type'] == "MultiPolygon":
    rings = [polyg[0] for polyg in obj['geometry']['coordinates']]
  else:
    rings = [obj['geometry']['coordinates'][0]]
  parts = []
  for ring in rings:
    part = Polygon(ring)
    if not part.is_valid:
      part = part.buffer(0)  # buffer(0) rebuilds an invalid polygon
    parts.append(part)
  zip_shapes.append((obj['properties']['zipcode'], parts, sum(part.area for part in parts)))

zips_hits_list = []
for ind, row in tqdm(df_stations.iterrows(), total=df_stations.shape[0]):
  Poly = Polygon(row['polygon'])  # the station's buffer circle
  zips_hits = []
  for zipcode, parts, t_area in zip_shapes:
    o_area = sum(part.intersection(Poly).area for part in parts)
    if o_area != 0:
      zips_hits.append((float(zipcode), (o_area / t_area))) # We only have an absolute feature 
  zips_hits_list.append(zips_hits)
df_stations['zip_hits'] = zips_hits_list

total_time = time.time() - start_time
print(total_time)

HBox(children=(FloatProgress(value=0.0, max=177.0), HTML(value='')))


94.95018529891968


## Split Relative and Absolute Features

In [None]:
# County-level cleaned census table; the 'Unnamed: 0' column read from the CSV is discarded.
census_url = 'https://github.com/d-rabinovich/EV-Charger-Project/blob/main/cleaned_census_data_county.csv?raw=true'
df_census = pd.read_csv(census_url, encoding="ISO-8859-1").drop(columns='Unnamed: 0')

In [None]:
# Create lists of names, indices, and new names for relative and absolute features

print(type(df_census.columns))
relative_indices, absolute_indices = ([], [])
relative_names, absolute_names = ([], [])
# enumerate() yields the same (position, name) pairs as the removed
# pd.Series(...).iteritems() call (iteritems no longer exists in pandas 2.0).
for ind, val in enumerate(df_census.columns):
  # Columns are classified by their first character; the first two characters
  # are then stripped (presumably an 'r_' / 'a_' prefix - confirm).
  if val[0] == 'r':
    relative_indices.append(ind)
    relative_names.append(val[2:])
  elif val[0] == 'a':
    absolute_indices.append(ind)
    absolute_names.append(val[2:])
# Output column names carry the buffer radius, e.g. "<name>_1_km".
new_relative_names = [f"{name}_{radius_km}_km" for name in relative_names]
new_absolute_names = [f"{name}_{radius_km}_km" for name in absolute_names]
num_absolute_features = len(new_absolute_names)
num_relative_features = len(new_relative_names)

<class 'pandas.core.indexes.base.Index'>


## Convert Census Tract Hits to Features


In [None]:
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd

In [None]:
# Turn each station's tract overlaps into weighted census features:
#   absolute features are weighted by the share of the tract inside the circle,
#   relative features by the share of the circle covered by the tract.
FIPS_List = np.array(df_census['Geo_FIPS'])
absolute_feature_lists = [[] for _ in range(num_absolute_features)]
relative_feature_lists = [[] for _ in range(num_relative_features)]

for index, row in tqdm(df_stations.iterrows(), total=df_stations.shape[0]):
  absolute_features = [[] for _ in range(num_absolute_features)]
  relative_features = [[] for _ in range(num_relative_features)]

  for tract_id, tract_fraction, circle_fraction in row['tract_hits']:
    matches = np.where(FIPS_List == tract_id)[0]
    if matches.size == 0:
      continue  # tract has no row in the census table
    f_idx = matches[0]
    # Each tract contributes exactly once. The previous
    # `while i < tract_fraction` loop ran twice whenever the fraction rounded
    # to slightly above 1.0, double-counting that tract.
    for j in range(num_absolute_features):
      absolute_features[j].append(tract_fraction * (df_census.iloc[f_idx, absolute_indices[j]]))
    for j in range(num_relative_features):
      relative_features[j].append(circle_fraction * (df_census.iloc[f_idx, relative_indices[j]]))

  # nansum ignores missing census values and yields 0.0 for a station without hits.
  for i in range(num_absolute_features):
    absolute_feature_lists[i].append(np.nansum(np.array(absolute_features[i])))
  for i in range(num_relative_features):
    relative_feature_lists[i].append(np.nansum(np.array(relative_features[i])))

for i in range(num_absolute_features):
  df_stations[new_absolute_names[i]] = absolute_feature_lists[i]
for i in range(num_relative_features):
  df_stations[new_relative_names[i]] = relative_feature_lists[i]

HBox(children=(FloatProgress(value=0.0, max=177.0), HTML(value='')))




In [None]:
# Inspect the first station row after the census features were added.
print(df_stations.iloc[0])

Station                                                                 AUI-10372
Address                         2316 W Martin Luther King Jr Blvd, Los Angeles...
lat                                                                       34.0107
lon                                                                      -118.319
zipcode                                                                     90008
                                                      ...                        
commute_time_1_km                                                         35.7496
median_household_income_1_km                                              48473.6
average_household_size_1_km                                               2.78012
population_density_1_km                                                   13514.8
median_house_value_1_km                                                    488011
Name: 0, Length: 77, dtype: object


## Check if Conversion Worked Properly

In [None]:
import numpy as np

In [None]:
def math_checker(sz, feature):
  """Print the tract overlaps of randomly chosen stations for manual checking.

  Args:
    sz: number of stations to sample (with replacement).
    feature: unused; kept so existing calls keep working.
  """
  # Sample row positions from the actual table length. The previous hard-coded
  # upper bound of 178 could draw 177, one past the last row of a 177-row table.
  rand_ind = np.random.randint(len(df_stations), size=sz)
  for idx in rand_ind:
    print("Now testing index ", idx)
    for tuples in df_stations['tract_hits'].iloc[idx]:
      print("\tcensus tract: ", tuples[0])
      print("\t\ttract overlap: ", tuples[1])
      print("\t\tcircle overlap: ", tuples[2])

In [None]:
# Print the overlaps of 6 random stations; the second argument is not used by math_checker.
math_checker(6, 0)

Now testing index  173
	census tract:  6037125200.0
		tract overlap:  0.1567369665395423
		circle overlap:  0.06489512193870381
	census tract:  6037125310.0
		tract overlap:  0.03484756145064455
		circle overlap:  0.011700453155539291
	census tract:  6037125320.0
		tract overlap:  0.8035604940594363
		circle overlap:  0.16259436690371018
	census tract:  6037125401.0
		tract overlap:  1.0
		circle overlap:  0.12687875879113852
	census tract:  6037125402.0
		tract overlap:  1.0
		circle overlap:  0.22102369167655228
	census tract:  6037125501.0
		tract overlap:  0.9966151417166174
		circle overlap:  0.1474900026997755
	census tract:  6037125502.0
		tract overlap:  0.9299186247289639
		circle overlap:  0.1560648065810027
	census tract:  6037125600.0
		tract overlap:  0.06540411229529008
		circle overlap:  0.021298520285435354
	census tract:  6037143100.0
		tract overlap:  0.01157632262205033
		circle overlap:  0.006579570411134845
	census tract:  6037143200.0
		tract overlap:  0.168926447

## Convert Zipcode Hits to Features


In [None]:
# California vehicle counts by fuel type and zip code.
ev_cars_url = 'https://data.ca.gov/dataset/15179472-adeb-4df6-920a-20640d02b08c/resource/d304108a-06c1-462f-a144-981dd0109900/download/vehicle-fuel-type-count-by-zip-code.csv'

df_cars_raw = pd.read_csv(ev_cars_url, low_memory=False)

# Keep only the plug-in fuel types, then total the vehicles per zip code.
ev_types = ['Battery Electric', 'Plug-in Hybrid']
is_ev = df_cars_raw['Fuel'].isin(ev_types)
df_cars_almost = df_cars_raw[is_ev]
vehicles_by_zip = df_cars_almost.groupby('Zip Code')['Vehicles'].sum()
# 'OOS' and 'Other' are removed from the grouped index (presumably non-zip buckets).
df_cars = vehicles_by_zip.drop(labels=['OOS', 'Other']).reset_index(name='Vehicles')

In [None]:
# Count missing values per column of df_cars.
print(df_cars.isna().sum())

Zip Code    0
Vehicles    0
dtype: int64


In [None]:
# Estimate the EVs inside each station's circle: every overlapping zip code
# contributes its EV count scaled by the share of the zip inside the circle.
df_cars['Zip Code'] = [int(x) for x in df_cars['Zip Code']]
zips_List = np.array(df_cars['Zip Code'])
vehicle_counts = df_cars['Vehicles'].to_numpy()
ev_car_count = []

for index, row in tqdm(df_stations.iterrows(), total=df_stations.shape[0]):
  evc = []
  for zipcode, zip_fraction in row['zip_hits']:
    matches = np.where(zips_List == zipcode)[0]
    if matches.size == 0:
      continue  # zip code has no row in the vehicle table
    # Each zip contributes exactly once. The previous
    # `while i < zip_fraction` loop ran twice whenever the fraction rounded
    # to slightly above 1.0, double-counting that zip.
    evc.append(zip_fraction * vehicle_counts[matches[0]])
  # nansum yields 0.0 for a station without any matching zip code.
  ev_car_count.append(np.nansum(np.array(evc)))
df_stations['ev_car_count_' + str(radius_km) + '_km'] = ev_car_count

HBox(children=(FloatProgress(value=0.0, max=177.0), HTML(value='')))




## Add Labels and Download

In [None]:
# Utilization report that provides the outcome (label) columns.
labels_url = 'https://raw.githubusercontent.com/amberguo/ev-charger/main/Flo_Parks_Utilization_Report_addresses.csv?raw=true'

In [None]:
labels_raw = pd.read_csv(labels_url, encoding = "ISO-8859-1")

labels_cols = ['Station', 'Total Sessions', 'Total Connection Time', 
               'Total Amount', 'Total kWh']

features = df_stations.copy()
# .copy() gives labels its own data, so the edits below do not write into a
# slice of labels_raw (the source of the SettingWithCopyWarning).
labels = labels_raw[labels_cols].copy()

# clean up station id: keep the text between the first pair of double quotes
labels['Station'] = labels['Station'].str.split("\"").str[1]

# Rename the outcome columns to snake_case in one step. Column order is
# unchanged: Station, total_sessions, total_connection_time, total_amount, total_kwh.
labels = labels.rename(columns={
    'Total Sessions': 'total_sessions',
    'Total Connection Time': 'total_connection_time',
    'Total Amount': 'total_amount',
    'Total kWh': 'total_kwh',
})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
# Column-wise concat aligns rows by index label, so features and labels must
# list the stations in the same order. The labels' own Station column is
# dropped first; otherwise the result (and the exported CSV) would contain two
# columns named 'Station'.
df = pd.concat([features, labels.drop(columns='Station', errors='ignore')], axis = 1)
# Helper geometry columns are not exported.
cols_to_drop = ['polygon', 'tract_hits', 'zip_hits']
df = df.drop(columns=cols_to_drop)

In [None]:
# Colab-only: write the geo-analysed features + outcomes table to CSV and download it.
from google.colab import files
df.to_csv('stations_with_geoanalyzed_features_and_outcomes_1_km.csv') 
files.download('stations_with_geoanalyzed_features_and_outcomes_1_km.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>