# Geospatial Mapping
Mapping data using street names rather than coordinates.

In [1]:
import pandas as pd
import geopandas as gpd
import folium
from collections import defaultdict
import multiprocessing as mp
from numpy import array_split
from tqdm.notebook import tqdm
from geocoding_funcs import *

In [2]:
# Register tqdm with pandas so that the `progress_apply` family of methods
# is available and shows a progress bar labelled "Progress Bar".
tqdm.pandas(desc="Progress Bar")

In [3]:
# Read the source CSV into a DataFrame
source_csv = '../usc_data/sc_loc2018.csv'
df = pd.read_csv(source_csv)

# Report the row count, then preview the first records as the cell output
print(f"Number of rows: {df.shape[0]:,}")
df.head(5)

Number of rows: 142,406


Unnamed: 0,ano,tway,typ,day,rai,loa,odr,alc,wcc,ahc,...,srn,fhe,prc,tim,bdo,pnt,pat,rct,adid,dlr
0,18000001,1,L,6,0.0,1.0,S,1,1,1,...,6.0,22,2,948,1,948,952,2,NO,S
1,18000002,2,L,2,0.0,2.0,N,5,1,1,...,0.0,22,8,145,0,150,155,4,NO,N
2,18000003,3,L,2,0.0,3.0,S,5,1,2,...,0.0,19,16,150,50,151,156,3,YES,N
3,18000004,4,L,2,0.0,2.0,E,5,1,1,...,106.0,22,16,218,0,218,218,5,YES,E
4,18000005,1,L,2,0.0,2.0,S,2,3,1,...,118.0,25,7,715,2,716,725,5,NO,N


<br>

# Data Geocoding
Geocoding is the process of converting addresses into geographic coordinates. 
<br>Since we are not sure of the exact location of each record in the dataset, we can use geocoding to map the data. 
<br>Geocoding can be done using the street names in the dataset. The following columns are used for the geocoding:
<br>
<br>`als`: On Route Street Name
<br>`alsb`: Base Intersection Street Name
<br>
<br> While `alss` also contains strings that look like street names, we are not sure what this column represents.

In [4]:
# Use one worker process per CPU reported by `mp.cpu_count()`
num_processes = mp.cpu_count()
print(f"Number of processes: {num_processes}")

# Split the dataframe into `num_processes` contiguous, nearly equal chunks.
# `array_split` is applied to the row positions rather than to the DataFrame
# itself: passing a DataFrame makes NumPy call the deprecated
# `DataFrame.swapaxes`, which emits a FutureWarning on recent pandas versions.
# Slicing with `iloc` keeps each chunk's original index labels.
chunks = [
    df.iloc[positions]
    for positions in array_split(range(len(df)), num_processes)
]

Number of processes: 8


  return bound(*args, **kwds)


In [5]:
# Geocode the chunks in parallel, one chunk per task.
# The results must be collected inside the `with` block, because leaving it
# terminates the pool.
with mp.Pool(processes=num_processes) as pool:
    chunk_results = pool.imap(process_chunk, chunks)
    # tqdm advances once per finished chunk (results arrive in chunk order)
    progress = tqdm(chunk_results, total=len(chunks))
    all_results = [chunk_result for chunk_result in progress]

  0%|          | 0/8 [00:00<?, ?it/s]

No results found for address: HARRY C RAYSOR DR, BRIDGES, South Carolina, USA
No results found for address: MEETING STREET, LINE STREET, SHEPPARD STREET, South Carolina, USA
No results found for address: SAM RITTENBERG BLVD, POSTON RD, South Carolina, USA
No results found for address: LINE STREET, COMING STREET, SAINT PHILIP STREET, South Carolina, USA
No results found for address: ELIZABETH STREET, CHARLOTTE STREET, JOHN ST, South Carolina, USA
No results found for address: HANOVER STREET, LEE STREET, JACKSON STREET, South Carolina, USA
No results found for address: GLENN MCCONELL PKWY, CHARLIE HALL BLVD, MAGWOOD DR, South Carolina, USA
No results found for address: ASHLEY HALL RD, S WOODMERE DR, ORANGE GROVE RD, South Carolina, USA
No results found for address: CLEMENTS FERRY RD, CHARLESTON REGIONAL PKWY, JACK PRIMUS RD, South Carolina, USA
No results found for address: MAIN RD, OLD CHARLESTON RD, MCLERNON TRC, South Carolina, USA
No results found for address: MEETING STREET, COLUMBU

Process SpawnPoolWorker-8:
Process SpawnPoolWorker-4:
Process SpawnPoolWorker-6:
Process SpawnPoolWorker-2:
Process SpawnPoolWorker-1:
Process SpawnPoolWorker-7:
Process SpawnPoolWorker-5:
Process SpawnPoolWorker-3:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/nitingupta/miniconda3/envs/alive25/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/nitingupta/miniconda3/envs/alive25/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/nitingupta/miniconda3/envs/alive25/lib/python3.11/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
                    ^^^^^^^^^^^^^^^^^^^
  File "/Users/nitingu

KeyboardInterrupt: 

In [9]:
# Build the address string for every row (row-wise apply) as a plain list
addresses_per_row = df.apply(get_address, axis=1)
l = addresses_per_row.tolist()

In [10]:
# Check whether this exact formatted address occurs among the generated addresses
target_address = 'HUGER ST, BLOSSOM ST, DEVINE ST, South Carolina, USA'
target_address in l

True

In [13]:
# Select the rows whose three street-name columns match this combination
matches_streets = (
    df['als'].eq('HUGER ST')
    & df['alsb'].eq('BLOSSOM ST')
    & df['alss'].eq('DEVINE ST')
)
df[matches_streets]

Unnamed: 0,ano,tway,typ,day,rai,loa,odr,alc,wcc,ahc,...,srn,fhe,prc,tim,bdo,pnt,pat,rct,adid,dlr
11174,18503239,2,L,5,0.0,1.0,S,5,2,1,...,,22,9,2100,1,2120,2125,2,NO,N
12554,18505110,2,L,3,0.0,2.0,N,1,1,1,...,,22,3,1130,1,1150,1157,2,NO,S
38021,18540196,2,L,2,0.0,1.0,N,1,1,1,...,,20,60,1145,2,1230,1240,2,NO,N
43884,18548975,2,L,2,0.0,2.0,N,1,3,2,...,,23,3,1114,1,1116,1202,2,NO,S
47787,18554717,2,L,5,0.0,1.0,N,1,3,1,...,,23,2,1600,1,1601,1608,2,NO,S
48875,18556409,2,L,4,0.0,4.0,N,1,1,2,...,,23,3,705,2,706,710,2,NO,S
62525,18576694,1,L,3,0.0,3.0,N,1,1,1,...,,22,9,1420,0,1430,1440,2,NO,S
86410,18612535,2,L,5,0.0,2.0,S,1,1,2,...,,22,18,1555,5,1557,1605,2,NO,N
126402,18669765,3,L,7,0.0,2.0,S,1,2,1,...,,12,3,930,2,931,950,2,NO,S


In [None]:
# Collapse the per-chunk result lists into one lookup: row index -> [lat, lon].
# Indices that never appear in the results fall back to [None, None].
results_dict = defaultdict(lambda: [None, None])
results_dict.update(
    (idx, [lat, lon])
    for chunk_result in all_results
    for idx, lat, lon in chunk_result
)

In [None]:
def _coordinate_at(position):
    """Return a mapper from a row index to the `position`-th entry of its [lat, lon] pair."""
    return lambda idx: results_dict[idx][position]


# Position 0 holds the latitude, position 1 the longitude
df['latitude'] = df.index.map(_coordinate_at(0))
df['longitude'] = df.index.map(_coordinate_at(1))

In [None]:
# Keep only rows that have both coordinates; a missing value in either
# column means the row could not be geocoded.
coordinate_columns = ['latitude', 'longitude']
df = df.dropna(axis=0, how='any', subset=coordinate_columns)

<br>

## Mapping

In [None]:
# Create a GeoDataFrame with one point per record (x = longitude, y = latitude).
# The CRS is declared explicitly so the geometry is not left without a
# reference system; EPSG:4326 (WGS84 lon/lat) matches how the same columns are
# later passed to folium as latitude/longitude.
# NOTE(review): assumes the geocoder returns WGS84 degrees — confirm in geocoding_funcs.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['longitude'], df['latitude']),
    crs="EPSG:4326",
)

In [None]:
# Create a map centered on the mean point.
# A GeoSeries cannot be reduced with `.mean()` (geometry arrays do not support
# that reduction, so the call raises TypeError); average the point coordinates
# instead.
mean_lon = gdf.geometry.x.mean()
mean_lat = gdf.geometry.y.mean()

# Keep `center` as a point object exposing `.x` (longitude) and `.y` (latitude)
center = gpd.points_from_xy([mean_lon], [mean_lat])[0]
m = folium.Map(location=[center.y, center.x], zoom_start=10)

In [None]:
# Place one marker per record; the popup lists the three street-name columns
for row_label, row in gdf.iterrows():
    marker_position = [row['latitude'], row['longitude']]
    popup_text = f"ALS: {row['als']}, ALSB: {row['alsb']}, ALSS: {row['alss']}"
    marker = folium.Marker(location=marker_position, popup=popup_text)
    marker.add_to(m)

In [None]:
# Write the map to a standalone HTML file (relative to the working directory)
file_name: str = "street_locations_map.html"
m.save(outfile=file_name)

# Confirm where the map was written
print(f"Map saved as {file_name}")