# Get CoreLife Locations & Geocode

## Import Libraries

In [1]:
import pandas as pd
import requests
import geopandas as gpd
import shapefile
from shapely.geometry import Point
import numpy as np
import re

## Import Turn_URL_into_Soup function

In [2]:
# Run the shared helper notebook in this kernel; the cells below call the
# Turn_URL_into_Soup function it is expected to define.
# NOTE(review): absolute path on drive U: - this only works where that drive is mapped.
%run "U:/Projects/Common_Functions/Turn_URL_into_Soup.ipynb"



## Import Geocode_Address function

In [3]:
# Run the shared helper notebook in this kernel; the scraping loop below calls the
# Geocode_Address function it is expected to define.
# NOTE(review): absolute path on drive U: - this only works where that drive is mapped.
%run "U:/Projects/Common_Functions/Geocode_Address.ipynb"



## Main page with CoreLife Locations

In [4]:
# URL of the CoreLife page that is fetched and parsed for location entries.
page = "https://www.corelifeeatery.com/locations/"

## Select City Entries from Soup

In [5]:
# Fetch and parse the locations page with the shared helper.
# NOTE(review): javascript=False presumably skips browser rendering - confirm in the helper notebook.
soup = Turn_URL_into_Soup(page, javascript=False)

# Every element with CSS class "entry-content" is handled as one location entry below.
locations = soup.select('.entry-content')

# Fail fast if the selector matched nothing (e.g. the site markup changed). Otherwise the
# notebook would carry on with an empty result and only break later, when the coordinate
# columns are missing from the DataFrame.
if not locations:
    raise RuntimeError(f"No '.entry-content' elements found on {page}")

## Process each Entry and extract Address Info

In [6]:
%%time

# "City, ST 12345"-style line: captures city, two-character state and 5-digit zip.
# Raw string so \s, \w and \d reach the regex engine instead of being read as
# (invalid) Python string escapes.
ADDRESS_PATTERN = re.compile(r'(.*),\s(\w{2})[,\s]+(\d{5})')

# (street_hops, address_hops): how many .next_element steps lie between an entry's
# first <p> and the street text / the city-state-zip text. The pairs are tried in
# order; the second one covers entries whose address sits further into the markup.
ADDRESS_OFFSETS = ((1, 3), (5, 7))


def _text_at(start, hops):
    """Return the stripped string found `hops` elements after `start` in document order.

    Raises:
        AttributeError: if the walk runs past the end of the document (or `start`
            is None), or if the element reached has no single string.
    """
    node = start
    for _ in range(hops):
        node = node.next_element
    return node.string.strip()


def _parse_entry(location):
    """Extract city, street, state and zipcode from one location entry.

    Each offset pair in ADDRESS_OFFSETS is tried in turn; the first one whose
    city/state/zip line matches ADDRESS_PATTERN wins.

    Args:
        location: one element from the `.entry-content` selection.

    Returns:
        dict with keys 'city', 'street', 'state', 'zipcode' (in that order, which
        also fixes the DataFrame column order).

    Raises:
        ValueError: if no offset pair yields a parseable address.
    """
    for street_hops, address_hops in ADDRESS_OFFSETS:
        try:
            address = _text_at(location.p, address_hops)
            street = _text_at(location.p, street_hops)
        except AttributeError:
            # This layout does not fit the entry; try the next one.
            continue

        match = ADDRESS_PATTERN.match(address)
        if match is None:
            continue

        city, state, zipcode = match.groups()
        return {'city': city, 'street': street, 'state': state, 'zipcode': zipcode}

    # Use the entry's link text only to make the error message identifiable.
    label = getattr(location.a, 'string', None)
    raise ValueError(f"Could not parse an address for location entry {label!r}")


data = []

for location in locations:

    info = _parse_entry(location)

    full_address = f"{info['street']}, {info['city']}, {info['state']} {info['zipcode']}"
    # NOTE(review): wait=1 is presumably a per-request delay for the geocoder - confirm in the helper.
    info['latitude'], info['longitude'] = Geocode_Address(full_address, wait=1)

    data.append(info)

Wall time: 1min 18s


## Convert to Pandas DataFrame

In [7]:
# One row per scraped location, with a constant 'type' column appended as the last column.
df = pd.DataFrame(data).assign(type='CoreLife')

In [8]:
# Preview the first rows to spot-check the parsed address fields and geocoded coordinates.
df.head()

Unnamed: 0,city,street,state,zipcode,latitude,longitude,type
0,Allentown,"833 North Krocks Rd, Suite 101",PA,18106,40.563454,-75.562741,CoreLife
1,American Fork,197 NW State Street,UT,84003,40.381365,-111.813989,CoreLife
2,Amherst,1595 Niagara Falls Boulevard,NY,14226,43.000019,-78.822306,CoreLife
3,Ann Arbor,"205 North Maple Road, Suite 26",MI,48103,42.282659,-83.780447,CoreLife
4,Boardman,700 Boardman-Poland Road,OH,44512,41.02461,-80.64279,CoreLife


## Save GeoPandas DataFrame

In [9]:
# Build point geometry from the geocoded coordinates (x = longitude, y = latitude).
# The CRS is declared explicitly so the shapefile is written with projection
# information; without it GIS tools cannot tell how to interpret the coordinates.
# NOTE(review): EPSG:4326 assumes Geocode_Address returns WGS84 degrees - confirm in the helper.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df.longitude, df.latitude),
    crs='EPSG:4326',
)

# The coordinates now live in the geometry column, so the raw columns are dropped.
# reset_index(drop=True) renumbers the rows without creating a throwaway 'index' column.
gdf = gdf.reset_index(drop=True).drop(columns=['latitude', 'longitude'])

gdf.to_file(driver='ESRI Shapefile', filename='CoreLife_Coordinates.shp')