Geospatial Analysis:

The objective of this tutorial is to use geospatial analysis to make decisions about opening up new restaurants(or retail stores, bank branches, airports, etc). For our analysis, we use a dataset consisting of Starbucks and McDonalds store coordinates in New York.

Our tutorial consists of the following steps:


1.   Compute the distance of the stores
2.   Visualize data on a map
3.   Compute number of stores located in a given radius
4.   Compute the closest and farthest stores from Empire State Building



In [9]:
import pandas as pd
import numpy as np
import folium
import geopy.distance
import geocoder

In [25]:
# Locate the address
def locator(address):
  add = geocoder.osm(address)
  return [add.lat, add.lng]

# define the two locations and get their lat and long
L1 = locator('Aurangabad, India')
L2 = locator('Kollam, India')

# find the distance
geopy.distance.distance((L1[0],L1[1]),(L2[0],L2[1])).km

1223.4220980603232

In [5]:
loc = "Chennai"
print("\nLocation address:",loc)
location = geolocator.geocode(loc)
print("Latitude and Longitude of the said address:")
print((location.latitude, location.longitude))
print(geopy.distance.distance((l1[0],l1[1]), (location.latitude, location.longitude)).km)

Location address: Loni

Location address: Chennai
Latitude and Longitude of the said address:
(13.0836939, 80.270186)
943.94777019174


In [None]:
for x in ["shivnagar","Papum Pare-Pithapur","Dewas-Chittor"]:
  l1,l2 = x.split("-")
  l1 = geolocator.geocode(l1)
  l2 = geolocator.geocode(l2)
  print(geopy.distance.distance((l1.latitude, l1.longitude), (l2.latitude, l2.longitude)).km)

ValueError: ignored

In [None]:
l1

In [None]:
geopy.distance.distance((l1.latitude, l1.longitude), (location.latitude, location.longitude)).km

898.1167301426894

In [None]:
ladd1 = "Kozhikode"
print("Location address:",ladd1)
l1 = geolocator.geocode(ladd1)
print("Latitude and Longitude of the said address:")
print((l1.latitude, l1.longitude))

ladd2 = "Ahmednagar"
print("\nLocation address:",ladd2)
location = geolocator.geocode(ladd2)
print("Latitude and Longitude of the said address:")
print((location.latitude, location.longitude))



Location address: Kozhikode
Latitude and Longitude of the said address:
(11.2450558, 75.7754716)

Location address: Ahmednagar
Latitude and Longitude of the said address:
(19.162772500000003, 74.85802430085195)


In [None]:
import geopy.distance

geopy.distance.distance((l1.latitude, l1.longitude), (location.latitude, location.longitude)).km

881.6306067583353

In [None]:
def distance(a,b):
  l1 = geolocator.geocode(a)
  location = geolocator.geocode(b)
  return geopy.distance.distance((l1.latitude, l1.longitude), (location.latitude, location.longitude)).km


In [None]:
distance("Bhopal","Cachar")

In [None]:
distance("Coimbatore","Cachar")

In [None]:
distance("Khammam","Chandigarh")

1509.3306732976478

In [None]:
distance("Tiruvannamalai","Cachar")

In [None]:
#Read file containing location details
df = pd.read_csv('https://drive.google.com/uc?id=19JERBe0ndadkMPKDFZ0rTEZqdI50zDfk')
df.head(10)

In [None]:
#Merge Columns
df['Coordinate'] = '('+df['lat'].astype(str) + ', ' + df['lng'].astype(str)+')'
df.head(10)

In [None]:
#New York Empire State Building Lat Long (Looking for any approx center point)
NY_coord = (40.748488, -73.985238)

In [None]:
ass=df.itertuples
type(ass)

In [None]:
df.columns

In [None]:
for sd in df.itertuples(index=True):
  print(len(sd))
  print(type(sd))
  d=geopy.distance.distance(NY_coord, sd.Coordinate)
  dff=geopy.distance.distance(NY_coord, sd.Coordinate).km
  print(d,dff)
  print(type(dff))

  break

**1. Compute the distance of the stores**

In [None]:
#Compute distance of every store from city center
distances_km = []

for row in df.itertuples(index=False):
   s=row
   distances_km.append(
       geopy.distance.distance(NY_coord, row.Coordinate).km
   )

df['Distance'] = distances_km
df.head(10)

In [None]:
for i, row in df.iterrows():
  lat = df.at[i, 'lat']
  print(lat)
  lng = df.at[i, 'lng']
  print(lng)
  store = df.at[i, 'store']
  print(store)
  print(i)
  print()
  print(type(row))
  print()
  print()
  print(row)
  
  break


In [None]:
s.Coordinate

In [None]:
NY_coord

In [None]:
nn = folium.Map(location=NY_coord, zoom_start= 15)
type(nn)

In [None]:
j=folium.Marker(location=[lat,lng], popup=store, icon= folium.Icon(color="color")).add_to(nn)
type(j)

**2.Visualize data on a map**

In [None]:
#Empire State Building coordinates
m = folium.Map(location=NY_coord, zoom_start= 15)

#Place markers for the stores on the map
for i,row in df.iterrows():
  lat = df.at[i, 'lat']
  lng = df.at[i, 'lng']
  store = df.at[i, 'store']

  if store == 'McDonalds':
    color = 'red'
  else:
    color = 'green'

  folium.Marker(location=[lat,lng], popup=store, icon= folium.Icon(color=color)).add_to(m)

m

**3. Compute number of stores located in a given radius**

In [None]:
#All stores at a distance greater/less than x kms
df[df['Distance'] > 10]

**4. Compute the closest and farthest stores from Empire State Building**



In [None]:
df_farthest = df.iloc[df.groupby('store')['Distance'].agg(pd.Series.idxmax)]
df_closest = df.iloc[df.groupby('store')['Distance'].agg(pd.Series.idxmin)]
df_new = pd.concat([df_farthest, df_closest])

In [None]:
df_farthest

In [None]:
ds=df.groupby('store')['Distance'].agg(pd.Series.idxmax)
ds.index[:]

In [None]:
df_new

In [None]:
df_new = pd.read_csv('/content/clean_airbnb.csv')
df_new.head()

In [None]:
m = folium.Map(location=[40.748488, -73.985238], zoom_start= 10)
for i, rown in df_new.iterrows():
  lat = df_new.at[i, 'LAT']
  lng = df_new.at[i, 'LONG']
  place = df_new.at[i, 'neighbourhood group']
  folium.Marker(location=[lat,lng], popup=place, icon= folium.Icon(color=color)).add_to(m)
m

# Coffee king

In [None]:
!pip install geopandas

In [None]:
data = requests.get("https://www.mcdonalds.com/googleappsv2/geolocation?latitude=40.7830603&longitude=-73.9712488&radius=8.045&maxResults=30&country=us&language=en-us").json()

In [None]:
import numpy as np
import pandas as pd
import os
import geopandas
import requests
import pyproj

def mcdonalds():
    data = requests.get("https://www.mcdonalds.com/googleappsv2/geolocation?latitude=40.7830603&longitude=-73.9712488&radius=8.045&maxResults=30&country=us&language=en-us").json()
    
    result = []
    for row in data['features']:
        geom,prop = row['geometry'], row['properties']
        result.append({
            'store': 'McDonalds',
            'lat': geom['coordinates'][1],
            'lng': geom['coordinates'][0],
            'address': prop['addressLine1'],
        })

    return pd.DataFrame(result)


def starbucks():

    
    #Return a DataFrame with Manhattan Starbucks (store, address, lat, lng)I went to https://www.starbucks.com/store-locator and moved the map around to get center points that seem to cover Manhattan.
    
    
    map_points = (
      [40.709353, -74.002588],
      [40.72899, -73.982323],
      [40.741837, -74.008072],
      [40.741317, -73.972179],
      [40.761994, -73.959133],
      [40.763489, -73.989517],
      [40.763814, -74.01415],
      [40.781312, -73.98617],
      [40.773145, -73.960995],
      [40.79807, -73.977686],
      [40.786317, -73.928566],
      
    )

    result = []

    for lat, lng in map_points:
        url = f'https://www.starbucks.com/bff/locations?lat={lat}&lng={lng}'
        
        data = requests.get(url, headers={'x-requested-with': 'XMLHttpRequest'}).json()


        for row in data['stores']:
            result.append({
              'store': 'Starbucks',
              'lat': row['coordinates']['latitude'],
              'lng': row['coordinates']['longitude'],
              'address': ', '.join(row['addressLines'])
            })
    return pd.DataFrame(result).drop_duplicates()

def scrape_stores(target, overwrite=False):
    if not overwrite and os.path.exists(target):
        print(target, 'already exists. not scraping')
        return pd.read_excel(target)
    
    data = pd.concat([
        mcdonalds(),
        starbucks(),
    ])

    data.to_excel(target, index=False)
    return data

def manhatten_map():
    #load population and census shape file and adata 
    cols  =  ['BoroName','FIPSCode','BoroCode','CT2010','Pop2000','Pop2010','Change','ChangePC','Acres','Density']

    pop = pd.read_excel('/content/t_pl_p5_ct.xlsx', skiprows=range(7), header=None, names=cols, dtype=object)

    shape  = geopandas.read_file('/content/nyct2010.shp')

    # Filter for Manhattan 
    shape  =  shape.merge(pop.loc[:, ['CT2010', 'Pop2010']], on='CT2010')

    if not os.path.exists('map'):
        os.makedirs('map')

        # convert lat-long coordinate system and save 
    #shape  = CRS('EPSG:4326').to_proj4()
    crs=pyproj.CRS("epsg:4326")
    shape = shape.to_crs(crs)
    shape['Pop2010'] = shape['Pop2010'].astype(int)
    shape.to_file('map/population.shp')
    # save as a single blog for use by reshaper 

    shape.dissolve(by='BoroName')[['BoroCode', 'geometry']].to_file('map/single.shp')

def stores_map(stores, shape):
    geom = geopandas.points_from_xy(stores.lng,stores.lat)
    points = geopandas.GeoDataFrame(stores, geometry=geom)
    #points = points[points.intersects(shape.unary_union)]
    
    points.loc[:,['lat', 'lng','store','address']].to_csv('manhatton-stores.csv', index=False, encoding='utf8')
    points.to_file('map/points.shp')

if __name__ == '__main__':
    stores = scrape_stores('stores.xlsx', overwrite=False)
    shape = manhatten_map()
    stores_map(stores, shape)

In [None]:
from google.colab import drive
drive.mount('/content/drive')