In [18]:
# Basic Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Import the config file
import config

# For text:
import re

# For times:
import time

# Set a random seed for imputation
#  Source:  https://numpy.org/doc/stable/reference/random/generated/numpy.random.seed.html
np.random.seed(42)

# for Google Geocoding API
'''
Setting up the API key and getting started was enabled with help from the following sources:
* https://developers.google.com/maps/documentation/geocoding/?csw=1
* https://developers.google.com/maps/documentation/geocoding/overview
* https://developers.google.com/maps/get-started#api-key
* https://developers.google.com/maps/documentation/geocoding/get-api-key
* https://github.com/googlemaps/google-maps-services-python
'''
import googlemaps
from datetime import datetime

# Import the Beautiful Soup and Requests packages for web scraping:
import requests
from bs4 import BeautifulSoup

# Find Proximity to Subway ('T') Stations

## Get a List of All the T Stations/Stops

Web scrape the stop listing at https://www.mbta.com/stops/subway to get the list of stations, then visit the URL for each station and scrape its address. Each address is then entered into the Google Geocoding request.

In [89]:
# Store the base url from which all T-stop data will be retrieved
base_url = 'https://www.mbta.com/stops/subway'

# Connect to the web page. A timeout (in seconds) is supplied because
# requests waits indefinitely by default, which would hang the notebook
# if the server never responds.
res = requests.get(base_url, timeout=30)

# Fail fast on a 4xx/5xx response so an error page is never parsed and
# scraped as though it were the stop listing.
res.raise_for_status()

# Pull the website text HTML string out
html = res.text

# Create the Beautiful Soup instance, choosing 'lxml' as the parser.
soup = BeautifulSoup(html, 'lxml')

# Display the HTTP status code as the cell output
res.status_code

200

In [90]:
# Per the page source, every stop link is an <a> tag carrying the class
#  string below, so collect all of those tags.
a_list = soup.find_all('a', attrs={'class': 'btn button stop-btn m-detailed-stop'})

# Each tag's href holds the stop's url, but it repeats the '/stops' text
#  that is already part of the base url; strip that text while building the list.
stn_urls = list(map(lambda anchor: anchor['href'].replace('/stops', ''), a_list))

# Report how many stop urls were collected, then show them
print(len(stn_urls))
stn_urls

132


['/place-alfcl',
 '/place-andrw',
 '/place-asmnl',
 '/place-brntn',
 '/place-brdwy',
 '/place-cntsq',
 '/place-chmnl',
 '/place-davis',
 '/place-dwnxg',
 '/place-fldcr',
 '/place-harsq',
 '/place-jfk',
 '/place-knncl',
 '/place-nqncy',
 '/place-pktrm',
 '/place-portr',
 '/place-qamnl',
 '/place-qnctr',
 '/place-shmnl',
 '/place-smmnl',
 '/place-sstat',
 '/place-wlsta',
 '/place-asmnl',
 '/place-butlr',
 '/place-capst',
 '/place-cedgr',
 '/place-cenav',
 '/place-matt',
 '/place-miltt',
 '/place-valrd',
 '/place-astao',
 '/place-bbsta',
 '/place-chncl',
 '/place-ccmnl',
 '/place-dwnxg',
 '/place-forhl',
 '/place-grnst',
 '/place-haecl',
 '/place-jaksn',
 '/place-mlmnl',
 '/place-masta',
 '/place-north',
 '/place-ogmnl',
 '/place-rcmnl',
 '/place-rugg',
 '/place-state',
 '/place-sbmnl',
 '/place-sull',
 '/place-tumnl',
 '/place-welln',
 '/place-alsgr',
 '/place-amory',
 '/place-armnl',
 '/place-babck',
 '/place-bckhl',
 '/place-balsq',
 '/place-bcnfd',
 '/place-bland',
 '/place-lake',
 '/

# Geocoding API

In [2]:
# Read the Google API key from the imported config module
api_key = config.api_key

In [6]:
# Example Geocoding request url, following the format given in this source:
#  https://developers.google.com/maps/documentation/geocoding/get-api-key
#
# Template from that page (the key goes at the very end):
    # 'https://maps.googleapis.com/maps/api/geocode/json?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA&key=YOUR_API_KEY'
url = ''.join([
    'https://maps.googleapis.com/maps/api/geocode/json?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA&key=',
    api_key,
])

In [8]:
# Adapted from the usage example in the googlemaps client library:
    # https://github.com/googlemaps/google-maps-services-python

# Build the API client with the key loaded earlier
gmaps = googlemaps.Client(key=api_key)

# Forward geocoding: address text -> geocoding result list
geocode_result = gmaps.geocode(
    address='Alewife Brook Pkwy and Cambridge Park Dr, Cambridge, MA 02140'
)

# Reverse geocoding: (lat, lng) pair -> address result
reverse_geocode_result = gmaps.reverse_geocode(latlng=(40.714224, -73.961452))

In [10]:
# Display the raw response returned by the geocode request for inspection
geocode_result

[{'address_components': [{'long_name': 'Alewife Brook Parkway & Cambridgepark Drive',
    'short_name': 'Alewife Brook Pkwy & Cambridgepark Dr',
    'types': ['intersection']},
   {'long_name': 'North Cambridge',
    'short_name': 'North Cambridge',
    'types': ['neighborhood', 'political']},
   {'long_name': 'Cambridge',
    'short_name': 'Cambridge',
    'types': ['locality', 'political']},
   {'long_name': 'Middlesex County',
    'short_name': 'Middlesex County',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'Massachusetts',
    'short_name': 'MA',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'United States',
    'short_name': 'US',
    'types': ['country', 'political']},
   {'long_name': '02140', 'short_name': '02140', 'types': ['postal_code']}],
  'formatted_address': 'Alewife Brook Pkwy & Cambridgepark Dr, Cambridge, MA 02140, USA',
  'geometry': {'location': {'lat': 42.3947455, 'lng': -71.14048149999999},
   'locati

In [17]:
# Drill into the first result to get its 'location' entry (the lat/lng pair)
geocode_result[0]['geometry']['location']

{'lat': 42.3947455, 'lng': -71.14048149999999}