## Sheet Overview
This sheet scrapes the geographic.org website for all the street names in Boston, keeps the first word of each name, and stores the result in a list.

### Necessary Imports

In [118]:
import requests
from bs4 import BeautifulSoup
import time
import numpy as np

### Set the url for scraping

In [1]:
# geographic.org listing page for the streets of Boston (Suffolk County, MA)
scrape_url = 'https://geographic.org/streetview/usa/ma/suffolk/boston.html'
scrape_url

'https://geographic.org/streetview/usa/ma/suffolk/boston.html'

### Request street names from url and check status

In [120]:
# Download the listing page. requests applies no timeout by default, so an
# explicit one keeps this cell from hanging forever if the server stalls.
res = requests.get(scrape_url, timeout=30)

# Display the HTTP status code; 200 means the page was returned successfully
res.status_code

200

### Use BeautifulSoup to parse the content of the Boston Street name web page

In [121]:
# Parse the raw response bytes into a navigable tree with the lxml parser
soup = BeautifulSoup(markup=res.content, features='lxml')
soup

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<meta content="Boston, Suffolk, Massachusetts, United States, maps, List of Streets, Street View, Geographic.org" name="Description"/>
<meta content="Boston, Suffolk, Massachusetts, List of Streets, maps, United States, Street View, Geographic.org" name="keywords"/>
<title>List of Streets in Boston, Suffolk, Massachusetts, United States, Google Maps and Photos Streetview, United States, List of Streets, Google Street View, Geographic.org</title>
<!--Taboola Head Section-->
<script type="text/javascript">
        window._taboola = window._taboola || [];
        _taboola.push({article:'auto'});
        !function (e, f, u) {
            e.async = 1;
            e.src = u;
            f.parentNode.insertBefore(e, f);
        }(document.createElement('script'),
            document.getElementsByTagName('script')[0],
            '//cdn.taboola.com/libtrc/fotios-geographicorg/loader.js');
    </script>
<!--End Taboola Head Section-->
<meta

### Isolate the street names table using soup.find

In [122]:
# find() returns only the first matching tag, i.e. the first <ul> in the page
street_name_table = soup.find(name='ul')
street_name_table

<ul>
<li><a alt="3rd Place" href="../../../view.php?place=3rd Place, Boston, Suffolk, Massachusetts, 2127, United States">3rd Place</a>   2127</li>
<li><a alt="4th Street Place" href="../../../view.php?place=4th Street Place, Boston, Suffolk, Massachusetts, 2127, United States">4th Street Place</a>   2127</li>
<li><a alt="A Street" href="../../../view.php?place=A Street, Boston, Suffolk, Massachusetts, 2210, United States">A Street</a>   2210</li>
<li><a alt="A Street" href="../../../view.php?place=A Street, Boston, Suffolk, Massachusetts, 2127, United States">A Street</a>   2127</li>
<li><a alt="Aberdeen Street" href="../../../view.php?place=Aberdeen Street, Boston, Suffolk, Massachusetts, 2215, United States">Aberdeen Street</a>   2215</li>
<li><a alt="Acadia Street" href="../../../view.php?place=Acadia Street, Boston, Suffolk, Massachusetts, 2127, United States">Acadia Street</a>   2127</li>
<li><a alt="Access Road" href="../../../view.php?place=Access Road, Boston, Suffolk, Massach

### Create a list of street names by extracting them individually from the street name table

In [123]:
# Collect the visible text of every <li> entry in the table, in document order.
# A comprehension matches the style of the later cleaning cells and avoids
# leaving a stray loop variable behind in the kernel namespace.
boston_streets = [item.text for item in street_name_table.find_all('li')]

In [124]:
# Preview the first ten raw entries
boston_streets[:10]

['3rd Place \xa0 2127',
 '4th Street Place \xa0 2127',
 'A Street \xa0 2210',
 'A Street \xa0 2127',
 'Aberdeen Street \xa0 2215',
 'Acadia Street \xa0 2127',
 'Access Road \xa0 2128',
 'Accolyn Way \xa0 2114',
 'Acorn Street \xa0 2108',
 'Adams Place \xa0 2127']

### Clean the street names by stripping the unnecessary info and only selecting the first word of each street


In [125]:
# Break every scraped entry into its space-separated tokens
boston_streets = [
    entry.split(" ")
    for entry in boston_streets
]
boston_streets[:10]

[['3rd', 'Place', '\xa0', '2127'],
 ['4th', 'Street', 'Place', '\xa0', '2127'],
 ['A', 'Street', '\xa0', '2210'],
 ['A', 'Street', '\xa0', '2127'],
 ['Aberdeen', 'Street', '\xa0', '2215'],
 ['Acadia', 'Street', '\xa0', '2127'],
 ['Access', 'Road', '\xa0', '2128'],
 ['Accolyn', 'Way', '\xa0', '2114'],
 ['Acorn', 'Street', '\xa0', '2108'],
 ['Adams', 'Place', '\xa0', '2127']]

In [126]:
# Keep only the leading token of each entry, and drop it when that token
# is shorter than 3 characters
boston_streets = [
    tokens[0]
    for tokens in boston_streets
    if len(tokens[0]) >= 3
]
boston_streets[:10]

['3rd',
 '4th',
 'Aberdeen',
 'Acadia',
 'Access',
 'Accolyn',
 'Acorn',
 'Adams',
 'Adams',
 'Addison']

In [127]:
# Persist the cleaned list with IPython's %store magic so it survives kernel
# restarts; it can be loaded back with `%store -r boston_streets`
%store boston_streets

Stored 'boston_streets' (list)


Above code adapted from Grant Wilson of San Francisco cohort