![title](img/tb_23557_westheimer.png)

In [1]:
import pandas as pd
import requests
import re
import time
from bs4 import BeautifulSoup as bs
from pprint import pprint

In [2]:
# Notebook display setup: widen the page container, inject the project's CSS,
# and let long strings (such as store URLs) print without truncation.
# NOTE(review): `HTML` is not imported in any cell visible here; presumably a
# `from IPython.display import HTML` is needed for Restart & Run All -- confirm.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Read each stylesheet inside a `with` block so the file handle is closed
# immediately instead of whenever the garbage collector gets to it.
css_parts = []
for css_path in ('../style-table.css', '../style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# Wrapped in display(): by default IPython only renders a bare expression when
# it is the last statement of the cell, and this one is followed by set_option.
display(HTML('<style>{}</style>'.format(css)))

pd.set_option('display.max_colwidth', 200)


# Visit the city page to get the list of stores and their URLs

In [3]:
# Download the city landing page that links to every store in that city.
# Alternative city kept for quick switching:
#url = 'https://locations.tacobell.com/tx/katy.html'
url = 'https://locations.tacobell.com/ca/bakersfield.html'
# Explicit timeout: without one, requests waits indefinitely on a stalled
# connection and the kernel appears to hang.
response = requests.get(url, timeout = 30)

# Shown as the cell output so a non-200 answer is noticed before parsing.
response.status_code

200

In [4]:
# Raw HTML of the city page fetched in the previous cell.
page = response.text

# Parse the HTML into a BeautifulSoup tree for the link lookup below.
# NOTE(review): no parser is named here, so the result may depend on which
# parser libraries are installed -- consider pinning one; confirm which parser
# produced the recorded outputs before changing it.
soup = bs(page)

In [5]:
# Site root that is substituted for the '..' in each store link's href when
# the store URLs are built.
url_prefix = 'https://locations.tacobell.com'

In [6]:
# Every store on the city page has a "View Page" anchor; its href contains
# '..', which is swapped for the site root to obtain an absolute URL.
store_urls = [
    anchor['href'].replace(r'..', url_prefix)
    for anchor in soup.find_all('a', text = 'View Page')
]
store_urls

['https://locations.tacobell.com/ca/bakersfield/11108-olive-drive.html',
 'https://locations.tacobell.com/ca/bakersfield/1301-allen-rd-.html',
 'https://locations.tacobell.com/ca/bakersfield/139-s-oswell-st.html',
 'https://locations.tacobell.com/ca/bakersfield/15-south-h-street.html',
 'https://locations.tacobell.com/ca/bakersfield/1877-white-lane.html',
 'https://locations.tacobell.com/ca/bakersfield/2300-h-street.html',
 'https://locations.tacobell.com/ca/bakersfield/2433-n--chester-ave-.html',
 'https://locations.tacobell.com/ca/bakersfield/2676-mt-vernon-avenue.html',
 'https://locations.tacobell.com/ca/bakersfield/3200-california-avenue.html',
 'https://locations.tacobell.com/ca/bakersfield/3300-buena-vista-rd.html',
 'https://locations.tacobell.com/ca/bakersfield/3300-panama-lane.html',
 'https://locations.tacobell.com/ca/bakersfield/3315-union-ave.html',
 'https://locations.tacobell.com/ca/bakersfield/3707-coffee-rd-.html',
 'https://locations.tacobell.com/ca/bakersfield/3799-r

# Define functions to get store hours and features (address, phone, and whether the store offers delivery or breakfast)

In [7]:
def get_store_hour(soup):
    """Extract a store's drive-thru and dine-in hours from its parsed page.

    For each of the 'Drive-Thru Hours' and 'Dine-In Hours' headings (elements
    with class ``c-hours-title``), the table rows that follow the heading are
    read, skipping the first row, and split into day label, opening time and
    closing time.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed HTML of a single store page.

    Returns
    -------
    pandas.DataFrame
        Indexed by the day label found at the start of each row (e.g. 'Mon'),
        with columns ``dt_open``, ``dt_closed``, ``fc_open``, ``fc_closed``,
        always in that order. A cell holds the time text (e.g. '7:00 AM'),
        the string 'closed' when the row contains no parsable time, or NaN
        when the page has no such hours section at all. If neither section
        exists the frame is empty but still carries the four columns.

    Raises
    ------
    AttributeError
        If a table row does not start with a capitalised day label.
    """
    dow_pattern = re.compile(r'^([A-Z][a-z]+)')
    opn_pattern = re.compile(r'(\d.+M)\ ')
    cld_pattern = re.compile(r'-\ (\d.+M)')

    # (heading text on the page, column-name prefix)
    sections = (('Drive-Thru Hours', 'dt'), ('Dine-In Hours', 'fc'))
    columns = [tag + suffix
               for _, tag in sections
               for suffix in ('_open', '_closed')]

    def _time_or_closed(pattern, row_text):
        # Run the regex once per row; rows without a time count as 'closed'.
        found = pattern.search(row_text)
        return found.group(1) if found else 'closed'

    dflist = []

    for title, tag in sections:
        heading = soup.find(class_ = 'c-hours-title', text = title)
        if heading is None:
            # Section not present on this page: skip it instead of crashing
            # on None; its columns are filled with NaN by the reindex below.
            continue

        # The hours table is reached two steps after the heading in document
        # order; the first <tr> is skipped.
        hours = [row.text for row in heading.next.next.find_all('tr')[1:]]

        dow = [dow_pattern.search(row).group(1) for row in hours]
        opn = [_time_or_closed(opn_pattern, row) for row in hours]
        cld = [_time_or_closed(cld_pattern, row) for row in hours]

        dflist.append(pd.DataFrame({tag + '_open' : opn,
                                    tag + '_closed' : cld},
                                   index = dow))

    if not dflist:
        # pd.concat([]) would raise; return an empty frame with the schema.
        return pd.DataFrame(columns = columns)

    # reindex keeps the column set stable even when one section is missing.
    return pd.concat(dflist, axis = 1).reindex(columns = columns)

In [8]:
def get_store_feature(soup):
    """Collect a store's address, phone number and service flags from its page.

    Service flags are derived from the ``alt`` text of the images inside
    elements with class ``Core-icon-wrapper``: a flag is 1 when at least one
    alt text contains the keyword, otherwise 0. Matching is case-sensitive.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed HTML of a single store page.

    Returns
    -------
    dict
        Keys ``address`` ("street, city, state zip"), ``phone``,
        ``open_late_night``, ``delivery``, ``drive_through``, ``breakfast``.
        The four flags are ints, each 0 or 1.

    Raises
    ------
    AttributeError
        If any of the address or phone elements is missing from the page.
    """
    # Gather the distinct icon descriptions. Wrappers without an <img>, or
    # whose image has no alt text, carry no feature information and are
    # skipped rather than raising.
    store_feature = set()
    for wrapper in soup.find_all(class_ = 'Core-icon-wrapper'):
        img = wrapper.find('img')
        alt = img.get('alt') if img is not None else None
        if alt:
            store_feature.add(alt)

    def _flag(keyword):
        # any() rather than sum(): two different icons mentioning the same
        # keyword must still yield 1, not 2.
        return int(any(keyword in alt for alt in store_feature))

    street_address = soup.find('span', class_ = 'c-address-street-1').text
    city = soup.find('span', class_ = 'c-address-city').text
    state = soup.find('abbr', class_ = 'c-address-state').text
    zipcode = soup.find('span', class_ = 'c-address-postal-code').text
    phone = soup.find('div', class_ = 'Core-phones').find('div', class_ = 'Phone-display').text

    store_info = {
        'address' : street_address + ', ' + city + ', ' + state + ' ' + zipcode,
        'phone' : phone,
        'open_late_night' : _flag('open late night'),
        'delivery' : _flag('delivery'),
        'drive_through' : _flag('drive thru'),
        'breakfast' : _flag('breakfast'),
    }

    return store_info

# Visit each store to get information

In [9]:
# Visit every store page once and collect two results per store:
#   store_features -- one dict per store (address, phone, service flags, url)
#   store_hour     -- one DataFrame per store (opening hours per day, url)
# NOTE: the loop rebinds `url`, `response`, `page` and `soup`, which earlier
# cells also define; re-run those cells before re-running the store-list cell.
store_features = []
store_hour = []

for url in store_urls:
    # Short pause between requests so the site is not hit in a tight loop.
    time.sleep(0.5)
    # Explicit timeout so one stalled connection cannot hang the whole run.
    response = requests.get(url, timeout = 30)
    # Fail with a clear HTTP error instead of parsing an error page.
    response.raise_for_status()
    page = response.text

    soup = bs(page)

    # Despite the name, this is the plain dict returned by get_store_feature.
    feature_df = get_store_feature(soup)
    feature_df['url'] = url
    store_features.append(feature_df)

    hourdf = get_store_hour(soup)
    hourdf['url'] = url
    store_hour.append(hourdf)
    

In [10]:
# Turn the list of per-store dicts into a table: one row per store, one column
# per dict key (address, phone, the service flags, url).
df_store_feat = pd.DataFrame(store_features)
df_store_feat

Unnamed: 0,address,phone,open_late_night,delivery,drive_through,breakfast,url
0,"11108 Olive Drive, Bakersfield, CA 93312",(661) 588-2939,0,1,1,1,https://locations.tacobell.com/ca/bakersfield/11108-olive-drive.html
1,"1301 Allen Rd., Bakersfield, CA 93314",(661) 589-1018,1,1,1,1,https://locations.tacobell.com/ca/bakersfield/1301-allen-rd-.html
2,"139 S Oswell St, Bakersfield, CA 93307",(661) 363-7455,1,1,1,1,https://locations.tacobell.com/ca/bakersfield/139-s-oswell-st.html
3,"15 South H Street, Bakersfield, CA 93304",(661) 637-1032,1,1,1,1,https://locations.tacobell.com/ca/bakersfield/15-south-h-street.html
4,"1877 White Lane, Bakersfield, CA 93304",(661) 836-3897,1,1,1,1,https://locations.tacobell.com/ca/bakersfield/1877-white-lane.html
5,"2300 H Street, Bakersfield, CA 93301",(661) 326-1366,1,1,1,1,https://locations.tacobell.com/ca/bakersfield/2300-h-street.html
6,"2433 N. Chester Ave., Bakersfield, CA 93308",(661) 399-0915,1,1,1,1,https://locations.tacobell.com/ca/bakersfield/2433-n--chester-ave-.html
7,"2676 Mt Vernon Avenue, Bakersfield, CA 93306",(661) 871-1065,1,1,1,1,https://locations.tacobell.com/ca/bakersfield/2676-mt-vernon-avenue.html
8,"3200 California Avenue, Bakersfield, CA 93304",(661) 322-1835,1,1,1,1,https://locations.tacobell.com/ca/bakersfield/3200-california-avenue.html
9,"3300 Buena Vista Rd, Bakersfield, CA 93311",(661) 663-8131,1,1,1,1,https://locations.tacobell.com/ca/bakersfield/3300-buena-vista-rd.html


In [12]:
# Stack the per-store hour tables vertically. The day labels in the index
# repeat for every store, so the `url` column is what identifies the store.
df_hour = pd.concat(store_hour)
# Preview the first 14 rows only.
df_hour.head(14)

Unnamed: 0,dt_open,dt_closed,fc_open,fc_closed,url
Mon,7:00 AM,1:00 AM,7:00 AM,10:00 PM,https://locations.tacobell.com/ca/bakersfield/11108-olive-drive.html
Tue,7:00 AM,1:00 AM,7:00 AM,10:00 PM,https://locations.tacobell.com/ca/bakersfield/11108-olive-drive.html
Wed,7:00 AM,1:00 AM,7:00 AM,10:00 PM,https://locations.tacobell.com/ca/bakersfield/11108-olive-drive.html
Thu,7:00 AM,1:00 AM,7:00 AM,10:00 PM,https://locations.tacobell.com/ca/bakersfield/11108-olive-drive.html
Fri,7:00 AM,2:00 AM,7:00 AM,10:00 PM,https://locations.tacobell.com/ca/bakersfield/11108-olive-drive.html
Sat,7:00 AM,2:00 AM,7:00 AM,10:00 PM,https://locations.tacobell.com/ca/bakersfield/11108-olive-drive.html
Sun,7:00 AM,1:00 AM,7:00 AM,10:00 PM,https://locations.tacobell.com/ca/bakersfield/11108-olive-drive.html
Mon,7:00 AM,2:30 AM,7:00 AM,10:00 PM,https://locations.tacobell.com/ca/bakersfield/1301-allen-rd-.html
Tue,7:00 AM,2:30 AM,7:00 AM,10:00 PM,https://locations.tacobell.com/ca/bakersfield/1301-allen-rd-.html
Wed,7:00 AM,2:30 AM,7:00 AM,10:00 PM,https://locations.tacobell.com/ca/bakersfield/1301-allen-rd-.html
