![title](img/tb_23557_westheimer.png)

In [1]:
import re
import time
from pprint import pprint

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from IPython.display import HTML, display

In [2]:
# Let the notebook container use the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Read both stylesheets inside a context manager so the file handles are
# closed as soon as their contents are loaded.
with open('../style-table.css') as table_css, open('../style-notebook.css') as notebook_css:
    css = table_css.read() + notebook_css.read()

# Render explicitly: a bare HTML(...) expression is only shown when it is the
# last statement of a cell, so without display() the styles are never applied.
display(HTML('<style>{}</style>'.format(css)))

pd.set_option('display.max_colwidth', 200)


# Visit main page to get store list with urls

In [3]:
url = 'https://locations.tacobell.com/tx/katy.html'
#url = 'https://locations.tacobell.com/ca/bakersfield.html'
# requests waits indefinitely by default; the timeout (seconds) keeps this
# cell from hanging if the server never answers.
response = requests.get(url, timeout = 10)

response.status_code

200

In [4]:
page = response.text

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, so the parse tree can differ between machines.
soup = bs(page, 'html.parser')

In [5]:
# Site root substituted for the '..' in the relative store links when the
# store URL list is built.
url_prefix = 'https://locations.tacobell.com'

In [6]:
# Every store is linked through an anchor labelled "View Page"; its href is
# relative, so the '..' part is swapped for the site root.
view_page_links = soup.find_all('a', text = 'View Page')
store_urls = [link['href'].replace('..', url_prefix) for link in view_page_links]
store_urls

['https://locations.tacobell.com/tx/katy/22642-morton-ranch-road.html',
 'https://locations.tacobell.com/tx/katy/23557-westheimer-pkwy-.html',
 'https://locations.tacobell.com/tx/katy/25109-market-place-drive.html',
 'https://locations.tacobell.com/tx/katy/601-s-mason-rd.html',
 'https://locations.tacobell.com/tx/katy/6117-n--fry-road.html']

# Define functions to get store hours and features (address, phone, and whether the store offers delivery or breakfast)

In [7]:
def get_store_hour(soup):
    """Extract the weekly drive-thru and dine-in hours from a store page.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed HTML of a single store page.

    Returns
    -------
    pandas.DataFrame
        One row per table row, indexed by the leading capitalised word of the
        row text (the day label). Columns are 'dt_open' / 'dt_closed' for the
        'Drive-Thru Hours' section and 'fc_open' / 'fc_closed' for the
        'Dine-In Hours' section. A row without a recognisable time range gets
        the string 'closed'. A section that is absent from the page
        contributes no columns; if neither section is present an empty
        DataFrame is returned.

    Raises
    ------
    AttributeError
        If a row's text does not start with a capitalised day label.
    """
    dow_pattern = re.compile(r'^([A-Z][a-z]+)')
    opn_pattern = re.compile(r'(\d.+M)\ ')
    cld_pattern = re.compile(r'-\ (\d.+M)')

    def first_group(pattern, row_text):
        # Search once per row; no match means no time range was listed.
        match = pattern.search(row_text)
        return match.group(1) if match else 'closed'

    sections = (('Drive-Thru Hours', 'dt'), ('Dine-In Hours', 'fc'))

    dflist = []

    for text, tag in sections:
        mytable = soup.find(class_ = 'c-hours-title', text = text)
        if mytable is None:
            # Not every store lists every section; skip it instead of
            # failing on None.
            continue

        # Two steps past the title element is the element holding the rows;
        # the first row is skipped (presumably the header row - TODO confirm).
        hours = [row.text for row in mytable.next.next.find_all('tr')[1:]]

        dow = [dow_pattern.search(row_text).group(1) for row_text in hours]
        opn = [first_group(opn_pattern, row_text) for row_text in hours]
        cld = [first_group(cld_pattern, row_text) for row_text in hours]

        dflist.append(pd.DataFrame({tag + '_open' : opn,
                                    tag + '_closed' : cld},
                                   index = dow))

    if not dflist:
        # pd.concat raises on an empty list.
        return pd.DataFrame()

    return pd.concat(dflist, axis = 1)

In [8]:
def get_store_feature(soup):
    """Collect the address, phone number and service flags of one store page.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed HTML of a single store page.

    Returns
    -------
    dict
        Keys 'address' ("street, city, state zip"), 'phone',
        'open_late_night', 'delivery', 'drive_through' and 'breakfast'.
        The four service entries are 0/1 flags: 1 when any icon's alt text
        contains the corresponding keyword.

    Raises
    ------
    AttributeError
        If an address or phone element is not found on the page.
    """
    # Gather the alt text of every icon image; wrappers without an <img>
    # are ignored rather than crashing the lookup.
    store_feature = set()
    for wrapper in soup.find_all(class_ = 'Core-icon-wrapper'):
        icon = wrapper.find('img')
        if icon is not None:
            store_feature.add(icon.get('alt', ''))

    def has_feature(keyword):
        # any() keeps the flag at 0/1 even if several alt texts mention the
        # keyword; summing the matches would report a count instead.
        return int(any(keyword in alt for alt in store_feature))

    street_address = soup.find('span', class_ = 'c-address-street-1').text
    city = soup.find('span', class_ = 'c-address-city').text
    state = soup.find('abbr', class_ = 'c-address-state').text
    zipcode = soup.find('span', class_ = 'c-address-postal-code').text
    phone = soup.find('div', class_ = 'Core-phones').find('div', class_ = 'Phone-display').text

    store_info = {
        'address' : street_address + ', ' + city + ', ' + state + ' ' + zipcode,
        'phone' : phone,
        'open_late_night' : has_feature('open late night'),
        'delivery' : has_feature('delivery'),
        'drive_through' : has_feature('drive thru'),
        'breakfast' : has_feature('breakfast'),
    }

    return store_info

# Visit each store to get information

In [9]:
store_features = []
store_hour = []

for url in store_urls:
    # Brief pause before each request so the stores are not fetched back to back.
    time.sleep(0.5)
    # Bound the wait and stop on an HTTP error instead of parsing an error page.
    response = requests.get(url, timeout = 10)
    response.raise_for_status()
    page = response.text

    # Explicit parser so the parse tree does not depend on what is installed.
    soup = bs(page, 'html.parser')

    # get_store_feature returns a plain dict; the URL is added as the key
    # that ties features and hours of the same store together.
    feature_df = get_store_feature(soup)
    feature_df['url'] = url
    store_features.append(feature_df)

    hourdf = get_store_hour(soup)
    hourdf['url'] = url
    store_hour.append(hourdf)
    

In [10]:
# One row per store: each dict collected in store_features becomes a row.
df_store_feat = pd.DataFrame(store_features)
df_store_feat

Unnamed: 0,address,phone,open_late_night,delivery,drive_through,breakfast,url
0,"22642 Morton Ranch Road, Katy, TX 77449",(281) 347-3309,1,1,1,0,https://locations.tacobell.com/tx/katy/22642-morton-ranch-road.html
1,"23557 Westheimer Pkwy., Katy, TX 77494",(281) 391-3504,1,1,1,1,https://locations.tacobell.com/tx/katy/23557-westheimer-pkwy-.html
2,"25109 Market Place Drive, Katy, TX 77494",(281) 644-2599,1,1,1,0,https://locations.tacobell.com/tx/katy/25109-market-place-drive.html
3,"601 S Mason Rd, Katy, TX 77450",(281) 578-1342,1,1,1,1,https://locations.tacobell.com/tx/katy/601-s-mason-rd.html
4,"6117 N. Fry Road, Katy, TX 77449",(281) 858-5024,1,1,1,0,https://locations.tacobell.com/tx/katy/6117-n--fry-road.html


In [12]:
# Stack the per-store hour tables; the index labels repeat for every store,
# so the 'url' column is what identifies the store a row belongs to.
df_hour = pd.concat(store_hour)
df_hour

Unnamed: 0,dt_open,dt_closed,fc_open,fc_closed,url
Mon,9:00 AM,2:00 AM,closed,closed,https://locations.tacobell.com/tx/katy/22642-morton-ranch-road.html
Tue,9:00 AM,2:00 AM,closed,closed,https://locations.tacobell.com/tx/katy/22642-morton-ranch-road.html
Wed,9:00 AM,2:00 AM,closed,closed,https://locations.tacobell.com/tx/katy/22642-morton-ranch-road.html
Thu,9:00 AM,2:00 AM,closed,closed,https://locations.tacobell.com/tx/katy/22642-morton-ranch-road.html
Fri,9:00 AM,3:00 AM,closed,closed,https://locations.tacobell.com/tx/katy/22642-morton-ranch-road.html
Sat,9:00 AM,3:00 AM,closed,closed,https://locations.tacobell.com/tx/katy/22642-morton-ranch-road.html
Sun,9:00 AM,2:00 AM,closed,closed,https://locations.tacobell.com/tx/katy/22642-morton-ranch-road.html
Mon,8:00 AM,9:00 PM,closed,closed,https://locations.tacobell.com/tx/katy/23557-westheimer-pkwy-.html
Tue,8:00 AM,9:00 PM,closed,closed,https://locations.tacobell.com/tx/katy/23557-westheimer-pkwy-.html
Wed,8:00 AM,9:00 AM,closed,closed,https://locations.tacobell.com/tx/katy/23557-westheimer-pkwy-.html
