In [1]:
# Import libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup
import html

In [2]:
from IPython.display import display, Markdown

# Render the lab instructions inline. The README contains non-ASCII characters
# (e.g. the registered-trademark sign), so the encoding is given explicitly
# instead of relying on the platform default.
with open('README.md', 'r', encoding='utf-8') as fh:
    content = fh.read()

display(Markdown(content))

# Web Scraping with Beautiful Soup: A Lab

Using Beautiful Soup, scrape [ChubbyGrub.com](http://chubbygrub.com), and create a DataFrame of food items from every restaurant. Your DataFrame should look something like this:

| restaurant | category | name    | calories | carbs | fat |
|------------|----------|---------|----------|-------|-----|
| McDonald's | Burgers  | Big Mac | 540      | 45    | 29  |
| Burger King | Burgers  | Whopper | 900      | 51    | 57  |
| ... | ...  | ... | ...      | ...    | ...  |
| Chili's | Ribs  | Shiner Bock® BBQ Ribs | 2310      | 168    | 123  |


**Note**: Your DataFrame should have just over 4,900 rows


### Step 1: Create a soup object from the home page

In [3]:
# Fetch the home page. A timeout is supplied because requests otherwise waits
# indefinitely on an unresponsive server, which would hang the notebook.
url = 'http://chubbygrub.com/'
res = requests.get(url, timeout=30)

In [4]:
# Inspect the HTTP status code of the home page response.
res.status_code

200

In [5]:
# Parse the raw response bytes of the home page with the lxml parser.
soup = BeautifulSoup(res.content, features='lxml')

In [6]:
# Grab the page's <h1> tag as a quick check that parsing worked.
h1 = soup.find(name='h1')

In [7]:
# Check what kind of object `soup.find` returned.
type(h1)

bs4.element.Tag

In [8]:
# Show the text content of the <h1> tag.
h1.text

'Nutrition information from your favorite restaurants…all in one place!'

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and slug. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'slug': 'aw-restaurants'}, 
    {'name': "Applebee's", 'slug': 'applebees'},
    ...
]
```

In [9]:
# Inspecting the site shows that every restaurant link is an <a> tag whose
# class attribute is 'btn btn-lg btn-primary'; collect all of them.
div = soup.find_all(name='a', attrs={'class': 'btn btn-lg btn-primary'})

In [10]:
# One dict per restaurant link: the visible link text is the name, and the
# slug is the third '/'-separated piece of the link's href.
restaurants = [
    {'name': link.text, 'slug': link.attrs['href'].split('/')[2]}
    for link in div
]

In [11]:
# Preview the first five scraped restaurant entries.
restaurants[:5]

[{'name': 'A&W Restaurants', 'slug': 'aw-restaurants'},
 {'name': "Applebee's", 'slug': 'applebees'},
 {'name': "Arby's", 'slug': 'arbys'},
 {'name': 'Atlanta Bread Company', 'slug': 'atlanta-bread-company'},
 {'name': "Bojangle's Famous Chicken 'n Biscuits",
  'slug': 'bojangles-famous-chicken-n-biscuits'}]

In [12]:
# URL prefix for individual restaurant pages; a restaurant's slug is appended to it.
base_url = 'http://chubbygrub.com/restaurants/'

### Step 3: Using the slug, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

In [13]:
# Pull the slug out of every restaurant dict, keeping the original order.
slug_list = [restaurant['slug'] for restaurant in restaurants]

In [14]:
# Preview the first five slugs.
slug_list[:5]

['aw-restaurants',
 'applebees',
 'arbys',
 'atlanta-bread-company',
 'bojangles-famous-chicken-n-biscuits']

In [15]:
# Scrape each restaurant's page and store its parsed menu rows on the matching
# entry of `restaurants` under the 'foods' key.
for x, slug in enumerate(slug_list):
    # Page-specific names are used so the home-page `res` and `soup` objects
    # are not overwritten; earlier cells that read them still give the same
    # result if re-run after this one.
    # A timeout keeps one unresponsive page from hanging the whole scrape.
    page_res = requests.get(base_url + slug + '/', timeout=30)
    page_soup = BeautifulSoup(page_res.content, 'lxml')
    table_soup = page_soup.find_all('tr')
    foods = []

    # The first <tr> is skipped (presumably the table's header row).
    for table_row in table_soup[1:]:
        # Serialise the row once; every field is cut out of this markup by
        # splitting on the text immediately before and after the value.
        row_html = str(table_row)

        food = {}
        food['calories'] = row_html.split('calories">')[1].split('</td>')[0]
        food['carbs'] = row_html.split('carbohydrateContent">')[1].split('</td>')[0]
        # NOTE(review): this is the category's URL slug (e.g. 'drinks'), not
        # the visible link text ('Drinks' in the README example) - confirm
        # which form is wanted.
        food['category'] = row_html.split('href="/categories/')[1].split('"')[0]
        food['fat'] = row_html.split('fatContent">')[1].split('</td>')[0]
        # Item names can contain HTML entities, so decode them.
        food['name'] = html.unescape(row_html.split('itemprop="name">')[1].split('</td>')[0])
        food['restaurant'] = restaurants[x]['name']

        foods.append(food)

    restaurants[x]['foods'] = foods

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 4,977 rows

In [16]:
# Flatten the per-restaurant 'foods' lists into a single list of food dicts,
# preserving restaurant order and item order.
# A comprehension is used so no variable named `dict` is bound; doing so would
# shadow the built-in `dict` for every cell run afterwards.
list_of_food_dicts = [
    food
    for restaurant in restaurants
    for food in restaurant['foods']
]

In [17]:
# Build one DataFrame row per food dict, then preview the first five rows.
df = pd.DataFrame(data=list_of_food_dicts)
df.head(5)

Unnamed: 0,calories,carbs,category,fat,name,restaurant
0,0,0,drinks,0,A&W® Diet Root Beer,A&W Restaurants
1,0,0,drinks,0,A&W® Diet Root Beer,A&W Restaurants
2,0,0,drinks,0,A&W® Diet Root Beer,A&W Restaurants
3,0,0,drinks,0,A&W® Diet Root Beer,A&W Restaurants
4,0,0,drinks,0,A&W® Diet Root Beer,A&W Restaurants


### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [19]:
# Write the foods table to disk without the index column; to_csv separates
# fields with a comma by default.
df.to_csv('chubbygrub_foods.csv', index=False)