In [1]:
# Import libraries
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd

### Step 1: Create a soup object from the home page

In [2]:
# Home page that links to every restaurant page
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'

# Fetch the page; the timeout keeps a stalled connection from hanging the kernel forever
res = requests.get(url, timeout=30)

# Fail loudly on a 4xx/5xx response instead of parsing an error page as if it were the real one
res.raise_for_status()

# Parse the raw response bytes with the lxml parser
soup = BeautifulSoup(res.content, 'lxml')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [3]:
# Grab the one table (id="restaurants") that holds all of the restaurant links
restaurants_table = soup.find('table', attrs={'id': 'restaurants'})

# One dictionary per <a> tag: the link text is the restaurant's name,
# and the href attribute is the path to that restaurant's page
restaurants = [
    {'name': anchor.text, 'href': anchor['href']}
    for anchor in restaurants_table.find_all('a')
]

# Preview the first three entries
restaurants[:3]

[{'name': 'A&W Restaurants', 'href': 'restaurants/1.html'},
 {'name': "Applebee's", 'href': 'restaurants/2.html'},
 {'name': "Arby's", 'href': 'restaurants/3.html'}]

### Step 3: Using the `href`, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

In [4]:
# Scrape every restaurant page and collect one dictionary per food row.
# Each dictionary has the keys: restaurant, name, category, calories, fat, carbs
# (all values are kept as the text found in the page).
foods = []

# Loop through each restaurant collected in the previous step
for restaurant in restaurants:
    print('Scraping {}'.format(restaurant['name']))

    # The href is appended to the site's base URL to form the page URL
    href = restaurant['href']
    restaurant_url = f'https://pages.git.generalassemb.ly/rldaggie/for-scraping/{href}'

    # Fetch the restaurant page; the timeout keeps one stalled request
    # from hanging the whole scrape
    restaurant_res = requests.get(restaurant_url, timeout=30)

    # Stop on a 4xx/5xx response rather than trying to scrape an error page
    restaurant_res.raise_for_status()

    # Create a soup object from the restaurant's html
    restaurant_soup = BeautifulSoup(restaurant_res.content, 'lxml')

    # Isolate the foods table (the first <table> on the restaurant page)
    table = restaurant_soup.find('table')

    # Loop through each row in the tbody of the foods table
    for row in table.find('tbody').find_all('tr'):
        # Almost every <td> in the row is used, so grab them all once
        cells = row.find_all('td')

        # Build the food dictionary; the key order here is the order the keys are inserted
        food = {
            # The restaurant's name (the href was only needed for scraping)
            'restaurant': restaurant['name'],
            # Food name from the first cell
            'name': cells[0].text,
            # Category, with surrounding white space removed by .strip()
            'category': cells[1].text.strip(),
            # Calories, fat and carbs from the next three cells
            'calories': cells[2].text,
            'fat': cells[3].text,
            'carbs': cells[4].text,
        }

        # Add the food dictionary to our list of foods
        foods.append(food)

    # Be courteous and throttle your scrapes!
    time.sleep(3)

Scraping A&W Restaurants
Scraping Applebee's
Scraping Arby's
Scraping Atlanta Bread Company
Scraping Bojangle's Famous Chicken 'n Biscuits
Scraping Buffalo Wild Wings
Scraping Burger King
Scraping Captain D's
Scraping Carl's Jr.
Scraping Charley's Grilled Subs
Scraping Chick-fil-A
Scraping Chili's
Scraping Chipotle Mexican Grill
Scraping Church's
Scraping Corner Bakery Cafe
Scraping Dairy Queen
Scraping Denny's
Scraping El Pollo Loco
Scraping FATZ
Scraping Fazoli's
Scraping Five Guys Burgers and Fries
Scraping Golden Chick
Scraping Hardee's
Scraping IHOP
Scraping In-N-Out Burger
Scraping Jack in the Box
Scraping Jimmy Johns
Scraping Joe's Crab Shack
Scraping KFC
Scraping McDonald's
Scraping O'Charley's
Scraping Olive Garden
Scraping Outback Steakhouse
Scraping Panda Express
Scraping Panera Bread
Scraping Popeye's
Scraping Quiznos
Scraping Red Robin Gourmet Burgers
Scraping Romano's Macaroni Grill
Scraping Ruby Tuesday
Scraping Subway
Scraping Taco Bell
Scraping Taco Bueno
Scraping Wend

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows

In [5]:
# Build the DataFrame from the list of food dictionaries
df = pd.DataFrame(foods)
# Preview the first rows
df.head()

Unnamed: 0,restaurant,name,category,calories,fat,carbs
0,A&W Restaurants,Original Bacon Double Cheeseburger,Burgers,760,45,45
1,A&W Restaurants,Coney (Chili) Dog,Entrees,340,20,26
2,A&W Restaurants,Chili Fries,French Fries,370,15,49
3,A&W Restaurants,Strawberry Milkshake (small),Shakes,670,29,90
4,A&W Restaurants,A&W® Root Beer Freeze (large),Shakes,820,18,150


In [6]:
# Display the DataFrame's shape so the row count can be checked against the expected total
df.shape

(5131, 6)

### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [7]:
# Write the DataFrame to foods.csv; index=False leaves the index column out of the file
df.to_csv('foods.csv', index=False)