In [1]:
# Import libaries

import requests
from bs4 import BeautifulSoup
import pandas as pd

### Step 1: Create a soup object from the home page

In [2]:
# Define url:

url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
res = requests.get(url)
res.status_code

200

In [3]:
# Create soup object:

soup = BeautifulSoup(res.content, 'lxml')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [4]:
# Observe the soup:

print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <title>
   Nutrition Information
  </title>
  <link crossorigin="anonymous" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" rel="stylesheet"/>
 </head>
 <body>
  <header>
   <section class="container">
    <nav class="navbar navbar-expand-lg navbar-light bg-light" role="navigation">
     <a class="navbar-brand" href="/">
      Nutrition Information
     </a>
    </nav>
   </section>
  </header>
  <main class="container" role="main">
   <br/>
   <div class="alert alert-danger">
    NOTE: This data is super old and rife with errors. It's meant for scraping practice only.
   </div>
   <table class="table" id="restaurants">
    <thead>
     <tr>
      <th>
       Name
      </th>

In [5]:
# Create df of restaurant names and urls:

restaurants = []

for row in soup.find_all('tr')[1:]:
    
    restaurant = {
        'name': row.find('a').text.strip(),
        'href': row.find('a')['href']}

    restaurants.append(restaurant)
    
df = pd.DataFrame(restaurants)

In [6]:
df

Unnamed: 0,name,href
0,A&W Restaurants,restaurants/1.html
1,Applebee's,restaurants/2.html
2,Arby's,restaurants/3.html
3,Atlanta Bread Company,restaurants/4.html
4,Bojangle's Famous Chicken 'n Biscuits,restaurants/5.html
5,Buffalo Wild Wings,restaurants/6.html
6,Burger King,restaurants/7.html
7,Captain D's,restaurants/8.html
8,Carl's Jr.,restaurants/9.html
9,Charley's Grilled Subs,restaurants/10.html


### Step 3: Using the `href`, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

In [7]:
# Create list for restaurant foods:

foods = []

for i in range(df.shape[0]):
    
    url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
    url_2 = df.iloc[i][1]
    url_new = url + url_2
    res = requests.get(url_new)
    soup = BeautifulSoup(res.content,'lxml')
    tbody = soup.find('tbody')
    rest_name = df.iloc[i][0]

    for row in tbody.find_all('tr'):
        
        row = row.get_text()
        row = row.split()
        row[:-4] = [' '.join(row[:-4])]        
        food = {
            'restaurant': rest_name,
            'name'      : row[0],
            'category'  : row[-4],
            'calories'  : row[-3],
            'fat'       : row[-2],
            'carbs'     : row[-1]}
        foods.append(food)
        

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows

In [8]:
df_food=pd.DataFrame(foods)
df_food

Unnamed: 0,restaurant,name,category,calories,fat,carbs
0,A&W Restaurants,Original Bacon Double Cheeseburger,Burgers,760,45,45
1,A&W Restaurants,Coney (Chili) Dog,Entrees,340,20,26
2,A&W Restaurants,Chili Fries French,Fries,370,15,49
3,A&W Restaurants,Strawberry Milkshake (small),Shakes,670,29,90
4,A&W Restaurants,A&W® Root Beer Freeze (large),Shakes,820,18,150
...,...,...,...,...,...,...
5126,Wendy's,Jr. Original Chocolate Frosty™,Shakes,200,5,32
5127,Wendy's,Grilled Chicken Go Wrap,Wraps,260,10,25
5128,Wendy's,Asiago Ranch Chicken Club,Sandwiches,670,32,57
5129,Wendy's,Spicy Chicken Go Wrap,Wraps,330,16,30


### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [10]:
df_food.to_csv('./export/food_scrape.csv', index=False)