# Web Scraping with Beautiful Soup Lab

In [12]:
# Import libraries here
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Step 1: Create a soup object from the home page

In [13]:
# Download the home page that lists every restaurant and parse it.
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
res = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx response instead of silently parsing an error page.
res.raise_for_status()
soup = BeautifulSoup(res.content, 'lxml')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [14]:
# Locate the restaurant listing by its element id, then confirm what came back.
table = soup.find('table', id='restaurants')
type(table)

bs4.element.Tag

In [17]:
# Build one record per restaurant: its display name and the relative link
# to its detail page. Later cells read the 'restaurant' and 'link' keys.
restaurant = []

# Skip the first <tr>, which is treated as the header row.
for row in table.find_all('tr')[1:]:
    anchor = row.find('a')  # look the link up once instead of once per field
    href = anchor.get('href') if anchor is not None else None
    if href is None:
        # A row without a usable link cannot be followed later; skip it
        # rather than crash with AttributeError/KeyError.
        continue
    restaurant.append({
        'restaurant': anchor.text,
        'link': href,
    })

# Number of restaurants collected.
len(restaurant)

44

In [18]:
# List the scraped restaurant names, one per line, as a quick sanity check.
for entry in restaurant:
    print(entry['restaurant'])

A&W Restaurants
Applebee's
Arby's
Atlanta Bread Company
Bojangle's Famous Chicken 'n Biscuits
Buffalo Wild Wings
Burger King
Captain D's
Carl's Jr.
Charley's Grilled Subs
Chick-fil-A
Chili's
Chipotle Mexican Grill
Church's
Corner Bakery Cafe
Dairy Queen
Denny's
El Pollo Loco
FATZ
Fazoli's
Five Guys Burgers and Fries
Golden Chick
Hardee's
IHOP
In-N-Out Burger
Jack in the Box
Jimmy Johns
Joe's Crab Shack
KFC
McDonald's
O'Charley's
Olive Garden
Outback Steakhouse
Panda Express
Panera Bread
Popeye's
Quiznos
Red Robin Gourmet Burgers
Romano's Macaroni Grill
Ruby Tuesday
Subway
Taco Bell
Taco Bueno
Wendy's


### Step 3: Using the `href`, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra whitespace from each category.

In [19]:
# Visit every restaurant page and collect one record per menu item.
base_url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
foods = []

for entry in restaurant:
    # Distinct names keep this cell from overwriting the home page's `res`,
    # `url`, `soup` and `table` objects that earlier cells created.
    response = requests.get(base_url + entry['link'], timeout=30)
    # Stop on a 4xx/5xx response instead of scraping an error page.
    response.raise_for_status()
    page_soup = BeautifulSoup(response.content, 'lxml')
    food_table = page_soup.find('table')

    # Skip the first <tr>, which is treated as the header row.
    for row in food_table.find_all('tr')[1:]:
        cells = row.find_all('td')  # query the row's cells once
        foods.append({
            'calories': cells[2].text,
            'carbs': cells[4].text,
            # The lab asks for extra whitespace to be removed from categories.
            'category': cells[1].text.strip(),
            'fat': cells[3].text,
            'name': cells[0].text,
            'restaurant': entry['restaurant'],
        })

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows. Please output the number of rows in your DataFrame!

In [21]:
# Turn the list of food dictionaries into a table, one row per food.
data = pd.DataFrame.from_records(foods)

In [22]:
# Show (rows, columns) so the row count can be checked against the expected 5,131.
data.shape

(5131, 6)

In [8]:
# How many rows does your dataframe have?
# 5131

### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [23]:
# Write the foods table to disk; index=False keeps the row index out of the file.
data.to_csv(path_or_buf='food_list.csv', index=False)