# Web Scraping with Beautiful Soup Lab

In [1]:
# Import libraries here
import pandas as pd
import requests
from bs4 import BeautifulSoup
import requests


### Step 1: Create a soup object from the home page

In [2]:
# Home page that links to every restaurant page
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
# Without a timeout, requests waits indefinitely on an unresponsive server;
# 10 seconds is generous for a small static page.
res = requests.get(url, timeout=10)

In [3]:
# HTTP status code of the home-page response (200 means the request succeeded)
res.status_code

200

In [4]:
# Parse the raw response bytes into a searchable tree using the lxml parser
soup = BeautifulSoup(res.content, 'lxml')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [5]:
# Every anchor tag on the home page except the first one
# (presumably the first <a> is not a restaurant link — TODO confirm against the page)
rest_tags = soup.find_all('a')[1:]

In [6]:
# One {'name', 'href'} record per restaurant anchor tag:
# the link text is the restaurant name, the href is its relative page path.
restaurants = [
    {'name': tag.text, 'href': tag.attrs['href']}
    for tag in rest_tags
]

# Tabular view of the same records
rest_df = pd.DataFrame(restaurants)

In [7]:
# Display the scraped records to check them against the expected structure
restaurants

[{'name': 'A&W Restaurants', 'href': 'restaurants/1.html'},
 {'name': "Applebee's", 'href': 'restaurants/2.html'},
 {'name': "Arby's", 'href': 'restaurants/3.html'},
 {'name': 'Atlanta Bread Company', 'href': 'restaurants/4.html'},
 {'name': "Bojangle's Famous Chicken 'n Biscuits",
  'href': 'restaurants/5.html'},
 {'name': 'Buffalo Wild Wings', 'href': 'restaurants/6.html'},
 {'name': 'Burger King', 'href': 'restaurants/7.html'},
 {'name': "Captain D's", 'href': 'restaurants/8.html'},
 {'name': "Carl's Jr.", 'href': 'restaurants/9.html'},
 {'name': "Charley's Grilled Subs", 'href': 'restaurants/10.html'},
 {'name': 'Chick-fil-A', 'href': 'restaurants/11.html'},
 {'name': "Chili's", 'href': 'restaurants/12.html'},
 {'name': 'Chipotle Mexican Grill', 'href': 'restaurants/13.html'},
 {'name': "Church's", 'href': 'restaurants/14.html'},
 {'name': 'Corner Bakery Cafe', 'href': 'restaurants/15.html'},
 {'name': 'Dairy Queen', 'href': 'restaurants/16.html'},
 {'name': "Denny's", 'href': 'res

### Step 3: Using the `href`, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

In [42]:
# Scrape every restaurant page and collect one dictionary per food item.
# The base URL never changes between iterations, so it is set once up front.
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'

foods = []
for rest_dict in restaurants:
    # Each restaurant's href is relative to the base URL.
    # The timeout keeps one unresponsive page from hanging the whole scrape.
    current_soup = BeautifulSoup(
        requests.get(f'{url}{rest_dict["href"]}', timeout=10).content, 'lxml')

    # Skip the first <tr> (presumably the table's header row — TODO confirm)
    current_rows = current_soup.find_all('tr')[1:]
    for row in current_rows:
        # Look the cells up once per row instead of once per field
        cells = row.find_all('td')

        food = {}
        food['calories'] = cells[2].text
        food['carbs'] = cells[4].text
        # The lab asks for extra white space to be removed from each category
        food['category'] = cells[1].text.strip()
        food['fat'] = cells[3].text
        food['name'] = cells[0].text
        # Key spelled 'restaurant' to match the expected food schema
        food['restaurant'] = rest_dict['name']

        foods.append(food)

    

In [44]:
# The trailing semicolon suppresses this cell's output
foods;

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows. Please output the number of rows in your DataFrame!

In [46]:
# One row per food dictionary; the dictionary keys become the columns
food_df = pd.DataFrame(foods)

In [47]:
# How many rows does your dataframe have?
# .shape is (rows, columns), so the first element is the row count
food_df.shape

(5131, 6)

### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [51]:
# Write the foods table to disk, leaving out the DataFrame index column
food_df.to_csv('./data/foods.csv', index=False)