# Automated Gasbuddy Scraper

In [None]:
from datetime import date
from pathlib import Path
import re
import time

from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import requests

### Scraping Function:

In [1]:
def price_check(postal_code, fuel_type):
    """Scrape GasBuddy station prices for one postal code and append them to a CSV.

    Fetches the GasBuddy search page for ``postal_code`` and ``fuel_type``,
    extracts each station's brand, price and address, and appends one row per
    station (stamped with today's date and the inferred country) to
    ``Automated Gas Scraper/gas_data_<postal_code>_fueltype<fuel_type>.csv``.
    The CSV header is written only when the file is empty.

    Args:
        postal_code: Postal code as a string. A 5-digit or ZIP+4 code is
            labelled 'US'; anything else is labelled 'CA'.
        fuel_type: Fuel selector as a string, passed through to the site's
            ``fuel`` query parameter.

    Returns:
        None. The result is the appended CSV file.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        ValueError: If the scraped brand, price and address lists differ in
            length, meaning the page layout no longer matches the selectors.
    """
    # REPLACE 'YOUR_USER_AGENT_HEADER' with your own User-Agent string.
    # requests needs a mapping of header names to values, not a bare string.
    headers = {'User-Agent': 'YOUR_USER_AGENT_HEADER'}

    # Let requests build the query string so codes containing spaces
    # (e.g. Canadian postal codes) are URL-encoded correctly.
    page = requests.get(
        'https://www.gasbuddy.com/home',
        params={
            'search': postal_code,
            'fuel': fuel_type,
            'method': 'all',
            'maxAge': '0',
        },
        headers=headers,
        timeout=30,  # never hang the caller on a stalled connection
    )
    # Fail loudly instead of parsing an error page and recording nothing.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    # Re-parse the prettified markup: the address extraction below splits on
    # runs of whitespace, which prettify() introduces between text nodes.
    soup_pretty = BeautifulSoup(soup.prettify(), 'html.parser')

    # Raw string so \d and \s are regex escapes, not (invalid) string escapes.
    is_us_zip = re.match(r'^\d{5}(?:[-\s]?\d{4})?$', postal_code)
    country = 'US' if is_us_zip else 'CA'

    brands = soup_pretty.find_all('h3', class_='header__header3___1b1oq header__header___1zII0 header__midnight___1tdCQ header__snug___lRSNK StationDisplay-module__stationNameHeader___1A2q8')
    brand_names = [brand.text.strip() for brand in brands]

    prices = soup_pretty.find_all('span', class_='text__xl___2MXGo text__left___1iOw3 StationDisplayPrice-module__price___3rARL')
    # Strip currency symbols; '- - -' is the site's placeholder for "no price".
    gas_prices = [
        price.text.strip().replace('¢', '').replace('$', '').replace('- - -', 'NaN')
        for price in prices
    ]

    addresses = soup_pretty.find_all('div', class_='StationDisplay-module__address___2_c7v')
    station_addresses = [
        ', '.join(re.split(r'\s{2,}', address.text.strip().replace('\n', '')))
        for address in addresses
    ]

    if not (len(brand_names) == len(gas_prices) == len(station_addresses)):
        raise ValueError(
            'Scraped columns have different lengths '
            f'(brands={len(brand_names)}, prices={len(gas_prices)}, '
            f'addresses={len(station_addresses)}); '
            'the page layout may have changed.'
        )

    df = pd.DataFrame({
        'Brand': brand_names,
        'Price': gas_prices,
        'Address': station_addresses,
        'Date': date.today(),
        'Country': country,
    })

    # Build the path portably and make sure the output folder exists.
    out_dir = Path('Automated Gas Scraper')
    out_dir.mkdir(parents=True, exist_ok=True)
    path = out_dir / f'gas_data_{postal_code}_fueltype{fuel_type}.csv'

    # Encoding and newline handling belong on open(): to_csv ignores its own
    # encoding/mode arguments when given a handle, and newline='' prevents
    # '\n' being translated to '\r\n' on Windows.
    with open(path, 'a', encoding='utf-8', newline='') as f:
        df.to_csv(f, index=False, header=f.tell() == 0, lineterminator='\n')

### Automate Scraper:

In [None]:
# postal accepts any standard US or CA postal code format.
# fuel takes a string digit 1-4, corresponding to:
# 1) Regular, 2) Midgrade, 3) Premium, 4) Diesel
postal = 'YOUR_POSTAL_CODE_HERE'
fuel = 'YOUR_FUEL_TYPE_HERE'
# Location of the CSV for this postal code / fuel type. price_check() builds
# its own output path; this variable is not used by the loop below.
path = (r'Automated Gas Scraper\gas_data_' +
        postal +
        '_fueltype' +
        fuel +
        '.csv')

# Scrape once a day, forever. A transient network/HTTP failure must not kill
# the scheduler, so report it and try again on the next cycle.
while True:
    try:
        price_check(postal, fuel)
    except requests.RequestException as err:
        print(f'price_check failed, will retry next cycle: {err}')
    time.sleep(86400)  # 24 hours