In [26]:

import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from datetime import datetime
import sqlite3
import re


################################################
# Helper functions #
################################################

# Parse a specific tag + class
def parseColumn(soup, html_tag, html_class):
    dataColumn = soup.find_all(html_tag, class_=html_class)
    new_col_list = []
    
    for i in range(0, len(dataColumn)):
        new_col_list.append(dataColumn[i].get_text())
        
    return new_col_list

# Add new column to data frame
def addColumnDF(df, arr, col_name):
    df_tmp = pd.DataFrame(arr, columns = [col_name])
    df[col_name] = df_tmp[col_name]

# Grab vehicle manufacture year
def get_valid_year(soup, html_tag, html_class):
    models = parseColumn(soup, html_tag, html_class)
    new_col_list = []
    
    for model in models:
        new_col_list.append(model[0:4])
        
    return new_col_list

def fix_vehicle_type(df):
    # Check van & cargo
    for idx, row in df.iterrows():
        if 'van' in row['title'].lower() and 'cargo' in row['title'].lower() :
            row['vehicle_type'] = 'Cargo Van'

# Make & Model & Vehicle Type
def get_car_make_model_type(soup, html_tag, html_class):
    full_titles = parseColumn(soup, html_tag, html_class)
    makes = []
    models = []
    vehicle_types = []
    valid_makes = ['BMW', 'Audi', 'Toyota', 'Lexus', 'Ford', 'Honda'
                   , 'Hyundai', 'Kia', 'Chevrolet', 'Jeep', 'Nissan', 'Volkswagen'
                   , 'Mitsubishi', 'Mazda', 'GMC', 'Cadillac', 'Land Rover', 'Dodge'           
                   , 'Subaru', 'Ram', 'Chrysler', 'Acura', 'Mercedes-Benz', 'Infiniti']
    
    valid_vehicle_type = ['Sedan', 'SUV', 'Coupe', 'Wagon', 'Hatchback', 'Truck', 'Cargo Van', 'Van']
    
    # Remove years
    for idx in range(len(full_titles)):
        full_titles[idx] = full_titles[idx][5:]
        
    for model in full_titles:
        clean_model = ''
        stripped_make = ''
        stripped_type = ''
        for make in valid_makes:
            if make.lower() in model.lower():
                stripped_make = make
                clean_model = model.replace(make, '')
        
        for vt in valid_vehicle_type:
            if vt.lower() in clean_model.lower():
                stripped_type = vt
                clean_model = clean_model.replace(vt, '')
                
        if (stripped_make != ''):
            makes.append(stripped_make.strip())
        else:
            makes.append(None) # if none matches add a none
        
        if (stripped_type != ''):
            vehicle_types.append(stripped_type.strip())
        else:
            vehicle_types.append(None) # if none matches add a none
        
        models.append(clean_model.strip()) 

    return makes, models, vehicle_types

# def get_numeric_vehicle_data(html_tag, html_class):
#     scraped_mileages = parseColumn(html_tag, html_class)
#     mileages = []
    
#     for mileage in scraped_mileages:
#         mileages.append(int(re.sub("[^0-9]", "", mileage)))
    
#     return mileages

def get_numeric_vehicle_data(soup, html_tag, html_class):
    scraped_data = parseColumn(soup, html_tag, html_class)
    parsed_data = []
    numeric_data = []
    
    for el in scraped_data:
        parsed_data.append(re.sub("[^0-9]", "", el))
        
    for data in parsed_data:
        if data == '':
            numeric_data.append(np.nan)
        else:
            numeric_data.append(int(data))
    
    return numeric_data

# Use for when vehicle data doesn't have specific class names
def get_misc_vehicle_data(soup, html_tag, html_class, vehicle_data_type):
    misc_vehicle_data = parseColumn(soup, html_tag, html_class)
    vehicle_data_list = []
    for row in misc_vehicle_data:
        for el in row.split('\n'):
            if vehicle_data_type.lower() in el.lower():
                vehicle_data_list.append(el.split(':')[1].strip())
    return vehicle_data_list


In [19]:
# Dealership Info Dictionary
dealerships = {
    'Bostonyan Auto Group': {
        'url': 'https://www.bostonyanautogroup.com/view-inventory',
        'pagination_url': '',
        'dealership_name': 'Bostonyan Auto Group',
        'address': '119 Worcester St',
        'zipcode': '01760',
        'city': 'Natick',
        'state': 'MA'
    },
    'Direct Auto Mecca': {
        'url': 'https://www.directautomecca.com/view-inventory.aspx',
        'pagination_url': 'https://www.directautomecca.com/inventory.aspx?_new=true&_used=true&_page=2',
        'dealership_name': 'Direct Auto Mecca',
        'address': '154 Waverly Street',
        'zipcode': '01760',
        'city': 'Natick',
        'state': 'MA'
    },
    'Fafama Auto Sales': {
        'url': 'https://www.fafama.com/used-cars.aspx',
        'pagination_url': 'https://www.fafama.com/inventory.aspx?_used=true&_page=2',
        'dealership_name': 'Fafama Auto Sales',
        'address': '5 Cape Road',
        'zipcode': '01757',
        'city': 'Milford',
        'state': 'MA'
    }    
}

In [20]:
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
}

URL = 'https://www.fafama.com/used-cars.aspx'
response = requests.get(URL, headers = headers)

soup = BeautifulSoup(response.text, "html.parser")

In [21]:
print(soup.prettify())

<!DOCTYPE html>
<html dir="ltr" lang="en-US">
 <head>
  <title>
   Used Car Inventory &amp; Prices - Boston, Milford, &amp; Framingham, MA | Fafama Auto Sales
  </title>
  <meta content="Serving Boston, Milford, and Framingham, MA, Fafama Auto Sales is the best place to purchase your next used car. Check out our car deals today!" name="description"/>
  <meta content="Boston Used Cars, used cars for sale in Boston, Used Cars Natick, Used cars Worcester MA, used cars boston, used cars providence" name="keywords"/>
  <meta content="text/html; charset=utf-8" http-equiv="content-type">
   <meta content="Fafama Auto Sales" name="author">
    <meta content="width=device-width, initial-scale=1" name="viewport"/>
    <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
    <meta content="Used Car Inventory &amp; Prices - Boston, Milford, &amp; Framingham, MA | Fafama Auto Sales" name="og:title"/>
    <meta content="website" name="og:type"/>
    <meta content="Fafama Auto Sales" property="og:s

In [60]:
def get_fafama_inventory_data(soup, dealership_info):
    # Initialize empty data frame
    cars = pd.DataFrame()
    
    # Add title
    title = parseColumn(soup, 'h2', 'color m-0 ebiz-vdp-title')    
    addColumnDF(cars, title, 'title')

    # Add vehicle manufacture date
    years = get_valid_year(soup, 'h2', 'color m-0 ebiz-vdp-title')
    addColumnDF(cars, years, 'year')

    # Add make, models, and vehicle type
    makes, models, vtypes = get_car_make_model_type(soup, 'h2', 'color m-0 ebiz-vdp-title')
    addColumnDF(cars, makes, 'make')
    addColumnDF(cars, models, 'models')
    addColumnDF(cars, vtypes, 'vehicle_type')
    
    # Clean up vehicle type
    fix_vehicle_type(cars)

    # Add mileage col
    miles = get_numeric_vehicle_data(soup, 'li', 'mileage-units')
    addColumnDF(cars, miles, 'vehicle_mileage')

    # Add price
    car_prices = get_numeric_vehicle_data(soup, 'div', 'price-item active mt-3 mt-md-0')
    addColumnDF(cars, car_prices, 'price')

    # Add Colors & transmission & other cols
    addColumnDF(cars, get_misc_vehicle_data(soup, 'ul', 'small list-unstyled mb-0', 'exterior'), 'exterior_color')
    addColumnDF(cars, get_misc_vehicle_data(soup, 'ul', 'small list-unstyled mb-0', 'interior'), 'interior_color')
    addColumnDF(cars, get_misc_vehicle_data(soup, 'ul', 'small list-unstyled mb-0', 'transmission'), 'transmission')
    addColumnDF(cars, get_misc_vehicle_data(soup, 'ul', 'small list-unstyled mb-0', 'engine'), 'engine')
    addColumnDF(cars, parseColumn(soup, 'li', 'vin'), 'vin')
    cars['vin'] = cars['vin'].str.replace('VIN #: ', '')
    
    # Add dealership info + scrape date
    cars['dealership_name'] = dealership_info['dealership_name']
    cars['dealership_address'] = dealership_info['address']
    cars['dealership_zipcode'] = dealership_info['zipcode']
    cars['dealership_city'] = dealership_info['city']
    cars['dealership_state'] = dealership_info['state']
    cars['inventory_url'] = dealership_info['url']
    cars['scraped_date'] = datetime.now(tz = None)
    
    return cars
    
    # Add data to a SQLite database
#     conn = sqlite3.connect('cars.db')
#     cars.to_sql('inventory', conn, if_exists='append', index=False)    

In [53]:
c = get_fafama_inventory_data(soup, dealerships['Fafama Auto Sales'])

In [54]:
c

Unnamed: 0,title,year,make,models,vehicle_type,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
0,2019 Acura ILX Sedan,2019,Acura,ILX,Sedan,36721,26790.0,Majestic Black Pearl,Ebony,8 Spd Automatic,2.4L 4 CYLINDER,19UDE2F37KA004691,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:07:11.328523
1,2019 Acura ILX Technology Pkg Sedan,2019,Acura,ILX Technology Pkg,Sedan,21018,28390.0,Majestic Black Pearl,Ebony,8 Spd Automatic,2.4L 4 CYLINDER,19UDE2F72KA005069,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:07:11.328523
2,2018 Acura ILX Special Edition Sedan,2018,Acura,ILX Special Edition,Sedan,30104,25190.0,Lunar Silver Metallic,Ebony,8 Spd Automatic,2.4L 4 CYLINDER,19UDE2F41JA000708,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:07:11.328523
3,2017 Acura ILX Sedan,2017,Acura,ILX,Sedan,39649,23190.0,Bellanova White Pearl,Ebony,8 Spd Automatic,2.4L 4 CYLINDER,19UDE2F39HA801291,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:07:11.328523
4,2020 Acura TLX Sedan,2020,Acura,TLX,Sedan,43790,29790.0,Majestic Black Pearl,Ebony,8 Spd Automatic,2.4L 4 CYLINDER,19UUB1F38LA007927,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:07:11.328523
5,2017 Audi A4 Premium Plus S Line AWD Sedan,2017,Audi,A4 Premium Plus S Line AWD,Sedan,50165,29790.0,Monsoon Gray Metallic,S Line Black,8 Spd Automatic,2.0L 4 CYLINDER,WAUENAF41HN023869,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:07:11.328523
6,2018 BMW X1 sDrive28 SAV,2018,BMW,X1 sDrive28 SAV,,27326,29390.0,Jet Black,Black,8 Spd Automatic,2.0L 4 CYLINDER,WBXHU7C32J5H39208,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:07:11.328523
7,2018 BMW X1 xDrive28i SAV,2018,BMW,X1 xDrive28i SAV,,36183,30990.0,Alpine White,Black,8 Spd Automatic,2.0L 4 CYLINDER,WBXHT3C38J5L30896,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:07:11.328523
8,2018 BMW X1 xDrive28i SAV,2018,BMW,X1 xDrive28i SAV,,30371,31390.0,Alpine White,Black,8 Spd Automatic,2.0L 4 CYLINDER,WBXHT3C31J5K28792,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:07:11.328523
9,2018 BMW X5 xDrive35i SAV,2018,BMW,X5 xDrive35i SAV,,32113,42390.0,Jet Black,Black,8 Spd Automatic,3.0L STRAIGHT 6 CYLINDER,5UXKR0C53J0X94997,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:07:11.328523


In [55]:
c.isnull().sum()

title                 0
year                  0
make                  0
models                0
vehicle_type          4
vehicle_mileage       0
price                 3
exterior_color        0
interior_color        0
transmission          0
engine                0
vin                   0
dealership_name       0
dealership_address    0
dealership_zipcode    0
dealership_city       0
dealership_state      0
inventory_url         0
scraped_date          0
dtype: int64

In [5]:
import sqlite3
import pandas as pd
conn = sqlite3.connect('cars.db')
df = pd.read_sql_query("SELECT * from inventory_staging", conn)
df.loc[df['dealership_name'] == 'Fafama Auto Sales', :]

Unnamed: 0,title,year,make,models,vehicle_type,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
167,2017 Honda Civic Sedan LX,2017,Honda,Civic LX,Sedan,55638,18990.0,Lunar Silver Metallic,Gray,Automatic,2.0L 4 CYLINDER,2HGFC2F51HH519003,Fafama Auto Sales,5 Cape Road,01757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-08 22:41:11.615053
168,2020 Honda CR-V LX AWD SUV,2020,Honda,CR-V LX AWD,SUV,32368,29790.0,Radiant Red Metallic,Gray,Automatic,1.5L 4 CYLINDER,5J6RW2H27LA012081,Fafama Auto Sales,5 Cape Road,01757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-08 22:41:11.615053
169,2019 Honda CR-V LX AWD SUV,2019,Honda,CR-V LX AWD,SUV,33488,29190.0,Obsidian Blue Pearl,Gray,Automatic,2.4L 4 CYLINDER,5J6RW6H30KL003385,Fafama Auto Sales,5 Cape Road,01757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-08 22:41:11.615053
170,2019 Honda CR-V EX-L AWD SUV,2019,Honda,CR-V EX-L AWD,SUV,32178,30990.0,Crystal Black Pearl,Black,Automatic,1.5L 4 CYLINDER,2HKRW2H89KH101751,Fafama Auto Sales,5 Cape Road,01757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-08 22:41:11.615053
171,2018 Honda Odyssey EX-L Van,2018,Honda,Odyssey EX-L,Van,39482,36390.0,Crystal Black Pearl,Black,9 Spd Automatic,3.5L V6 CYLINDER,5FNRL6H73JB078889,Fafama Auto Sales,5 Cape Road,01757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-08 22:41:11.615053
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,2019 Volkswagen Jetta 1.4T SEL Sedan,2019,Volkswagen,Jetta 1.4T SEL,Sedan,21528,,Pure White,Black,8 Spd Automatic,1.4L 4 CYLINDER,3VWE57BU3KM251768,Fafama Auto Sales,5 Cape Road,01757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-08 22:41:12.548816
258,2017 Volkswagen Passat R-Line w/Comfort Pkg Sedan,2017,Volkswagen,Passat R-Line w/Comfort Pkg,Sedan,57382,,Pure White,Cornsilk Beige,6 Spd Automatic,1.8L 4 CYLINDER,1VWDT7A31HC052402,Fafama Auto Sales,5 Cape Road,01757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-08 22:41:12.548816
259,2018 Volkswagen Tiguan SE 4MOTION w/Premium Pk...,2018,Volkswagen,Tiguan SE 4MOTION w/Premium Pkg,SUV,34481,,Pure White,Saffrano & Black,8 Spd Automatic,2.0L 4 CYLINDER,3VV2B7AX6JM193763,Fafama Auto Sales,5 Cape Road,01757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-08 22:41:12.548816
260,2018 Volkswagen Tiguan SE 4MOTION w/Premium Pk...,2018,Volkswagen,Tiguan SE 4MOTION w/Premium Pkg,SUV,34161,28790.0,Pure White,Saffrano & Black,8 Spd Automatic,2.0L 4 CYLINDER,3VV2B7AX2JM103007,Fafama Auto Sales,5 Cape Road,01757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-08 22:41:12.548816


In [6]:
df.loc[df['dealership_name'] == 'Fafama Auto Sales', :].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 95 entries, 167 to 261
Data columns (total 19 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   title               95 non-null     object 
 1   year                95 non-null     object 
 2   make                95 non-null     object 
 3   models              95 non-null     object 
 4   vehicle_type        95 non-null     object 
 5   vehicle_mileage     95 non-null     int64  
 6   price               81 non-null     float64
 7   exterior_color      95 non-null     object 
 8   interior_color      95 non-null     object 
 9   transmission        95 non-null     object 
 10  engine              94 non-null     object 
 11  vin                 95 non-null     object 
 12  dealership_name     95 non-null     object 
 13  dealership_address  95 non-null     object 
 14  dealership_zipcode  95 non-null     object 
 15  dealership_city     95 non-null     object 
 16  dealers

In [107]:
df.loc[df['dealership_name'] == 'Fafama Auto Sales', :].isnull().sum()

title                  0
year                   0
make                   0
models                 0
vehicle_type           0
vehicle_mileage        0
price                 12
exterior_color         0
interior_color         0
transmission           0
engine                 1
vin                    0
dealership_name        0
dealership_address     0
dealership_zipcode     0
dealership_city        0
dealership_state       0
inventory_url          0
scraped_date           0
dtype: int64

In [75]:
df.loc[(df['dealership_name'] == 'Fafama Auto Sales') & (df['make'].isnull()), :]

Unnamed: 0,title,year,make,models,vehicle_type,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
246,2018 Jaguar XE Premium AWD Sedan,2018,,,,29866,28790.0,Corris Grey,Ebony/Ebony,8 Spd Automatic,2.0L 4 CYLINDER,SAJAJ4FX4JCP21579,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:52.864160
320,2018 Volvo S90 T5 AWD Momentum Sedan,2018,,,,36349,34190.0,Onyx Black Metallic,Charcoal,8 Spd Automatic,2.0L 4 CYLINDER,LVY982MK8JP032610,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:53.555042


In [103]:
df.loc[(df['dealership_name'] == 'Fafama Auto Sales') & (df['engine'].isnull()), :]

Unnamed: 0,title,year,make,models,vehicle_type,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
262,2019 Kia Forte LXS Sedan,2019,Kia,Forte LXS,Sedan,35478,19790.0,,,,,3KPF24AD1KE103127,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:38:12.684829


In [104]:
df.loc[(df['dealership_name'] == 'Fafama Auto Sales') & (df['exterior_color'].isnull()), :]

Unnamed: 0,title,year,make,models,vehicle_type,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
262,2019 Kia Forte LXS Sedan,2019,Kia,Forte LXS,Sedan,35478,19790.0,,,,,3KPF24AD1KE103127,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:38:12.684829


In [78]:
df.loc[(df['dealership_name'] == 'Fafama Auto Sales') & (df['price'].isnull()), :]

Unnamed: 0,title,year,make,models,vehicle_type,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
202,2016 Dodge Challenger SXT Coupe,2016,Dodge,Challenger SXT,Coupe,23495,,Bright White Clearcoat,Black,8 Spd Automatic,3.6L V6 CYLINDER,2C3CDZAGXGH300124,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:51.677114
211,2017 Ford F-150 XLT SuperCrew 4WD XTR Pkg Truck,2017,Ford,F-150 XLT SuperCrew 4WD XTR Pkg,Truck,51573,,Shadow Black,Medium Earth Gray,Automatic,2.7L V6 CYLINDER,1FTEW1EP4HFB93911,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:51.677114
215,2014 Ford Fusion SE Sedan,2014,Ford,Fusion SE,Sedan,81346,,Ingot Silver,Charcoal Black,6 Spd Automatic,2.5L 4 CYLINDER,1FA6P0H79E5382326,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:51.677114
233,2017 Hyundai Accent SE Sedan,2017,Hyundai,Accent SE,Sedan,67743,,Century White,Gray,Automatic,1.6L 4 CYLINDER,KMHCT4AE5HU307864,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:52.864160
242,2018 INFINITI Q50 3.0t SPORT AWD Sedan,2018,Infiniti,INFINITI Q50 3.0t SPORT AWD,Sedan,11124,,Graphite Shadow,Graphite,7 Spd Automatic,3.0L V6 CYLINDER,JN1EV7AR4JM434481,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:52.864160
254,2018 Jeep Compass Limited 4x4 Luxury Pkg SUV,2018,Jeep,Compass Limited 4x4 Luxury Pkg,SUV,28823,,Granite Crystal Metallic Clearcoat,Black,9 Spd Automatic,2.4L 4 CYLINDER,3C4NJDCBXJT234299,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:52.864160
257,2018 Jeep Compass Limited 4x4 Luxury Pkg SUV,2018,Jeep,Compass Limited 4x4 Luxury Pkg,SUV,21475,,Pearl White Tri-Coat,Black,9 Spd Automatic,2.4L 4 CYLINDER,3C4NJDCB1JT434570,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:52.864160
271,2018 Lexus IS 300 AWD Sedan,2018,Lexus,IS 300 AWD,Sedan,28924,,Atomic Silver,Black,6 Spd Automatic,3.5L V6 CYLINDER,JTHC81D21J5033390,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:52.864160
274,2018 Mazda CX-9 Grand Touring AWD SUV,2018,Mazda,CX-9 Grand Touring AWD,SUV,22768,,Jet Black Mica,Black,6 Spd Automatic,2.5L 4 CYLINDER,JM3TCBDY2J0226825,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:52.864160
291,2019 Ram 1500 Big Horn/Lone Star Crew Cab 4x4 ...,2019,Ram,1500 Big Horn/Lone Star Crew Cab 4x4,Truck,33190,,Diamond Black Crystal Pearlcoat,Black,8 Spd Automatic,3.6L V6 CYLINDER,1C6RRFFG4KN825234,Fafama Auto Sales,5 Cape Road,1757,Milford,MA,https://www.fafama.com/used-cars.aspx,2022-03-06 01:19:53.555042


In [4]:
conn = sqlite3.connect('cars.db')

#Creating a cursor object using the cursor() method
cursor = conn.cursor()

#Doping EMPLOYEE table if already exists
cursor.execute("DROP TABLE inventory_staging.db")

#Commit your changes in the database
conn.commit()

#Closing the connection
conn.close()

OperationalError: no such table: inventory_staging.db

In [69]:
pagination_url.replace('page=[0-9+]', f'page={page_counter}')       

'https://www.fafama.com/inventory.aspx?_used=true&_page=3'

In [71]:
pagination_url = re.sub('page=[0-9+]', f'page={page_counter}', pagination_url)

In [72]:
print(pagination_url)

https://www.fafama.com/inventory.aspx?_used=true&_page=4


In [80]:
pagination_url = 'https://www.fafama.com/inventory.aspx?_used=true&_page=2'

response = requests.get(pagination_url, headers = headers)
soup_pagination = BeautifulSoup(response.text, "html.parser")   

In [83]:
len(get_misc_vehicle_data(soup_pagination, 'ul', 'small list-unstyled mb-0', 'engine'))

49

In [94]:
misc_vehicle_data = parseColumn(soup_pagination, 'ul', 'small list-unstyled mb-0')
misc_vehicle_data[0]

'\nEngine: 1.5L 4 CYLINDER\nTransmission: Automatic\nMiles: 32,368\nExterior: Radiant Red Metallic\nInterior: Gray\nStock #: 12673\nVIN #: 5J6RW2H27LA012081\n\n37 Photos\n\n'

In [97]:
for row in misc_vehicle_data[0]:
    print(row)



E
n
g
i
n
e
:
 
1
.
5
L
 
4
 
C
Y
L
I
N
D
E
R


T
r
a
n
s
m
i
s
s
i
o
n
:
 
A
u
t
o
m
a
t
i
c


M
i
l
e
s
:
 
3
2
,
3
6
8


E
x
t
e
r
i
o
r
:
 
R
a
d
i
a
n
t
 
R
e
d
 
M
e
t
a
l
l
i
c


I
n
t
e
r
i
o
r
:
 
G
r
a
y


S
t
o
c
k
 
#
:
 
1
2
6
7
3


V
I
N
 
#
:
 
5
J
6
R
W
2
H
2
7
L
A
0
1
2
0
8
1




3
7
 
P
h
o
t
o
s






In [98]:
vehicle_data_list = []
for row in misc_vehicle_data:
    if 'engine' not in row.lower():
        vehicle_data_list.append(None)
        continue
    for el in row.split('\n'):
        if 'engine' in el.lower():
            vehicle_data_list.append(el.split(':')[1].strip())
            print(el)

Engine: 1.5L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 1.5L 4 CYLINDER
Engine: 3.5L V6 CYLINDER
Engine: 3.5L V6 CYLINDER
Engine: 3.5L V6 CYLINDER
Engine: 1.6L 4 CYLINDER
Engine: 2.0L 4 CYLINDER
Engine: 2.0L 4 CYLINDER
Engine: 2.5L 4 CYLINDER
Engine: 2.5L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 3.0L V6 CYLINDER
Engine: 3.0L V6 CYLINDER
Engine: 3.5L V6 CYLINDER
Engine: 3.5L V6 CYLINDER
Engine: 3.5L V6 CYLINDER
Engine: 2.0L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.0L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 3.6L V6 CYLINDER
Engine: 3.6L V6 CYLINDER
Engine: 3.6L V6 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.0L 4 CYLINDER
Engine: 2.0L 4 CYLINDER
Engine: 2.0L 4 CYLINDER
Engine: 1.6L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engine: 2.4L 4 CYLINDER
Engin

In [99]:
vehicle_data_list

['1.5L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '1.5L 4 CYLINDER',
 '3.5L V6 CYLINDER',
 '3.5L V6 CYLINDER',
 '3.5L V6 CYLINDER',
 '1.6L 4 CYLINDER',
 '2.0L 4 CYLINDER',
 '2.0L 4 CYLINDER',
 '2.5L 4 CYLINDER',
 '2.5L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '3.0L V6 CYLINDER',
 '3.0L V6 CYLINDER',
 '3.5L V6 CYLINDER',
 '3.5L V6 CYLINDER',
 '3.5L V6 CYLINDER',
 '2.0L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.0L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '3.6L V6 CYLINDER',
 '3.6L V6 CYLINDER',
 '3.6L V6 CYLINDER',
 '2.4L 4 CYLINDER',
 None,
 '2.0L 4 CYLINDER',
 '2.0L 4 CYLINDER',
 '2.0L 4 CYLINDER',
 '1.6L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.4L 4 CYLINDER',
 '2.0L 4 CYLINDER',
 '3.5L V6 CYLINDER',
 '3.5L V6 CYLINDER',
 '3.5L V6 CYLINDER',
 '2.0L 4 CYLINDER',
 '2.5L 4 CYLINDER',
 '2.5L 4 CYLINDER',
 '1.6L 4 CYLINDER']