In [1]:
import parse_inventory as pi
import parse_dealership
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import sqlite3
import numpy as np

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [2]:
# Dealership Info Dictionary
dealerships = {
    'Bostonyan Auto Group': {
        'url': 'https://www.bostonyanautogroup.com/view-inventory',
        'pagination_url': '',
        'dealership_name': 'Bostonyan Auto Group',
        'address': '119 Worcester St',
        'zipcode': '01760',
        'city': 'Natick',
        'state': 'MA'
    },
    'Direct Auto Mecca': {
        'url': 'https://www.directautomecca.com/view-inventory.aspx',
        'pagination_url': 'https://www.directautomecca.com/inventory.aspx?_new=true&_used=true&_page=2',
        'dealership_name': 'Direct Auto Mecca',
        'address': '154 Waverly Street',
        'zipcode': '01760',
        'city': 'Natick',
        'state': 'MA'
    },
    'Fafama Auto Sales': {
        'url': 'https://www.fafama.com/used-cars.aspx',
        'pagination_url': 'https://www.fafama.com/inventory.aspx?_used=true&_page=2',
        'dealership_name': 'Fafama Auto Sales',
        'address': '5 Cape Road',
        'zipcode': '01757',
        'city': 'Milford',
        'state': 'MA'
    },
    'Newton Automotive Sales': {
        'url': 'https://www.newtonautoandsales.com/cars-for-sale',
        'pagination_url': 'https://www.newtonautoandsales.com/cars-for-sale?PageNumber=2&Sort=MakeAsc&StockNumber=&Condition=&BodyStyle=&Make=&MaxPrice=&Mileage=&SoldStatus=AllVehicles&StockNumber=',
        'dealership_name': 'Newton Automotive Sales',
        'address': '249 Centre Street',
        'zipcode': '02458',
        'city': 'Newton',
        'state': 'MA'
    },
    'Blasius Boston': {
        'url': 'https://www.blasiusboston.com/used-cars-holliston-ma',
        'pagination_url': 'https://www.blasiusboston.com/used-cars-holliston-ma?page=2',
        'dealership_name': 'Blasius Boston',
        'address': '1286 Washington Street',
        'zipcode': '01746',
        'city': 'Holliston',
        'state': 'MA'
    },
    'Avon Auto Brokers': {
        'url': 'https://avonautobrokers.com/newandusedcars?clearall=1',
        'pagination_url': 'https://avonautobrokers.com/newandusedcars?page=2',
        'dealership_name': 'Avon Auto Brokers',
        'address': '159 Memorial Drive',
        'zipcode': '02322',
        'city': 'Avon',
        'state': 'MA'
    }, 
    'Johns Auto Sales': {
        'url': 'https://johnsautosales.com/newandusedcars?clearall=1',
        'pagination_url': 'https://johnsautosales.com/newandusedcars?page=2',
        'dealership_name': "John's Auto Sales",
        'address': '181 Somerville Avenue',
        'zipcode': '02143',
        'city': 'Somerville',
        'state': 'MA'
    },
    'JM Automotive': {
        'url': 'https://www.jmautomotive.com/cars-for-sale-in-Naugatuck-CT-Hartford-New-Haven/used_cars',
        'pagination_url': 'https://www.jmautomotive.com/inventory.aspx?pg=2&sort=12&limit=50&vstatus=1&status=6',
        'dealership_name': 'J&M Automotive',
        'address': '756/820 New Haven Road',
        'zipcode': '06770',
        'city': 'Naugatuck',
        'state': 'CT'
    },
    'CT Auto': {
        'url': 'https://www.hartefamilymotors.com/inventory/used-vehicles/',
        'pagination_url': None,
        'dealership_name': 'CT Auto',
        'address': '7 Wayne Street', 
        'zipcode': '06606',
        'city': 'Bridgeport',
        'state': 'CT'
    }
}


## Test Connection

Check that the html for the page is being pulled in correctly

In [3]:
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
}

URL = 'https://www.ct-auto.com/cars-for-sale-in-Bridgeport-CT-Waterbury-Norwich/used_cars'
#URL = 'https://www.jmautomotive.com/inventory.aspx?pg=11&sort=12&limit=50&vstatus=1&status=6'
response = requests.get(URL, headers = headers)
soup = BeautifulSoup(response.text, "html.parser")


In [20]:
print(soup.prettify())

<!DOCTYPE html>
<html lang="en-us">
 <head>
  <title>
   Used cars for sale in Bridgeport, Norwalk, Waterbury, Norwich, CT | CT Auto
  </title>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="black" name="apple-mobile-web-app-status-bar-style"/>
  <meta content="width=device-width, user-scalable=yes, initial-scale=1.0, minimum-scale=1.0, maximum-scale=9.0" name="viewport"/>
  <meta content="index, follow" name="robots"/>
  <meta content="CT Auto, used cars for sale, used car dealer, Connecticut, CT, Bridgeport, Norwalk, Waterbury, Norwich, Fairfield CT, Stratford CT, Trumbull CT, Westport CT, Milford CT, Norwalk CT, Shelton CT, Orange CT, Derby CT, Ansonia CT, Darien CT, West Haven CT, Woodbridge CT, Stony Brook NY and Seymour CT,  used cars, used suv, used trucks, used pickups, used minivans, car financing, service and repair center in Bridgeport, CT" name="keywords"/>
  <meta content="Used cars for sale in Bridgeport, Norwalk, Waterbury, Norwic

## Test Extracting Columns

### Title

In [5]:
# pi.parse_subsection_attr(soup, 'title','div', 'a', 'thumbnail', 'listitemlink')

### Year

In [27]:
pi.convert_to_numeric_type(pi.parse_main_section_attr_text_all(soup, 'div', 'invItems item col-xs-6 col-lg-6 list-group-item'
                                    , 'itemprop', 'vehicleModelDate'))

[2013,
 2014,
 2010,
 2009,
 2018,
 2001,
 2013,
 1984,
 2014,
 2010,
 2015,
 2017,
 2007,
 2017,
 2008,
 2010,
 2015,
 2010,
 2010,
 2011,
 2015,
 2014,
 2007,
 2011,
 2015,
 2019,
 2007,
 2017,
 2012,
 2014,
 2017,
 2014,
 2015,
 2000,
 2015,
 2009,
 1999,
 2009,
 2014,
 2016,
 2011,
 2012,
 2006,
 2017]

### Make

In [23]:
pi.parse_main_section_attr_text_all(soup, 'div', 'invItems item col-xs-6 col-lg-6 list-group-item'
                                    , 'itemprop', 'manufacturer')

['Acura',
 'Acura',
 'Acura',
 'Acura',
 'BMW',
 'Chevrolet',
 'Chevrolet',
 'Chevrolet',
 'Chevrolet',
 'Chevrolet',
 'Chrysler',
 'Dodge',
 'Dodge',
 'GMC',
 'Honda',
 'Honda',
 'Honda',
 'Hyundai',
 'Hyundai',
 'Hyundai',
 'Infiniti',
 'Jeep',
 'Jeep',
 'Jeep',
 'Jeep',
 'Land Rover',
 'Mercedes-Benz',
 'Nissan',
 'Nissan',
 'Nissan',
 'Nissan',
 'Nissan',
 'Nissan',
 'Pontiac',
 'Ram',
 'Subaru',
 'Toyota',
 'Toyota',
 'Toyota',
 'Toyota',
 'Toyota',
 'Toyota',
 'Toyota',
 'Volkswagen']

### Model

In [24]:
pi.parse_main_section_attr_text_all(soup, 'div', 'invItems item col-xs-6 col-lg-6 list-group-item'
                                    , 'itemprop', 'model')

['MDX',
 'RLX',
 'TL',
 'TSX',
 'X6',
 'Corvette',
 'Cruze',
 'El Camino',
 'Equinox',
 'Tahoe',
 'Town & Country',
 'Grand Caravan',
 'Ram 1500',
 'Acadia Limited',
 'CR-V',
 'CR-V',
 'Pilot',
 'Elantra',
 'Sonata',
 'Sonata',
 'Q50',
 'Cherokee',
 'Wrangler',
 'Wrangler',
 'Wrangler Unlimited',
 'Range Rover',
 'SL-Class',
 'Altima',
 'Frontier',
 'Frontier',
 'Rogue Sport',
 'Sentra',
 'Versa',
 'Bonneville',
 '1500',
 'Legacy',
 '4Runner',
 'Camry',
 'Camry',
 'Corolla',
 'Highlander',
 'RAV4',
 'Tundra',
 'Jetta']

### Trim

In [25]:
pi.parse_main_section_attr_text_all(soup, 'div', 'invItems item col-xs-6 col-lg-6 list-group-item'
                                    , 'itemprop', 'vehicleConfiguration')

['AWD 4dr Tech Pkg',
 '4dr Sdn Tech Pkg',
 'SH-AWD',
 'BASE',
 'MSPORT',
 '2dr Convertible',
 '4dr Sdn Auto LS',
 'STANDARD PICKUP',
 'AWD 4dr LT w/1LT',
 '4WD 4dr 1500 LTZ',
 '4dr Wgn Touring',
 'SXT Wagon',
 '4WD Quad Cab 140.5" SLT',
 'AWD 4dr Limited',
 '4WD 5dr EX',
 'EX-L',
 '4WD 4dr LX',
 'Base',
 '4dr Sdn I4 Auto GLS',
 '4dr Sdn 2.4L Auto GLS PZEV',
 '4dr Sdn Sport AWD',
 '4WD 4dr Limited',
 '4WD 2dr Sahara',
 '4WD 2dr Mojave',
 '4WD 4dr Sahara',
 'V8 Supercharged SWB',
 '2dr Roadster 5.5L V8',
 '2.5 S Sedan',
 '4WD Crew Cab SWB Auto SV',
 '4WD Crew Cab SWB Auto SV',
 'AWD S',
 '4dr Sdn I4 CVT SR',
 '4dr Sdn Manual 1.6 S',
 '4dr Sdn SSEi',
 '4WD Crew Cab tradesman',
 'SE',
 'SR5',
 '4dr Sdn I4 Auto (Natl)',
 'LE',
 '4dr Sdn CVT S (Natl)',
 'Base',
 '4WD 4dr I4 (Natl)',
 'SR5',
 '1.4T SE Auto']

In [26]:
len(pi.parse_main_section_attr_text_all(soup, 'div', 'invItems item col-xs-6 col-lg-6 list-group-item'
                                    , 'itemprop', 'vehicleConfiguration'))

44

### Mileage

In [28]:
pi.convert_to_numeric_type(
pi.parse_subsection(soup, 'div', 'li', 'invItems item col-xs-6 col-lg-6 list-group-item'
                    , 'list-group-item mileage', 'get_text'))

[161206,
 116970,
 173399,
 140765,
 47386,
 70738,
 108182,
 26437,
 84659,
 76733,
 80958,
 129066,
 195002,
 87945,
 174807,
 182734,
 201382,
 129351,
 139810,
 135036,
 96764,
 137949,
 195475,
 97661,
 86499,
 51965,
 68150,
 109793,
 102938,
 134386,
 151129,
 17883,
 92820,
 140357,
 130338,
 160988,
 200077,
 165130,
 56878,
 73404,
 168417,
 233627,
 129562,
 116566]

### Car Prices

In [29]:
pi.convert_to_numeric_type(
    pi.parse_main_section_attr_text_all(soup, 'div', 'invItems item col-xs-6 col-lg-6 list-group-item'
                                    , 'itemprop', 'price'))

[14950,
 18950,
 0,
 0,
 49900,
 0,
 9900,
 0,
 14900,
 21900,
 15900,
 14000,
 6900,
 25500,
 6800,
 10900,
 9688,
 3900,
 5900,
 7900,
 19900,
 13500,
 10900,
 14500,
 0,
 86900,
 21900,
 14400,
 16600,
 16700,
 14800,
 12800,
 0,
 0,
 22800,
 6600,
 0,
 9900,
 16700,
 15800,
 13350,
 8900,
 0,
 14900]

### Exterior Color

In [30]:
pi.clean_text_data(
    pi.parse_subsection(soup, 'div', 'li', 'invItems item col-xs-6 col-lg-6 list-group-item'
                        , 'list-group-item InvExteriorcolor', 'get_text'), 'Exterior')

['Palladium Metallic',
 'Gilded Pewter Metallic',
 'Crystal Black Pearl',
 'Palladium Metallic',
 'Alpine White',
 'Torch Red',
 'Summit White',
 'Red',
 'Crystal Red Tintcoat',
 'Sheer Silver Metallic',
 'Deep Cherry Red Crystal Pearlcoat',
 'Contusion Blue Pearlcoat',
 'Mineral Gray Metallic',
 'Sparkling Silver Metallic',
 'Royal Blue Pearl',
 'Polished Metal Metallic',
 'Crystal Black Pearl',
 'Liquid Silver Metallic',
 'Pearl White',
 'Midnight Black',
 'Black Obsidian',
 'Granite Crystal Metallic Clearcoat',
 'Steel Blue Metallic',
 'Black Clear Coat',
 'Bright White Clearcoat',
 'Narvik Black',
 'Mars Red',
 'Pearl White',
 'Avalanche',
 'Brilliant Silver',
 'Gun Metallic',
 'Aspen White',
 'Fresh Powder',
 'Black',
 'Black Clearcoat',
 'Ruby Red Pearl',
 'Desert Dune Metallic',
 'Magnetic Gray Metallic',
 'Attitude Black Metallic',
 'Slate Metallic',
 'Cypress Pearl',
 'Super White',
 'Bluesteel Metallic',
 'Black']

### Interior Color

In [32]:
# None

### Transmission

In [33]:
pi.clean_text_data(
    pi.parse_subsection(soup, 'div', 'li', 'invItems item col-xs-6 col-lg-6 list-group-item'
                        , 'list-group-item InvTransmissiontype', 'get_text'), 'Transmission')

['Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Manual',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Manual',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic',
 'Automatic']

### Engine

In [39]:
pi.clean_text_data(
    pi.parse_subsection(soup, 'div', 'li', 'invItems item col-xs-6 col-lg-6 list-group-item'
                        , 'list-group-item InvEnginetype', 'get_text'), 'Engine')

['3.7\xa0V6 Cylinder Engine',
 '3.5\xa0V6 Cylinder Engine',
 '3.7\xa0V6 Cylinder Engine',
 '2.4\xa04 Cylinder Engine',
 '3\xa0Straight 6 Cylinder Engine',
 '5.7\xa08 Cylinder Engine',
 '1.8\xa04 Cylinder Engine',
 '3.8\xa0V6 Cylinder Engine',
 '2.4\xa04 Cyl.',
 '5.3\xa08 Cylinder Engine',
 '3.6 L\xa06 Cyl.',
 '3.6\xa0V6 Cylinder Engine',
 '8 Cyl.',
 '3.6\xa0V6 Cylinder Engine',
 '2.4\xa04 Cylinder Engine',
 '2.4\xa04 Cylinder Engine',
 '3.5\xa0V6 Cylinder Engine',
 '2\xa04 Cylinder Engine',
 '2.4\xa04 Cylinder Engine',
 '2.4\xa04 Cylinder Engine',
 '3.7\xa0V6 Cylinder Engine',
 '2.4\xa04 Cyl.',
 '3.8\xa0V6 Cylinder Engine',
 '3.8\xa0V6 Cylinder Engine',
 '3.6\xa0V6 Cylinder Engine',
 '5\xa08 Cylinder Engine',
 '5.5\xa08 Cylinder Engine',
 '2.5\xa04 Cylinder Engine',
 '4\xa0V6 Cylinder Engine',
 '4\xa0V6 Cylinder Engine',
 '2\xa04 Cylinder Engine',
 '1.8\xa04 Cylinder Engine',
 '1.6\xa04 Cylinder Engine',
 '3.8\xa0V6 Cylinder Engine',
 '5.7\xa08 Cyl.',
 '2.5\xa04 Cylinder Engine',
 '3.4

### Drivetrain

In [40]:
pi.parse_main_section_attr_text_all(soup, 'div', 'invItems item col-xs-6 col-lg-6 list-group-item'
                                , 'itemprop', 'driveWheelConfiguration')

['All Wheel Drive',
 'Front Wheel Drive',
 None,
 'Front Wheel Drive',
 'All Wheel Drive',
 None,
 'Front Wheel Drive',
 'Front Wheel Drive',
 'All Wheel Drive',
 None,
 'Front Wheel Drive',
 'Front Wheel Drive',
 'Four Wheel Drive',
 'All Wheel Drive',
 None,
 None,
 'Four Wheel Drive',
 None,
 None,
 'Front Wheel Drive',
 'All Wheel Drive',
 'Four Wheel Drive',
 'Four Wheel Drive',
 'Four Wheel Drive',
 'Four Wheel Drive',
 'Four Wheel Drive',
 'Rear Wheel Drive',
 'Front Wheel Drive',
 'Four Wheel Drive',
 'Four Wheel Drive',
 'All Wheel Drive',
 'Front Wheel Drive',
 'Front Wheel Drive',
 'Front Wheel Drive',
 'Four Wheel Drive',
 'All Wheel Drive',
 None,
 'Front Wheel Drive',
 'Front Wheel Drive',
 'Front Wheel Drive',
 None,
 'Four Wheel Drive',
 None,
 'Front Wheel Drive']

### VIN

In [41]:
pi.clean_text_data(
    pi.parse_subsection(soup, 'div', 'li', 'invItems item col-xs-6 col-lg-6 list-group-item', 'list-group-item InvVin', 'get_text'), 'VIN')

['2HNYD2H33DH501008',
 'JH4KC1F57EC006925',
 '19UUA9F58AA004333',
 'JH4CU26669C016304',
 '5UXKU2C51J0Z62419',
 '1G1YY32G015114228',
 '1G1PA5SG7D7314141',
 '1GCCW8095ER105760',
 '1GNFLFEK0EZ127856',
 '1GNUKCE09AR286778',
 '2C4RC1BG8FR504938',
 '2C4RDGCG5HR604061',
 '1D7HU18217S209402',
 '1GKKVSKD3HJ106507',
 'JHLRE485X8C027865',
 '5J6RE4H73AL089297',
 '5FNYF4H29FB032518',
 'KMHDU4AD5AU150111',
 '5NPET4ACXAH588281',
 '5NPEB4AC5BH118423',
 'JN1BV7AR8FM391161',
 '1C4PJMDB5EW153461',
 '1J8FA54157L168631',
 '1J4AA2D15BL521548',
 '1C4BJWEG1FL510976',
 'SALGS2RE3KA526468',
 'WDBSK71F57F128657',
 '1N4AL3AP3HC195195',
 '1N6AD0EV6CC430953',
 '1N6AD0EV3EN726035',
 'JN1BJ1CR1HW106975',
 '3N1AB7APXEY286927',
 '3N1CN7AP1FL825773',
 '1G2HZ5419Y4278171',
 '1C6RR7KT2FS550771',
 '4S3BL616797226241',
 'JT3HN86R3X0253854',
 '4T1BE46K29U806854',
 '4T4BF1FK8ER339774',
 '2T1BURHE4GC629682',
 '5TDBK3EHXBS066140',
 '2T3BF4DV6CW253831',
 '5TBDT44156S541904',
 '3VWDB7AJ7HM239630']

## Check Full Script

### Check Data

In [5]:
c = parse_dealership.get_ct_auto_inventory_data(soup, dealerships['CT Auto'], dealerships['CT Auto']['url'])

In [6]:
c.head()

Unnamed: 0,title,year,make,model_trim,vehicle_type,model,trim,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,drivetrain,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
0,2013 Acura MDX AWD 4dr Tech Pkg,2013,Acura,MDX AWD 4dr Tech Pkg,,MDX,AWD 4dr Tech Pkg,161206,14950,Palladium Metallic,,Automatic,3.7 V6 Cylinder Engine,All Wheel Drive,2HNYD2H33DH501008,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
1,2014 Acura RLX 4dr Sdn Tech Pkg,2014,Acura,RLX 4dr Sdn Tech Pkg,,RLX,4dr Sdn Tech Pkg,116970,18950,Gilded Pewter Metallic,,Automatic,3.5 V6 Cylinder Engine,Front Wheel Drive,JH4KC1F57EC006925,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
2,2010 Acura TL SH-AWD,2010,Acura,TL SH-AWD,,TL,SH-AWD,173399,0,Crystal Black Pearl,,Automatic,3.7 V6 Cylinder Engine,,19UUA9F58AA004333,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
3,2009 Acura TSX BASE,2009,Acura,TSX BASE,,TSX,BASE,140765,0,Palladium Metallic,,Automatic,2.4 4 Cylinder Engine,Front Wheel Drive,JH4CU26669C016304,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
4,2018 BMW X6 MSPORT,2018,BMW,X6 MSPORT,,X6,MSPORT,47386,49900,Alpine White,,Automatic,3 Straight 6 Cylinder Engine,All Wheel Drive,5UXKU2C51J0Z62419,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672


In [7]:
c.isnull().sum()

title                  0
year                   0
make                   0
model_trim             0
vehicle_type          44
model                  0
trim                   0
vehicle_mileage        0
price                  0
exterior_color         0
interior_color        44
transmission           0
engine                 0
drivetrain            10
vin                    0
dealership_name        0
dealership_address     0
dealership_zipcode     0
dealership_city        0
dealership_state       0
inventory_url          0
scraped_date           0
dtype: int64

In [8]:
## Check Null
c.loc[c.drivetrain.isnull(), :]

Unnamed: 0,title,year,make,model_trim,vehicle_type,model,trim,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,drivetrain,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
2,2010 Acura TL SH-AWD,2010,Acura,TL SH-AWD,,TL,SH-AWD,173399,0,Crystal Black Pearl,,Automatic,3.7 V6 Cylinder Engine,,19UUA9F58AA004333,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
5,2001 Chevrolet Corvette 2dr Convertible,2001,Chevrolet,Corvette 2dr Convertible,,Corvette,2dr Convertible,70738,0,Torch Red,,Automatic,5.7 8 Cylinder Engine,,1G1YY32G015114228,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
9,2010 Chevrolet Tahoe 4WD 4dr 1500 LTZ,2010,Chevrolet,Tahoe 4WD 4dr 1500 LTZ,,Tahoe,4WD 4dr 1500 LTZ,76733,21900,Sheer Silver Metallic,,Automatic,5.3 8 Cylinder Engine,,1GNUKCE09AR286778,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
14,2008 Honda CR-V 4WD 5dr EX,2008,Honda,CR-V 4WD 5dr EX,,CR-V,4WD 5dr EX,174807,6800,Royal Blue Pearl,,Automatic,2.4 4 Cylinder Engine,,JHLRE485X8C027865,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
15,2010 Honda CR-V EX-L,2010,Honda,CR-V EX-L,,CR-V,EX-L,182734,10900,Polished Metal Metallic,,Automatic,2.4 4 Cylinder Engine,,5J6RE4H73AL089297,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
17,2010 Hyundai Elantra Base,2010,Hyundai,Elantra Base,,Elantra,Base,129351,3900,Liquid Silver Metallic,,Manual,2 4 Cylinder Engine,,KMHDU4AD5AU150111,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
18,2010 Hyundai Sonata 4dr Sdn I4 Auto GLS,2010,Hyundai,Sonata 4dr Sdn I4 Auto GLS,,Sonata,4dr Sdn I4 Auto GLS,139810,5900,Pearl White,,Automatic,2.4 4 Cylinder Engine,,5NPET4ACXAH588281,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
36,1999 Toyota 4Runner SR5,1999,Toyota,4Runner SR5,,4Runner,SR5,200077,0,Desert Dune Metallic,,Automatic,3.4 V6 Cylinder Engine,,JT3HN86R3X0253854,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
40,2011 Toyota Highlander Base,2011,Toyota,Highlander Base,,Highlander,Base,168417,13350,Cypress Pearl,,Automatic,3.5 V6 Cylinder Engine,,5TDBK3EHXBS066140,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672
42,2006 Toyota Tundra SR5,2006,Toyota,Tundra SR5,,Tundra,SR5,129562,0,Bluesteel Metallic,,Automatic,4.7 8 Cylinder Engine,,5TBDT44156S541904,CT Auto,7 Wayne Street,6606,Bridgeport,CT,https://www.hartefamilymotors.com/inventory/us...,2022-04-09 23:44:08.930672


## Check DB

In [41]:
# Check other null data points
conn = sqlite3.connect('cars.db')
sql_query = "SELECT * FROM test WHERE dealership_name = 'J&M Automotive'"
#sql_query = "SELECT * FROM inventory_staging"
result = pd.read_sql_query(sql_query, conn)

In [42]:
result

Unnamed: 0,title,year,make,model_trim,vehicle_type,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,drivetrain,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
0,2016 Ford F-150 4WD SuperCrew 145 XLT,2016,Ford,F-150,4WD SuperCrew 145 XLT,89305,28995,Ingot Silver Metallic,Medium Earth Gray,Automatic,3.5L V6 Cylinder Engine,Four Wheel Drive,1FTEW1EG5GKD26164,J&M Automotive,756/820 New Haven Road,06770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:29:02.549807
1,2016 Ford Super Duty F-350 Srw 4WD Crew Cab 15...,2016,Ford,Super Duty F-350 Srw,4WD Crew Cab 156 Platinum,24752,53995,Shadow Black,Pecan,Automatic,6.2L 8 Cylinder Engine,Four Wheel Drive,1FT8W3B60GEC37377,J&M Automotive,756/820 New Haven Road,06770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:29:02.549807
2,2017 Mini Countryman Cooper ALL4,2017,Mini,Countryman,Cooper ALL4,77144,18995,Moonwalk Gray Metallic,Satellite Grey,Manual,1.5L 3 Cylinder Engine,All Wheel Drive,WMZYV5C35H3B75526,J&M Automotive,756/820 New Haven Road,06770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:29:02.549807
3,2012 Toyota Fj Cruiser 4WD 4dr Auto,2012,Toyota,Fj Cruiser,4WD 4dr Auto,92465,30995,Iceberg,Dark Charcoal,Automatic,4.0L V6 Cylinder Engine,Four Wheel Drive,JTEBU4BF6CK123246,J&M Automotive,756/820 New Haven Road,06770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:29:02.549807
4,2015 Ford Expedition 4WD 4dr Platinum,2015,Ford,Expedition,4WD 4dr Platinum,87901,31995,Ruby Red Metallic Tinted Clearcoat,Brunell W/black,Automatic,3.5L V6 Cylinder Engine,Four Wheel Drive,1FMJU1MT2FEF15902,J&M Automotive,756/820 New Haven Road,06770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:29:02.549807
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,2014 Toyota Tacoma 4WD Double Cab V6 AT,2014,Toyota,Tacoma,4WD Double Cab V6 AT,53421,28995,Silver Sky Metallic,Graphite,Automatic,4.0L V6 Cylinder Engine,Four Wheel Drive,3TMLU4EN3EM143512,J&M Automotive,756/820 New Haven Road,06770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:29:24.924182
472,2016 Toyota Tacoma 4WD Access Cab V6 AT TRD Of...,2016,Toyota,Tacoma,4WD Access Cab V6 AT TRD Off Road,36397,36995,Super White,,,3.5L V6 Cylinder Engine,Four Wheel Drive,5TFSZ5AN8GX008189,J&M Automotive,756/820 New Haven Road,06770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:29:24.924182
473,2012 Lincoln Mkz 4dr Sdn AWD,2012,Lincoln,Mkz,4dr Sdn AWD,88925,12995,Steel Blue Metallic,Light Camel,Automatic,3.5L V6 Cylinder Engine,All Wheel Drive,3LNHL2JC9CR810841,J&M Automotive,756/820 New Haven Road,06770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:29:24.924182
474,2017 Honda Accord LX CVT,2017,Honda,Accord,LX CVT,83396,19995,White Orchid Pearl,Ivory,Automatic,2.4L 4 Cylinder Engine,Front Wheel Drive,1HGCR2F30HA020482,J&M Automotive,756/820 New Haven Road,06770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:29:24.924182


### Before

In [10]:
conn = sqlite3.connect('cars.db')
sql_query = "SELECT dealership_name, count(*) AS num_of_vehicles FROM inventory GROUP BY 1"
result = pd.read_sql_query(sql_query, conn)
result

Unnamed: 0,dealership_name,num_of_vehicles
0,Avon Auto Brokers,627
1,Blasius Boston,358
2,Bostonyan Auto Group,41
3,Direct Auto Mecca,303
4,Fafama Auto Sales,239
5,J&M Automotive,939
6,Johns Auto Sales,270
7,Newton Automotive Sales,75


### After

In [66]:
conn = sqlite3.connect('cars.db')
sql_query = "SELECT dealership_name, count(*) AS num_of_vehicles FROM inventory_staging GROUP BY 1"
result = pd.read_sql_query(sql_query, conn)
result

Unnamed: 0,dealership_name,num_of_vehicles
0,Avon Auto Brokers,303
1,Blasius Boston,289
2,Bostonyan Auto Group,18
3,Direct Auto Mecca,147
4,Fafama Auto Sales,132
5,J&M Automotive,476
6,John's Auto Sales,294
7,Newton Automotive Sales,35


### Check Random Null Values to QA

In [67]:
# Check other null data points
conn = sqlite3.connect('cars.db')
sql_query = "SELECT * FROM inventory_staging WHERE dealership_name = 'J&M Automotive'"
#sql_query = "SELECT * FROM inventory_staging"
result = pd.read_sql_query(sql_query, conn)

In [68]:
result.tail()

Unnamed: 0,title,year,make,model_trim,vehicle_type,model,trim,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,drivetrain,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
471,2014 Toyota Tacoma 4WD Double Cab V6 AT,2014,Toyota,Tacoma 4WD Double Cab V6 AT,,Tacoma,4WD Double Cab V6 AT,53421,28995,Silver Sky Metallic,Graphite,Automatic,4.0L V6 Cylinder Engine,Four Wheel Drive,3TMLU4EN3EM143512,J&M Automotive,756/820 New Haven Road,6770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:50:53.953560
472,2016 Toyota Tacoma 4WD Access Cab V6 AT TRD Of...,2016,Toyota,Tacoma 4WD Access Cab V6 AT TRD Off Road,,Tacoma,4WD Access Cab V6 AT TRD Off Road,36397,36995,Super White,,,3.5L V6 Cylinder Engine,Four Wheel Drive,5TFSZ5AN8GX008189,J&M Automotive,756/820 New Haven Road,6770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:50:53.953560
473,2012 Lincoln Mkz 4dr Sdn AWD,2012,Lincoln,Mkz 4dr Sdn AWD,,Mkz,4dr Sdn AWD,88925,12995,Steel Blue Metallic,Light Camel,Automatic,3.5L V6 Cylinder Engine,All Wheel Drive,3LNHL2JC9CR810841,J&M Automotive,756/820 New Haven Road,6770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:50:53.953560
474,2017 Honda Accord LX CVT,2017,Honda,Accord LX CVT,,Accord,LX CVT,83396,19995,White Orchid Pearl,Ivory,Automatic,2.4L 4 Cylinder Engine,Front Wheel Drive,1HGCR2F30HA020482,J&M Automotive,756/820 New Haven Road,6770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:50:53.953560
475,2017 Nissan Sentra S CVT,2017,Nissan,Sentra S CVT,,Sentra,S CVT,82109,13995,Black,,Automatic,1.8L 4 Cylinder Engine,Front Wheel Drive,3N1AB7AP1HY374009,J&M Automotive,756/820 New Haven Road,6770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:50:53.953560


In [69]:
result.isnull().sum()

title                   0
year                    0
make                    0
model_trim              0
vehicle_type          476
model                   0
trim                    0
vehicle_mileage         0
price                   0
exterior_color          0
interior_color         12
transmission            4
engine                  0
drivetrain              0
vin                     0
dealership_name         0
dealership_address      0
dealership_zipcode      0
dealership_city         0
dealership_state        0
inventory_url           0
scraped_date            0
dtype: int64

In [62]:
result.loc[result.transmission.isnull(), :]

Unnamed: 0,title,year,make,model_trim,vehicle_type,model,trim,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,drivetrain,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
263,,2010,Ford,Super Duty F-250 Srw Super Duty F-250 Srw,,Super Duty F-250 Srw,Super Duty F-250 Srw,170882,17995,Oxford White,Medium Stone,,5.4L 8 Cylinder Engine,Four Wheel Drive,1FTSX2B52AEA54798,J&M Automotive,756/820 New Haven Road,6770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:44:08.856191
302,,2008,Ford,Super Duty F-350 Drw Super Duty F-350 Drw,,Super Duty F-350 Drw,Super Duty F-350 Drw,13454,37995,Oxford White,Camel,,6.4L 8 Cylinder Engine,Four Wheel Drive,1FDWF37R58ED29916,J&M Automotive,756/820 New Haven Road,6770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:44:12.383617
382,,2008,Nissan,Altima Altima,,Altima,Altima,79330,9995,Dark Slate Metallic,Frost,,2.5L 4 Cylinder Engine,Front Wheel Drive,1N4AL21E18N417391,J&M Automotive,756/820 New Haven Road,6770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:44:14.763709
472,,2016,Toyota,Tacoma Tacoma,,Tacoma,Tacoma,36397,36995,Super White,,,3.5L V6 Cylinder Engine,Four Wheel Drive,5TFSZ5AN8GX008189,J&M Automotive,756/820 New Haven Road,6770,Naugatuck,CT,https://www.jmautomotive.com/cars-for-sale-in-...,2022-03-25 22:44:28.720324


In [72]:
TABLE_NAME = 'inventory_staging'
DB_NAME = 'cars.db'

#Connecting to sqlite
conn = sqlite3.connect(DB_NAME)

#Creating a cursor object using the cursor() method
cursor = conn.cursor()

#Doping EMPLOYEE table if already exists
cursor.execute(f"DELETE FROM {TABLE_NAME} WHERE dealership_name = 'J&M Automotive'")

#Commit your changes in the database
conn.commit()

#Closing the connection
conn.close()


In [65]:
TABLE_NAME = 'test'
DB_NAME = 'cars.db'

#Connecting to sqlite
conn = sqlite3.connect(DB_NAME)

#Creating a cursor object using the cursor() method
cursor = conn.cursor()

#Doping EMPLOYEE table if already exists
cursor.execute(f"DROP TABLE {TABLE_NAME}")
print("Table dropped... ")

#Commit your changes in the database
conn.commit()

#Closing the connection
conn.close()


Table dropped... 


In [48]:
TABLE_NAME = 'parsing_errors_test'
DB_NAME = 'cars.db'

#Connecting to sqlite
conn = sqlite3.connect(DB_NAME)

#Creating a cursor object using the cursor() method
cursor = conn.cursor()

#Doping EMPLOYEE table if already exists
cursor.execute(f"DROP TABLE {TABLE_NAME}")
print("Table dropped... ")

#Commit your changes in the database
conn.commit()

#Closing the connection
conn.close()


Table dropped... 


In [66]:
conn = sqlite3.connect('cars.db')
sql_query = "SELECT * FROM test"
result = pd.read_sql_query(sql_query, conn)
result

Unnamed: 0,title,year,make,model_trim,vehicle_type,model,trim,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,drivetrain,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
0,2005 Mercedes-Benz CLK 500,2005,Mercedes-Benz,CLK 500,,,CLK 500,49345,14995,Black,Charcoal/Dark Ash,Automatic,5.0L NA V8 single overhead cam (SOHC) 24V,RWD,WDBTK75J95F123493,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:43:19.813878
1,2008 Audi A6 4.2 quattro,2008,Audi,A6 4.2 quattro,Sedan,A6,4.2 quattro,94540,9995,Brilliant Black,Amaretto,Automatic,4.2L V8 350hp 325ft. lbs.,AWD,WAUDV74FX8N162949,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:43:19.813878
2,2009 Toyota Camry LE,2009,Toyota,Camry LE,Sedan,Camry,LE,125050,8495,Magnetic Gray Metallic,Ash,Automatic,2.4L I4 158hp 161ft. lbs.,FWD,4T4BE46K89R114118,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:43:19.813878
3,2011 BMW X3 xDrive28i,2011,BMW,X3 xDrive28i,SUV,X3,xDrive28i,98952,13995,Jet Black,Black Nevada Leather,Automatic,3.0L I6 240hp 221ft. lbs.,AWD,5UXWX5C51BL715699,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:43:19.813878
4,2011 Mercedes-Benz M-Class ML 350 4MATIC,2011,Mercedes-Benz,M-Class ML 350 4MATIC,SUV,M-Class,ML 350 4MATIC,98922,14995,Obsidian Black Metallic,Black Leather,Automatic,3.5L V6 268hp 258ft. lbs.,AWD,4JGBB8GB4BA723044,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:43:19.813878
5,2012 Audi A6 3.0T quattro Prestige,2012,Audi,A6 3.0T quattro Prestige,Sedan,A6,3.0T quattro Prestige,98933,17995,White,Brown,Automatic,3.0L Supercharged V6 310hp 325ft. lbs.,AWD,WAUHGAFC4CN137257,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:43:19.813878
6,2013 Mercedes-Benz E-Class E 550 4MATIC,2013,Mercedes-Benz,E-Class E 550 4MATIC,Sedan,E-Class,E 550 4MATIC,113887,20495,Obsidian Black Metallic,Almond/Mocha,Automatic,4.7L Twin Turbo V8 402hp 443ft. lbs.,AWD,WDDHF9BB6DA701796,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:43:19.813878
7,2014 Mercedes-Benz M-Class ML 350 4MATIC,2014,Mercedes-Benz,M-Class ML 350 4MATIC,SUV,M-Class,ML 350 4MATIC,98705,22495,Black,Black,Automatic,3.5L V6 302hp 273ft. lbs.,AWD,4JGDA5HB8EA352421,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:43:19.813878
8,2015 Audi A4 2.0T quattro Premium Plus,2015,Audi,A4 2.0T quattro Premium Plus,Sedan,A4,2.0T quattro Premium Plus,80411,20595,Silver,Black,Automatic,2.0L Flex Fuel Turbo I4 220hp 258ft. lbs.,AWD,WAUFFAFL3FN008344,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:43:19.813878
9,2015 Audi A6 3.0T quattro Premium Plus,2015,Audi,A6 3.0T quattro Premium Plus,Sedan,A6,3.0T quattro Premium Plus,105344,20695,Black,Brown,Automatic,3.0L Supercharged V6 310hp 325ft. lbs.,AWD,WAUFGAFC6FN031145,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:43:19.813878


In [63]:
for idx, row in result.iterrows():
    if row['model'] == 'Other':
        result.at[idx, 'model'] = None

In [64]:
result

Unnamed: 0,title,year,make,model_trim,vehicle_type,model,trim,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,drivetrain,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
0,2005 Mercedes-Benz CLK CLK 500 AMG Package Co...,2005,Mercedes-Benz,CLK CLK 500 AMG Package Convertible,,,CLK 500,49345,14995,Black,Charcoal/Dark Ash,Automatic,5.0L NA V8 single overhead cam (SOHC) 24V,RWD,WDBTK75J95F123493,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:39:03.595358
1,2008 Audi A6 4.2 quattro S-Line,2008,Audi,A6 4.2 quattro S-Line,Sedan,A6,4.2 quattro,94540,9995,Brilliant Black,Amaretto,Automatic,4.2L V8 350hp 325ft. lbs.,AWD,WAUDV74FX8N162949,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:39:03.595358
2,2009 Toyota Camry LE,2009,Toyota,Camry LE,Sedan,Camry,LE,125050,8495,Magnetic Gray Metallic,Ash,Automatic,2.4L I4 158hp 161ft. lbs.,FWD,4T4BE46K89R114118,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:39:03.595358
3,2011 BMW X3 xDrive28i,2011,BMW,X3 xDrive28i,SUV,X3,xDrive28i,98952,13995,Jet Black,Black Nevada Leather,Automatic,3.0L I6 240hp 221ft. lbs.,AWD,5UXWX5C51BL715699,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:39:03.595358
4,2011 Mercedes-Benz ML 350 4MATIC,2011,Mercedes-Benz,ML 350 4MATIC,SUV,M-Class,ML 350 4MATIC,98922,14995,Obsidian Black Metallic,Black Leather,Automatic,3.5L V6 268hp 258ft. lbs.,AWD,4JGBB8GB4BA723044,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:39:03.595358
5,2012 Audi A6 3.0T quattro Prestige,2012,Audi,A6 3.0T quattro Prestige,Sedan,A6,3.0T quattro Prestige,98933,17995,White,Brown,Automatic,3.0L Supercharged V6 310hp 325ft. lbs.,AWD,WAUHGAFC4CN137257,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:39:03.595358
6,2013 Mercedes-Benz E 550 4MATIC Sport,2013,Mercedes-Benz,E 550 4MATIC Sport,Sedan,E-Class,E 550 4MATIC,113887,20495,Obsidian Black Metallic,Almond/Mocha,Automatic,4.7L Twin Turbo V8 402hp 443ft. lbs.,AWD,WDDHF9BB6DA701796,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:39:03.595358
7,2014 Mercedes-Benz ML 350 4MATIC,2014,Mercedes-Benz,ML 350 4MATIC,SUV,M-Class,ML 350 4MATIC,98705,22495,Black,Black,Automatic,3.5L V6 302hp 273ft. lbs.,AWD,4JGDA5HB8EA352421,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:39:03.595358
8,2015 Audi A4 2.0T quattro Premium Plus,2015,Audi,A4 2.0T quattro Premium Plus,Sedan,A4,2.0T quattro Premium Plus,80411,20595,Silver,Black,Automatic,2.0L Flex Fuel Turbo I4 220hp 258ft. lbs.,AWD,WAUFFAFL3FN008344,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:39:03.595358
9,2015 Audi A6 3.0T quattro Premium Plus,2015,Audi,A6 3.0T quattro Premium Plus,Sedan,A6,3.0T quattro Premium Plus,105344,20695,Black,Brown,Automatic,3.0L Supercharged V6 310hp 325ft. lbs.,AWD,WAUFGAFC6FN031145,Bostonyan Auto Group,119 Worcester St,1760,Natick,MA,https://www.bostonyanautogroup.com/view-inventory,2022-04-04 15:39:03.595358


In [49]:

conn = sqlite3.connect('cars.db')
sql_query = "SELECT * FROM parsing_errors_test"
result = pd.read_sql_query(sql_query, conn)
result

Unnamed: 0,error,dealership,date,url
0,Data Validation,Direct Auto Mecca,2022-04-03 20:31:05.597298,https://www.directautomecca.com/view-inventory...
1,Data Validation,Direct Auto Mecca,2022-04-03 20:32:12.270002,https://www.directautomecca.com/view-inventory...
2,Data Validation,Direct Auto Mecca,2022-04-03 20:37:39.130588,https://www.directautomecca.com/view-inventory...
3,Data Validation,Direct Auto Mecca,2022-04-03 20:38:22.908299,https://www.directautomecca.com/view-inventory...
4,Data Validation,Direct Auto Mecca,2022-04-03 20:38:24.213473,https://www.directautomecca.com/inventory.aspx...


In [27]:
result.to_csv("errors.csv")

In [4]:
conn = sqlite3.connect('cars.db')
sql_query = "SELECT dealership_name, count(*) AS num_of_vehicles FROM inventory_staging GROUP BY 1"
result = pd.read_sql_query(sql_query, conn)
result

Unnamed: 0,dealership_name,num_of_vehicles
0,Avon Auto Brokers,324
1,Blasius Boston,69
2,Bostonyan Auto Group,23
3,Direct Auto Mecca,156
4,Fafama Auto Sales,107
5,J&M Automotive,463
6,Johns Auto Sales,123
7,Newton Automotive Sales,40


In [17]:
conn = sqlite3.connect('cars.db')
sql_query = "SELECT dealership_name, count(*) AS num_of_vehicles FROM inventory GROUP BY 1"
result = pd.read_sql_query(sql_query, conn)
result

Unnamed: 0,dealership_name,num_of_vehicles
0,Avon Auto Brokers,627
1,Blasius Boston,358
2,Bostonyan Auto Group,41
3,Direct Auto Mecca,303
4,Fafama Auto Sales,239
5,J&M Automotive,939
6,Johns Auto Sales,270
7,Newton Automotive Sales,75


In [14]:
conn = sqlite3.connect('cars.db')
sql_query = "SELECT * FROM inventory_staging"
result = pd.read_sql_query(sql_query, conn)

In [16]:
result

Unnamed: 0,title,year,make,model_trim,vehicle_type,model,trim,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,drivetrain,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date


In [13]:
result.loc[result['make'].isnull(), :]

Unnamed: 0,title,year,make,model_trim,vehicle_type,model,trim,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,drivetrain,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
810,Mercury Monterey Convenience,1968,,,,,Convenience,54350,13995.0,Cream,,Automatic,390cc,RWD,8Z47X504086,Johns Auto Sales,181 Somerville Avenue,2143,Somerville,MA,https://johnsautosales.com/newandusedcars?clea...,2022-04-06 22:26:47.041822


In [19]:
conn = sqlite3.connect('cars.db')
sql_query = "SELECT * FROM inventory WHERE dealership_name = 'Newton Automotive Sales'"
result = pd.read_sql_query(sql_query, conn)
result

Unnamed: 0,title,year,make,model_trim,vehicle_type,model,trim,vehicle_mileage,price,exterior_color,interior_color,transmission,engine,drivetrain,vin,dealership_name,dealership_address,dealership_zipcode,dealership_city,dealership_state,inventory_url,scraped_date
0,2018 Audi A4 2.0T quattro Premium Plus,2018,Audi,A4 2.0T quattro Premium Plus,,,,15679,36590,Moonlight Blue Metallic,Nougat Brown Interior,Automatic 7-Speed,2.0L I4 Turbocharger,AWD,,Newton Automotive Sales,249 Centre Street,2458,Newton,MA,https://www.newtonautoandsales.com/cars-for-sale,2022-03-15 18:01:28.249708
1,2017 Audi TT 2.0T quattro,2017,Audi,TT 2.0T quattro,,,,29520,39480,Nano Gray Metallic,"Rock Gray, Fine Nappa Leather",Automatic 6-Speed,I4 2.0L Turbocharger,AWD,,Newton Automotive Sales,249 Centre Street,2458,Newton,MA,https://www.newtonautoandsales.com/cars-for-sale,2022-03-15 18:01:28.249708
2,2018 Audi Q7 3.0T quattro Prestige,2018,Audi,Q7 3.0T quattro Prestige,,,,53912,48560,Black,Brown,Automatic 8-Speed,V6 3.0L Supercharger,AWD,,Newton Automotive Sales,249 Centre Street,2458,Newton,MA,https://www.newtonautoandsales.com/cars-for-sale,2022-03-15 18:01:28.249708
3,2017 Audi A4 2.0T quattro Premium Plus,2017,Audi,A4 2.0T quattro Premium Plus,,,,53298,29350,White,Beige,Automatic 7-Speed,I4 2.0L Turbocharger,AWD,,Newton Automotive Sales,249 Centre Street,2458,Newton,MA,https://www.newtonautoandsales.com/cars-for-sale,2022-03-15 18:01:28.249708
4,2018 Audi SQ5 3.0T quattro Prestige,2018,Audi,SQ5 3.0T quattro Prestige,,,,99106,36850,Black,Black,Automatic 8-Speed,V6 3.0L Turbocharger,AWD,,Newton Automotive Sales,249 Centre Street,2458,Newton,MA,https://www.newtonautoandsales.com/cars-for-sale,2022-03-15 18:01:28.249708
5,2013 Audi A4 2.0T quattro Premium,2013,Audi,A4 2.0T quattro Premium,,,,54283,17980,White,Black,Automatic 8-Speed,I4 2.0L Turbocharger,AWD,,Newton Automotive Sales,249 Centre Street,2458,Newton,MA,https://www.newtonautoandsales.com/cars-for-sale,2022-03-15 18:01:28.249708
6,2018 Audi S4 3.0T quattro Prestige,2018,Audi,S4 3.0T quattro Prestige,,,,62800,41580,Black,Black,Automatic 8-Speed,V6 3.0L Turbocharger,AWD,,Newton Automotive Sales,249 Centre Street,2458,Newton,MA,https://www.newtonautoandsales.com/cars-for-sale,2022-03-15 18:01:28.249708
7,2020 Audi Q5 2.0T quattro Premium Plus,2020,Audi,Q5 2.0T quattro Premium Plus,,,,29709,47480,Mythos Black Metallic,"Nougat Brown, Leather Seating",Automatic 7-Speed,I4 2.0L Turbocharger,AWD,,Newton Automotive Sales,249 Centre Street,2458,Newton,MA,https://www.newtonautoandsales.com/cars-for-sale,2022-03-15 18:01:28.249708
8,2018 Audi Q7 3.0T quattro Premium Plus,2018,Audi,Q7 3.0T quattro Premium Plus,,,,99848,34980,Graphite Gray Metallic,Black,Automatic 8-Speed,V6 3.0L Supercharger,AWD,,Newton Automotive Sales,249 Centre Street,2458,Newton,MA,https://www.newtonautoandsales.com/cars-for-sale,2022-03-15 18:01:28.249708
9,2018 Audi S7 4.0T quattro Premium Plus,2018,Audi,S7 4.0T quattro Premium Plus,,,,85521,48760,Mythos Black Metallic,Black,Automatic 7-Speed,V8 4.0L Twin Turbocharger,AWD,,Newton Automotive Sales,249 Centre Street,2458,Newton,MA,https://www.newtonautoandsales.com/cars-for-sale,2022-03-15 18:01:28.249708
