In [1]:
import parse_inventory as pi
import parse_dealership
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import sqlite3
import numpy as np

## Test Connection

Check that the HTML for the page is being fetched and parsed correctly.

In [2]:
# Fetch the dealership's used-car listing page and parse it into `soup`,
# which the extraction cells below read from.
# NOTE(review): a browser-like User-Agent is sent; presumably the site rejects or
# alters responses for the default python-requests agent — confirm.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
}

URL = 'https://www.jmautomotive.com/cars-for-sale-in-Naugatuck-CT-Hartford-New-Haven/used_cars'
# requests has no default timeout: without one a stalled server hangs the kernel.
response = requests.get(URL, headers=headers, timeout=30)
# Fail loudly on a 4xx/5xx status instead of parsing an error page as inventory.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")


In [3]:
# Sanity check: show only the beginning of the parsed document. Printing the
# whole page floods the cell output and bloats the saved notebook; the <head>
# section is enough to confirm the right page was fetched.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<html lang="en-us">
 <head>
  <title>
   Used cars for sale in Naugatuck, Waterbury, Hartford, New Haven, CT | J&amp;M Automotive Sls&amp;Svc LLC
  </title>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="black" name="apple-mobile-web-app-status-bar-style"/>
  <meta content="width=device-width, user-scalable=yes, initial-scale=1.0, minimum-scale=1.0, maximum-scale=9.0" name="viewport"/>
  <meta content="index, follow" name="robots"/>
  <meta content="J&amp;M Automotive Sls&amp;Svc LLC, used cars for sale, used car dealer, Connecticut, CT, Naugatuck, Waterbury, Hartford, New Haven, New Haven County, Waterbury CT, Hartford CT, Prospect CT, Oxford CT, Seymour CT, Cheshire CT, Southbury CT, Wolcott CT, Watertown CT, Woodbury CT, Woodbridge CT, Ansonia CT, Hamden CT, Derby CT, North Haven CT and Shelton CT,  used cars, used suv, used trucks, used pickups, used minivans, car financing, service and repair center in Naugatuck, CT" name="k

## Test Extracting Columns

### Title

In [4]:
# Extract the title of every listing on the page (the recorded output is a list
# of title strings, one per vehicle).
# NOTE(review): argument order is defined in parse_inventory, which is not shown
# here. Presumably this reads the 'title' attribute of <a class="listitemlink">
# nested inside <div class="thumbnail"> — TODO confirm against the helper.
pi.parse_subsection_attr(soup, 'title','div', 'a', 'thumbnail', 'listitemlink')

['Used 2016 Ford F-150 4WD SuperCrew 145 XLT for sale in Naugatuck, CT',
 "Used 2017 Ford F-150 XLT 4WD SuperCrew 5.5' Box for sale in Naugatuck, CT",
 'Used 2016 Ford Super Duty F-350 Srw 4WD Crew Cab 156 Platinum for sale in Naugatuck, CT',
 'Used 2017 Mini Countryman Cooper ALL4 for sale in Naugatuck, CT',
 'Used 2012 Toyota Fj Cruiser 4WD 4dr Auto for sale in Naugatuck, CT',
 'Used 2013 Mercedes-benz C-class 4dr Sdn C 300 Luxury 4MATIC for sale in Naugatuck, CT',
 'Used 2015 Ford Expedition 4WD 4dr Platinum for sale in Naugatuck, CT',
 'Used 2018 Volkswagen Jetta 1.4T SE Auto for sale in Naugatuck, CT',
 'Used 2015 Ford Super Duty F-250 Srw 4WD Crew Cab 156 XL for sale in Naugatuck, CT',
 'Used 2017 Infiniti Q50 3.0t Premium AWD for sale in Naugatuck, CT',
 'Used 2017 Kia Optima LX Auto for sale in Naugatuck, CT',
 'Used 2015 Toyota Tundra Double Cab 5.7L V8 6-Spd AT SR for sale in Naugatuck, CT',
 'Used 2013 Audi Tts 2dr Cpe S tronic quattro 2.0T Prest for sale in Naugatuck, CT',


### Year

In [8]:
# Extract the model year of every listing (the recorded output is a list of
# year strings such as '2016').
# NOTE(review): presumably collects the text of elements carrying
# itemprop="vehicleModelDate" within each <div class="thumbnail"> — TODO confirm
# against parse_inventory.
pi.parse_main_section_attr_text_all(soup, 'div', 'thumbnail', 'itemprop', 'vehicleModelDate')

['2016',
 '2017',
 '2016',
 '2017',
 '2012',
 '2013',
 '2015',
 '2018',
 '2015',
 '2017',
 '2017',
 '2015',
 '2013',
 '2015',
 '2013',
 '2015',
 '2014',
 '1993',
 '2014',
 '2004',
 '2016',
 '2014',
 '2017',
 '2015',
 '2012',
 '2009',
 '2016',
 '2018',
 '2016',
 '2020',
 '2018',
 '2019',
 '2012',
 '2018',
 '2014',
 '2015',
 '2014',
 '2012',
 '2016',
 '2016',
 '2018',
 '2015',
 '2016',
 '2014',
 '2014',
 '2013',
 '2015',
 '2020',
 '2018',
 '2019']

### Make

In [11]:
# Extract the make of every listing (the recorded output is a list of
# manufacturer names such as 'Ford').
# NOTE(review): presumably collects the text of elements carrying
# itemprop="manufacturer" within each <div class="thumbnail"> — TODO confirm
# against parse_inventory.
pi.parse_main_section_attr_text_all(soup, 'div', 'thumbnail', 'itemprop', 'manufacturer')

['Ford',
 'Ford',
 'Ford',
 'Mini',
 'Toyota',
 'Mercedes-benz',
 'Ford',
 'Volkswagen',
 'Ford',
 'Infiniti',
 'Kia',
 'Toyota',
 'Audi',
 'GMC',
 'Jeep',
 'Lexus',
 'Audi',
 'Chevrolet',
 'Mercedes-benz',
 'Chevrolet',
 'Lexus',
 'Toyota',
 'Nissan',
 'Chevrolet',
 'GMC',
 'Honda',
 'Hyundai',
 'Volvo',
 'BMW',
 'Acura',
 'Dodge',
 'Dodge',
 'Honda',
 'Nissan',
 'Subaru',
 'Toyota',
 'Lexus',
 'Nissan',
 'Nissan',
 'Honda',
 'Infiniti',
 'Kia',
 'Chrysler',
 'Ford',
 'Audi',
 'Cadillac',
 'GMC',
 'Mazda',
 'Toyota',
 'Chevrolet']

### Model

In [10]:
# Extract the model of every listing (the recorded output is a list of model
# names such as 'F-150').
# NOTE(review): presumably collects the text of elements carrying
# itemprop="model" within each <div class="thumbnail"> — TODO confirm against
# parse_inventory.
pi.parse_main_section_attr_text_all(soup, 'div', 'thumbnail', 'itemprop', 'model')

['F-150',
 'F-150',
 'Super Duty F-350 Srw',
 'Countryman',
 'Fj Cruiser',
 'C-class',
 'Expedition',
 'Jetta',
 'Super Duty F-250 Srw',
 'Q50',
 'Optima',
 'Tundra',
 'Tts',
 'Yukon',
 'Wrangler Unlimited',
 'Is 250',
 'S4',
 'Corvette',
 'E-class',
 'Ssr',
 'Rx 350',
 'Fj Cruiser',
 'Titan',
 'Silverado 3500hd',
 'Sierra 1500',
 'Ridgeline',
 'Genesis',
 'S60',
 '6 Series',
 'Tlx',
 'Challenger',
 'Challenger',
 'Accord',
 'Rogue',
 'Xv Crosstrek',
 '4runner',
 'Es 350',
 'Maxima',
 'Maxima',
 'Pilot',
 'Q50',
 'Optima',
 'Town & Country',
 'Fusion',
 'S4',
 'Xts',
 'Sierra 1500',
 'Cx-5',
 'Sienna',
 'Colorado']

### Trim

In [12]:
# Extract the trim / configuration of every listing (the recorded output is a
# list of strings such as '4WD SuperCrew 145 XLT').
# NOTE(review): presumably collects the text of elements carrying
# itemprop="vehicleConfiguration" within each <div class="thumbnail"> — TODO
# confirm against parse_inventory.
pi.parse_main_section_attr_text_all(soup, 'div', 'thumbnail', 'itemprop', 'vehicleConfiguration')

['4WD SuperCrew 145 XLT',
 "XLT 4WD SuperCrew 5.5' Box",
 '4WD Crew Cab 156 Platinum',
 'Cooper ALL4',
 '4WD 4dr Auto',
 '4dr Sdn C 300 Luxury 4MATIC',
 '4WD 4dr Platinum',
 '1.4T SE Auto',
 '4WD Crew Cab 156 XL',
 '3.0t Premium AWD',
 'LX Auto',
 'Double Cab 5.7L V8 6-Spd AT SR',
 '2dr Cpe S tronic quattro 2.0T Prest',
 '4WD 4dr Denali',
 '4WD 4dr Sport',
 '4dr Sport Sdn AWD',
 '4dr Sdn S Tronic Prestige',
 '2dr Convertible',
 '2dr Cpe E 350 4MATIC',
 'Reg Cab 116.0 WB LS',
 'AWD 4dr',
 '4WD 4dr Auto',
 '4x4 Crew Cab SV',
 '4WD Crew Cab 153.7 High Country',
 '4WD Ext Cab 143.5 Work Truck',
 '4WD Crew Cab RTL',
 '4dr Sdn V6 3.8L AWD',
 'T5 AWD Dynamic',
 '4dr Sdn 640i RWD Gran Coupe',
 '3.5L SH-AWD w/Technology Pkg',
 'GT AWD',
 'SXT AWD',
 '4dr I4 Auto LX',
 'AWD SL',
 '5dr Auto 2.0i Premium',
 '4WD 4dr V6 Limited',
 '4dr Sdn',
 '4dr Sdn V6 CVT 3.5 SV',
 '4dr Sdn 3.5 Platinum',
 'AWD 4dr EX',
 '3.0t LUXE AWD',
 '4dr Sdn LX',
 '4dr Wgn Touring',
 '4dr Sdn SE FWD',
 '4dr Sdn Man Premium