In [15]:
import requests
from bs4 import BeautifulSoup
import numpy as np

#This header is sent with requests to identify the user to the web admin
headers = {'user-agent': 'Alex Sylvester/Vancouver, alexander.d.sylvester@gmail.com'}

#get the first craigslist page for all vehicles with price above 100
#keep the Response object itself: calling .text here as well would turn it into a str,
#and the response.text lookup on the next lines would then raise AttributeError
response = requests.get('https://vancouver.craigslist.org/d/cars-trucks/search/cta?min_price=100', headers=headers)
response.raise_for_status() #stop on an HTTP error instead of parsing an error page
soup = BeautifulSoup(response.text, 'lxml') #convert to a BeautifulSoup object

#the rest of this cell is from this tutorial: https://towardsdatascience.com/web-scraping-craigslist-a-complete-tutorial-c41cea4f4981
#find the total number of posts to find the limit of the pagination
results_num = soup.find('div', class_= 'search-legend')
results_total = int(results_num.find('span', class_='totalcount').text) #total count of posts, used as the upper bound of the pages array

#pages are addressed through the s= offset in steps of 120 (s=0, s=120, s=240, and so on),
#so np.arange steps by 120; the +1 keeps the last offset when the total is an exact multiple of 120
pages = np.arange(0, results_total+1, 120)

array([   0,  120,  240,  360,  480,  600,  720,  840,  960, 1080, 1200,
       1320, 1440, 1560, 1680, 1800, 1920, 2040, 2160, 2280, 2400, 2520,
       2640, 2760, 2880, 3000])

In [30]:
import pandas as pd
from time import sleep #to avoid bombarding the webpage with requests
from random import randint #avoid throttling by not sending too many requests one after the other

#Scrape every search-result page listed in `pages`, open each posting, and collect
#its attributes (plus title and price) into one dataframe, `car_df`.
#Rows are gathered in a plain list and the dataframe is built once at the end:
#DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on every row.
#NOTE: columns now appear in the order the keys are first seen; access by column name is unaffected.
car_records = [] #one dict per car; stays available in the kernel if the loop is interrupted

for page in pages:
    print('page:', page)

    #get request, identifying ourselves with the same headers as the first request
    response = requests.get("https://vancouver.craigslist.org/d/cars-trucks/search/cta?"
                   + "s=" #the parameter for defining the page offset
                   + str(page) #the offset from the pages array from earlier
                   + "&min_price=100", #only get entries with price above $100
                   headers=headers
                  )
    sleep(1) #wait one second between requests

    soup = BeautifulSoup(response.text, 'lxml') #convert to a BeautifulSoup object
    entries = soup.find_all('li', class_='result-row') #get a list of entries on the page

    for entry in entries: #loop over the cars on the page

        price_tag = entry.find('span', class_='result-price') #price element of the car
        title = entry.find('a', class_='result-title hdrlnk') #get the title/link for the entry
        if price_tag is None or title is None:
            #find() returns None when the element is absent; skip instead of aborting the whole scrape
            print('skipping entry without price or link')
            continue
        price = price_tag.text #price of the car
        car_url = title['href'] #get the url from the title

        sleep(1) #wait one second between requests
        car = requests.get(car_url, headers=headers).text #get the web page for the car
        car_soup = BeautifulSoup(car, 'lxml') #convert the webpage to a soup object

        all_attrs = car_soup.find_all('p', class_='attrgroup') #all of the vehicle attribute groups
        if len(all_attrs) < 2 or all_attrs[0].b is None:
            #the posting page lacks the two expected attribute groups; skip it
            print('skipping posting without attributes:', car_url)
            continue
        make = all_attrs[0].b.text #first attribute group is the title
        print(make)
        attrs = [span.text for span in all_attrs[1].find_all('span')] #the rest of the attributes, as text

        #library to hold the attributes: text before the first ':' is the key, everything after it
        #is the value (partition keeps values that themselves contain a ':')
        car_lib = {}
        for attr in attrs:
            if ':' in attr:
                key, _, value = attr.partition(':')
                car_lib[key] = value
        car_lib['make'] = make #add the title to the library
        car_lib['price'] = price #add the price to the library

        car_records.append(car_lib) #store the car data; the dataframe is built after the loop

car_df = pd.DataFrame(car_records) #build the dataframe holding all the car data in one step

car_df

page: 0
2007 chevy cobalt ss
2017 Audi A4
2018 porsche cayenne s e-hybrid
2018 gmc yukon xl slt 4wd
2019 Honda CR-V
1997 cadillac catera
2001 honda accord ex-l
2003 ford f450 super duty
2004 gmc sierra 2500hd 4x4
2018 Audi A5 Coupe
2019 hyundai santa fe awd
2018 porsche boxster gts
2003 pontiac montana
2014 Kia Soul EX GDI
2004 chevrolet optra
1996 Ford Ranger
2007 Mercedes B200
2011 Ford Ranger
2017 Honda Odyssey
2012 volkswagen jetta
2013 mini cooper
2012 volkswagen jetta
2006 Mercedes Benz B 200
2015 MINI Cooper Hardtop
2006 honda odyssey
2004 jaguar xj8
2004 pontiac grand am
2020 BMW 250i Convertible
2017 Honda Fit
2004 jaguar xj
2013 Nissan Altima SL
1989 Jaguar XJS
2010 honda accord
2017 Honda Fit
2008 toyota prius
2014 nissan versa note sl
2020 Audi A3 Technik TFSI quattro
2008 lexus es 350
2005 chevy equinox
2001 dodge durango
2006 2006 honda civic
2003 dodge durango
2012 toyota corolla
2018 Audi Q7
2013 bmw 328i
2017 Honda Civic
2016 jeep cherokee
2017 chevrolet bolt ev lt
201

1994 nissan altima
2006 chevrolet hhr ls
2018 Toyota RAV4 Hybrid
2005 2005 nissan pathfinder SE
2015 mazda 3
2004 gmc sierra 2500hd
2001 honda accord ex-l
2020 Jeep Grand Cherokee SRT 8
2006 mazda 3
1996 ford taurus
2009 mini cooper s
2013 FORD E-150
2017 subaru brz
2010 audi a5 quattro
2013 Honda Fit
2000 volkswagen jetta sedan
2010 mercedes benz c300
2003 Ford Explorer Eddie Bauer 4x4
2004 dodge grand caravan
2020 Subaru WRX
2018 Toyota Sienna
2003 mazda protege lx
2019 Mazda Mazda3
2009 hyundai santa fe
2017 Nissan leaf
1994 1994 Western Star LSVW
2006 HYUNDAI AZERA
2002 2002 gmc yukon
2004 hyundai sonata
2021 CHEVROLET COLORADO ZR2
2013 lexus rx 350,lexus es 350
2015 audi s5
2017 BMW X3
2012 mercedes benz ml350
2016 Subaru Outback
2004 daihatsu hijet
2019 Toyota Camry
2006 2006 dodge grand caravan sxt
2006 honda civic lx sedan 4d
2010 Acura MDX
2018 mazda cx-5 signature awd
2017 jaguar xe
2016 HYUNDAI SANTA SPORT AWD
2018 cadillac xt5 luxury awd
2008 Honda civic
2006 2006 BMW M6
20

2016 gmc terrain sle awd
2012 gmc sierra 1500
2011 honda civic
2017 CADILLAC XTS
2020 Jeep Grand Cherokee Limited
2018 Honda Civic
2019 volkswagen jetta
2006 Acura TL
2009 honda civic
1964 ford galaxie 500 convertible
2011 honda civic
2017 mazda cx-3 gx awd
2014 fiat 500e
2007 honda cr-v
2018 mazda cx-3 gs
2013 audi q5
2018 mazda mazda3 gt
2017 honda civic si coupe
2009 subaru forester
2015 mercedes-benz cla250
2018 Honda Civic
2020 Jeep Grand Cherokee Limited
1996 mazda protege
2009 dodge journey
2015 Volkswagen Jetta
2018 Honda Civic
2017 mazda mazda3 gt
2020 Nissan Qashqai
2016 hyundai elantra
2009 hyundai elantra
2018 mazda mazda3 gt
2018 Honda HR-V
2016 Volkswagen GTI
2005 infiniti g35
2017 chevrolet volt lt
2017 Mitsubishi RVR
2020 Jeep Grand Cherokee
2017 mazda cx5
2016 Honda CR-V
2017 mazda mazda3
2010 bmw 328i
2017 Jeep Wrangler Unlimited Rubicon
2010 BMW 328
1996 Dodge Caravan
2018 Toyota Corolla
2019 mazda mazda3 sport gt
2013 ford focus titanium
2018 Honda Civic
2018 kia fo

2012 Toyota Matrix
2015 Dodge Grand Caravan
2009 infiniti g37
2010 toyota yaris
2018 Mercedes-Benz GLS550 4MATIC SUV
2019 Ram 1500 Classic
2016 Ford Escape SE AWD
2019 lexus nx300
2014 mercedes benz gl350 bluetec
2018 Toyota 4Runner
2015 Mercedes-Benz GL63 AMG
2017 Chevrolet Bolt EV
2005 honda civic
2021 toyota corolla
2016 Civic Sedan Touring
2018 mercedes benz cla250
2019 Hyundai Sonata
2009 honda civic
2020 ram 1500 big horn
2002 honda civic
2005 ford f150
2021 toyota 4runner
2011 Audi S4 3.0T Prem S tronic qtro
2001 ford explorer sport
2005 DODGE MAGNUM RT
2002 Mercedes
2011 Honda Covid Coupe
2005 honda accord LX-G
2020 2020  Toyota prius prime
2015 Ford F-350 Dually
2003 pontiac montana
2016 bmw x5 xdrive35i
2017 Porsche 911 Carrera Cabriolet
2017 subaru wrx
1997 bmw 318i
2015 jeep cherokee limited 4x4
2018 bmw 430i xdrive
2005 Porsche 911 Carrera S
2018 Nissan NV200 Compact Cargo
2009 honda accord ex-l
2012 Honda CR-Z CVT
1999 Mercedes benz e430
2020 Land Rover Defender 110
1971 

2016 mitsubishi rvr se awd
2001 2001 ford f350
1993 1993 toyota camry
2011 ford f350
1993 toyota previa
2008 chrysler 300 touring
2006 ford focus
2020 Jaguar F-Type
2020 mazda 3
2015 nissan rogue
2008 jeep compass Limited
2003 2003 Mazda Protege
2017 volvo v90
2006 Hyundai Santa Fe
2003 Mitsubishi Galant
2012 ford focus se
2017 mercedes-benz c-class
2015 ford f-150
2015 audi s8 plus 4.0t quattro
2013 MINI COOPER
2009 Acura RDX
2006 Toyota Avalon limited
2017 mitsubishi mirage es
2009 mercedes benz B200 turbo
2008 dodge dakota 4x4
2016 dodge journey se
2017 nissan rogue sl awd
2017 JEEP CHEROKEE NORTH
2016 HONDA CIVIC EX-T
2016 kia soul ev
2016 Kia soul EV
2017 acura rdx elite
2017 2017 2017 Frightliner Bus
2019 2019 ford expedition lmt
2016 BMW X4
2008 jeep compass
2016 freightliner cascadia
2013 KIA SOUL 4U
2014 Jeep Patriot
2014 MAZDA MAZDA5 GT
2009 pontiac torrent
2006 Ford Mustang V6 Convertible
2009 bmw 750li
2004 bmw 3 series
page: 1560
2008 honda accord
2018 nissan pathfinder sv

2009 Bmw x5
1999 Toyota
2001 ford f-150
2000 vw passat
2018 porsche cayenne s e-hybrid
1998 toyota sienna
2008 mercedes benz c300
2010 jeep compass
2005 saab 9-2x
2016 Porsche Cayenne GTS
2009 mitsubishi outlander
1972 GMC
2000 nissan pathfinder se
2008 mini cooper
2008 2008 Mercedes-Benz E320
2018 porsche cayenne platinum edition
2008 Audi A4 S Line
page: 1920
2008 mini cooper
2008 2008 Mercedes-Benz E320
2018 porsche cayenne platinum edition
2008 Audi A4 S Line
2009 2009 smart car for two 09 10 08
1998 jeep wrangler
2016 cadillac escalade platinum
2010 Hyundai Sonata
2016 2016 Mazda CX5
2003 dodge grand caravan
2006 mazda 6
2001 audi tt
2018 Lexus ES 350
2010 dodge caliber r/t
2019 Infiniti QX80
2018 Ford F150 XLT
2016 subaru crosstrek
2010 hyundai santa fe
2018 honda civic type r
2009 honda fit sport
2017 Toyota Highlander XLE Hybrid
2017 Toyota Corolla LE
2018 Toyota Highlander XLE
2013 2017 VW Jetta
2019 Hino 195
2008 mercedes-benz s-class
2008 pontiac wave
2012 Kia Soul
2007 jeep

2008 Dodge Ram 1500 quad cab
2020 CHEVROLET BOLT EV 420 RANGE
2018 Ford Super Duty F-550 DRW
2015 lexus nx200t premium pkg
2018 lexus rx350 f sport awd
2017 audi a4
2010 Nissan Altima
2018 MAZDA CX-5 GT
2005 bmw x5
2020 Hyundai Kona
2013 fiat 500 sport
1992 honda civic
2012 honda civic
2019 Mitsubishi RVR
2006 nissan altima 2.5 s
1994 ford ranger xlt
2007 volkswagen golf
2006 toyota corolla s model
2017 Ford Explorer Platinum
1977 buick riviera
2008 bmw 328i
2000 volvo v70r
2004 audi s4 avant
2010 gmc 1500 sierra
2011 2011 VW Jetta
2014 nissan versa note
2007 dodge ram 3500
2019 Toyota Camry SE
2016 Infiniti Q50
2009 bmw 750li
2009 gmc sierra 1500
2011 gmc terrain
2019 chevrolet cruze
2002 hyandai elantra
2016 Tesla model S70
2004 Toyota Corolla
2011 hyundai santa fe gls
2014 kia forte sx
2010 hyundai santa fe limited awd
2012 dodge journey r/t
2005 Dodge Caravan SE
2019 Mitsubishi Mirage
2013 acura ilx
2010 Ford f150
2020 toyota prius prime
2013 ford focus titanium
2007 2007 Hyundai A

2017 BMW X5 3.5I M SPORT
2001 Mazda Miata
2018 BMW X5M
2003 Toyota Camry xle
2015 honda accord ex-l
2016 mercedes benz g63 amg
1999 1999
2006 Mini Cooper
1966 ford mustang
1967 Canadian made iron
2016 dodge grand caravan
2016 honda fit
2014 ford mustang
1970 1970 cougar
2009 suzuki sx4
2002 dodge ram 2500
2003 Ford F-350 centennial edition
2010 ford f-150
2003 Volvo s60
2016 Kia Optima EX Hybrid
2019 chevrolet blazer rs
1999 bmw m3 coupe
1999 bmw m3 coupe
2010 BMW X5M
2016 Tesla Model S 90D
1992 Ferrari 348TS
2019 dodge ram 1500
2007 Mb  C 280 Avantgarte
2018 audi a5 2.0t quattro
1960 international
2015 2015 kia forte 5
2001 mercedes benz c240
1981 fiat spider
2017 bmw 340i m sport package
2007 Mercedes B2000
2004 chevrolet optra
2005 Honda Stepwagon
2006 gmc savana 2500
2013 Hyundai Genesis coupe
1965 ford fairlane
2004 NISSAN QUEST
2013 FIAT 500
2009 VOLKSWAGEN ROUTAN
2003 BMW X 5
2000 ford f350 super duty
2010 Nissan Sentra
2010 mercedes benz ml350 bluetec
2009 honda civic
1987 volv

Unnamed: 0,fuel,make,odometer,price,title status,transmission,VIN,condition,drive,paint color,type,cylinders,size
0,gas,2007 chevy cobalt ss,210000,"$2,300",clean,manual,,,,,,,
1,gas,2017 Audi A4,83339,"$23,995",clean,automatic,WAUANAF47HN020134,excellent,4wd,grey,sedan,,
2,hybrid,2018 porsche cayenne s e-hybrid,46749,"$78,995",clean,automatic,WP1AE2A2XJLA71352,,,black,,,
3,gas,2018 gmc yukon xl slt 4wd,49500,"$59,990",clean,automatic,,excellent,4wd,black,SUV,8 cylinders,full-size
4,gas,2019 Honda CR-V,19124,"$31,250",clean,automatic,2HKRW2H58KH136996,excellent,4wd,,SUV,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,electric,2019 audi e-tron,19500,"$84,999",clean,automatic,,excellent,4wd,black,SUV,,mid-size
2996,gas,2018 2018 ford f150,51505,"$49,999",clean,automatic,,like new,4wd,white,truck,6 cylinders,
2997,gas,2020 lexus es 350,7000,"$45,958",clean,automatic,,like new,fwd,grey,sedan,6 cylinders,mid-size
2998,electric,2019 Chevrolet Volt,26599,"$33,995",clean,automatic,,excellent,fwd,black,sedan,,compact


In [56]:
#write the scraped car table to cars.csv in the working directory
#(the row index is written too, which is the pandas default)
car_df.to_csv(path_or_buf='cars.csv')