### Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

### HTTP Request

#### Store the website URL in a variable

In [2]:
# Cars.com search-results URL. The query string asks for all stock types, the make
# "lamborghini" (makes%5B%5D is the URL-encoded form of makes[]) and maximum_distance=20;
# the model, maximum list price and zip parameters are left empty.
website = "https://www.cars.com/shopping/results/?stock_type=all&makes%5B%5D=lamborghini&models%5B%5D=&list_price_max=&maximum_distance=20&zip="

#### Get Request

In [3]:
# Fetch the search-results page. An explicit timeout (in seconds) is passed because
# requests otherwise waits indefinitely when the server stops responding.
response = requests.get(website, timeout=30)

#### Status Code

In [4]:
# Show the HTTP status code of the response; 200 means the request succeeded.
response.status_code

200

### Soup Object

In [5]:
# Parse the raw response bytes into a navigable tree using Python's built-in HTML parser.
soup = BeautifulSoup(markup=response.content, features='html.parser')

In [7]:
# Preview only the beginning of the parsed document: echoing the whole `soup` object
# writes the entire page into the cell output and bloats the notebook file.
print(soup.prettify()[:2000])

<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<title data-suffix=" | Cars.com">New and used Lamborghini for Sale Near Me | Cars.com</title>
<meta content="Shop Lamborghini vehicles for sale at Cars.com. Research, compare, and save listings, or contact sellers directly from 680 Lamborghini models nationwide." name="description"/>
<meta content="noindex, nofollow" name="robots"/>
<meta content="Cars.com" property="og:site_name"/>
<meta content="website" property="og:type"/>
<meta content="New and used Lamborghini for Sale Near Me | Cars.com" property="og:title"/>
<meta content="https://www.cars.com/shopping/results/" property="og:url"/>
<meta content="Shop Lamborghini vehicles for sale at Cars.com. Research, compare, and save listings, or contact sellers directly from 680 Lamborghini models nationwide." property="og:description"/>
<meta content

### Results

In [10]:
# Collect every <div> whose class is "vehicle-card"; each one is a single listing.
results = soup.find_all('div', class_='vehicle-card')

In [11]:
# Number of listing cards found on the page.
len(results)

20

In [13]:
# Inspect the HTML of the first listing card to locate the tags and classes to extract.
results[0]

<div class="vehicle-card" data-listing-id="9f0a6376-599c-46f5-9436-e4cfaf023466" data-tracking-id="1" data-tracking-type="srp-vehicle-card" id="9f0a6376-599c-46f5-9436-e4cfaf023466" phx-hook="VehicleCard" phx-target="5">
<a class="vehicle-card-visited-tracking-link" href="/vehicledetail/9f0a6376-599c-46f5-9436-e4cfaf023466/" rel="nofollow" tabindex="-1"></a>
<div class="vehicle-card-main js-gallery-click-card">
<div class="vehicle-card-photos js-gallery-click-gallery">
<div class="gallery-wrap">
<div class="image-wrap" data-index="0">
<img alt="" class="vehicle-image" loading="lazy" src="https://platform.cstatic-images.com/large/in/v2/0ed6601e-75f3-5b96-b8f7-10fecdc86520/ab41e310-26e4-48b6-a85e-46dad64f1e71/iB4MThYYDDRxUdVBJMBgtO1g8LQ.jpg"/>
</div>
<div class="image-wrap" data-index="1">
<img alt="" class="vehicle-image" loading="lazy" src="https://platform.cstatic-images.com/large/in/v2/0ed6601e-75f3-5b96-b8f7-10fecdc86520/ab41e310-26e4-48b6-a85e-46dad64f1e71/pDlOwdUC6waI0LJw7UR_t8o6j

### Target necessary data

In [17]:
# Name
# Price
# Review Rating
# Review Count
# Product Link
# Product Details
# Mileage
# Dealer Name

#### Name

In [31]:
# The listing title is the text of the card's first <h2>, with surrounding whitespace removed.
results[0].h2.get_text().strip()

'2021 Lamborghini Urus'

#### Mileage

In [17]:
# Mileage text of the first listing, taken from the <div class="mileage"> element.
results[0].find('div', class_='mileage').get_text()

'2,500 mi.'

#### Review Rating

In [19]:
# Review score of the first listing, taken from the <span class="sds-rating__count"> element.
results[0].find('span', class_='sds-rating__count').get_text()

'4.4'

#### Review Count

In [25]:
# Review-count label of the first listing, taken from the rating link <span>.
results[0].find('span', class_='sds-rating__link sds-button-link').get_text()

'(568 reviews)'

#### Price

In [26]:
# Asking price of the first listing, taken from the <span class="primary-price"> element.
results[0].find('span', class_='primary-price').get_text()

'$299,000'

#### Dealer name

In [30]:
# Dealer name of the first listing, with surrounding whitespace removed.
results[0].find('div', class_='dealer-name').get_text().strip()

'Exotic Motors'

#### Relative URL

#### Product Details

### Put everything together inside a For-Loop

In [70]:
class Custom:
    """Namespace for helpers that collect scraped values into plain lists."""

    @staticmethod
    def add(list_name, data):
        """Append one scraped value to ``list_name``, using 'n/a' for a missing one.

        Parameters
        ----------
        list_name : list
            The list that receives the value.
        data : str, object with a ``.text`` attribute, or None
            * ``None`` (what ``find`` returns when nothing matches) is recorded
              as the placeholder ``'n/a'``.
            * A string is appended unchanged.
            * Any other object exposing ``.text`` contributes its stripped text.
            * Anything else is appended unchanged.

        The previous implementation wrapped ``list_name.append(data)`` in
        ``try/except AttributeError``, which could never fire: a missing element
        fails at the call site (``None.text``) before this function is entered.
        """
        if data is None:
            list_name.append('n/a')
            return

        text = getattr(data, 'text', None)
        # Strings are checked first so that already-extracted text is stored as-is.
        if isinstance(data, str) or text is None:
            list_name.append(data)
        else:
            list_name.append(text.strip())

In [42]:
def _text_or_na(tag, strip=False):
    """Return the text of a parsed tag, or 'n/a' when the tag is missing.

    ``find`` returns None when a card has no matching element; calling ``.text``
    on that None raised AttributeError and aborted the whole loop.
    """
    if tag is None:
        return 'n/a'
    return tag.text.strip() if strip else tag.text


name = []
mileage = []
dealer_name = []
rating = []
rating_count = []
price = []

# NOTE(review): 'sds-rating__count' yields the numeric score (e.g. '4.4') and the rating
# link yields the '(N reviews)' label, so `rating_count` and `rating` are named the other
# way round. The naming is kept because the cleaning cell strips "(reviews) " from the
# 'Rating' column; rename both places together.
for r in results:
    Custom.add(name, _text_or_na(r.find('h2'), strip=True))
    Custom.add(mileage, _text_or_na(r.find('div', {'class': "mileage"})))
    Custom.add(rating_count, _text_or_na(r.find('span', {'class': 'sds-rating__count'})))
    Custom.add(rating, _text_or_na(r.find('span', {'class': 'sds-rating__link sds-button-link'})))
    Custom.add(price, _text_or_na(r.find('span', {'class': 'primary-price'})))
    Custom.add(dealer_name, _text_or_na(r.find('div', {'class': 'dealer-name'}), strip=True))

In [49]:
# name
# mileage
# rating_count
# rating
# price
# dealer_name

['(568 reviews)',
 '(15 reviews)',
 '(5 reviews)',
 '(3 reviews)',
 '(692 reviews)',
 '(7 reviews)',
 '(64 reviews)',
 '(620 reviews)',
 '(3,697 reviews)',
 '(629 reviews)',
 '(27 reviews)',
 '(136 reviews)',
 '(4,514 reviews)',
 '(84 reviews)',
 '(58 reviews)',
 '(1,239 reviews)',
 '(9 reviews)',
 '(41 reviews)',
 '(1,239 reviews)',
 '(73 reviews)']

#### Combine URLs

### Create Pandas Dataframe

In [55]:
# Assemble the per-field lists into one table; every list holds one entry per listing,
# so the columns line up row by row.
data = pd.DataFrame(dict(
    Name=name,
    Milage=mileage,
    DealerName=dealer_name,
    Rating=rating,
    RatingCount=rating_count,
    Price=price,
))

#### Data cleaning

In [57]:
# Display the table as scraped, before any cleaning is applied.
data

Unnamed: 0,Name,Milage,DealerName,Rating,RatingCount,Price
0,2021 Lamborghini Urus,"2,500 mi.",Exotic Motors,(568 reviews),4.4,"$299,000"
1,2021 Lamborghini Urus,"1,183 mi.",Eurocar Inc.,(15 reviews),5.0,"$349,999"
2,2021 Lamborghini Urus,"6,159 mi.",O'Gara Coach Beverly Hills,(5 reviews),2.6,"$304,990"
3,2019 Lamborghini Urus Base,"13,486 mi.",Pro Cars USA Inc,(3 reviews),5.0,"$259,895"
4,2019 Lamborghini Urus Base,"18,001 mi.",Napleton Aston Martin Maserati,(692 reviews),4.7,"$258,800"
5,2020 Lamborghini Huracan EVO Base,"15,730 mi.",Tactical Fleet,(7 reviews),5.0,"$294,900"
6,2019 Lamborghini Urus Base,"23,513 mi.",Lamborghini Paramus,(64 reviews),5.0,"$249,995"
7,2017 Lamborghini Huracan LP610-4S,"15,352 mi.",Honda Marysville,(620 reviews),4.8,"$254,997"
8,2020 Lamborghini Urus Base,"23,797 mi.",Audi San Diego,"(3,697 reviews)",4.7,"$263,890"
9,2021 Lamborghini Urus HUGE MSRP! LOADED! FULL ...,"7,700 mi.",Chicago Motor Cars,(629 reviews),4.8,"$359,800"


In [63]:
# str.strip(chars) removes any of the listed characters from both ends of each value,
# which drops the "(... reviews)" wrapper, the " mi." suffix and the "$" prefix.
data['Rating'] = data['Rating'].str.strip("(reviews) ")
data['Milage'] = data['Milage'].str.strip(" mi.")
data['Price'] = data['Price'].str.strip("$ ")

In [64]:
# Display the table again to check the result of the cleaning step.
data

Unnamed: 0,Name,Milage,DealerName,Rating,RatingCount,Price
0,2021 Lamborghini Urus,2500,Exotic Motors,568,4.4,299000
1,2021 Lamborghini Urus,1183,Eurocar Inc.,15,5.0,349999
2,2021 Lamborghini Urus,6159,O'Gara Coach Beverly Hills,5,2.6,304990
3,2019 Lamborghini Urus Base,13486,Pro Cars USA Inc,3,5.0,259895
4,2019 Lamborghini Urus Base,18001,Napleton Aston Martin Maserati,692,4.7,258800
5,2020 Lamborghini Huracan EVO Base,15730,Tactical Fleet,7,5.0,294900
6,2019 Lamborghini Urus Base,23513,Lamborghini Paramus,64,5.0,249995
7,2017 Lamborghini Huracan LP610-4S,15352,Honda Marysville,620,4.8,254997
8,2020 Lamborghini Urus Base,23797,Audi San Diego,3697,4.7,263890
9,2021 Lamborghini Urus HUGE MSRP! LOADED! FULL ...,7700,Chicago Motor Cars,629,4.8,359800


### Output in Excel

In [68]:
# Write the table to cars.xlsx in the working directory; index=False leaves out the
# DataFrame's row index so the sheet holds only the data columns.
data.to_excel('cars.xlsx', index=False)

### Part 2 - Pagination - Scrape 10 Pages (20 results per page)

In [72]:
class Custom:
    """Helper namespace for collecting the text of scraped elements."""

    def add(list_name, data):
        """Append the stripped text of ``data`` to ``list_name``.

        When ``data`` has no usable ``.text`` (for example it is None because the
        element was not found), the placeholder 'n/a' is appended instead.
        """
        try:
            entry = data.text.strip()
        except AttributeError:
            entry = 'n/a'
        list_name.append(entry)

# Number of result pages to request; the URL asks for 20 listings per page (page_size=20).
N_PAGES = 10
# Seconds to wait for each response; without a timeout a stalled request never returns.
REQUEST_TIMEOUT = 30

name = []
mileage = []
dealer_name = []
rating = []
rating_count = []
price = []

for page in range(1, N_PAGES + 1):
    website = f'https://www.cars.com/shopping/results/?page={page}&page_size=20&dealer_id=&keyword=&list_price_max=&list_price_min=&makes[]=lamborghini&maximum_distance=20&mileage_max=&sort=best_match_desc&stock_type=all&year_max=&year_min=&zip='
    response = requests.get(website, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of silently parsing an error page into zero rows.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    results = soup.find_all('div', {'class': 'vehicle-card'})
    for r in results:
        # Custom.add records 'n/a' when an element is missing from a card.
        Custom.add(name, r.find('h2'))
        Custom.add(mileage, r.find('div', {'class': "mileage"}))
        # 'sds-rating__count' holds the numeric score (e.g. '4.4'); the rating link holds
        # the '(N reviews)' label. These two were previously stored in each other's list.
        Custom.add(rating, r.find('span', {'class': 'sds-rating__count'}))
        Custom.add(rating_count, r.find('span', {'class': 'sds-rating__link sds-button-link'}))
        Custom.add(price, r.find('span', {'class': 'primary-price'}))
        Custom.add(dealer_name, r.find('div', {'class': 'dealer-name'}))

data = pd.DataFrame({'Name': name,
                     'Milage': mileage,
                     'DealerName': dealer_name,
                     'Rating': rating,
                     'RatingCount': rating_count,
                     'Price': price})
# str.strip(chars) removes any of the listed characters from both ends of each value.
# The review label now lives in 'RatingCount', so that is the column that is unwrapped.
data['RatingCount'] = data['RatingCount'].str.strip("(reviews) ")
data['Milage'] = data['Milage'].str.strip(" mi.")
data['Price'] = data['Price'].str.strip("$ ")

    

In [77]:
# Write the multi-page table to data_crawled.xlsx in the working directory; index=False
# leaves out the DataFrame's row index.
data.to_excel('data_crawled.xlsx', index=False)