# Web Scraping: Renting Cars Data
Extraction of the data tables of renting offers for several car brands from coches.com.

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import re
import lxml
import sys
sys.path.append('..')
from modules.car_scraping import webpages_generator, cars_links_generator, scrape_used_cars_data, scrape_renting_cars_data

In [2]:
# Base URL of the paginated renting-offers listing; it ends with the `page=` query parameter
base_rent_cars_url = (
    'https://www.coches.com/renting-coches/ofertas-renting/?page='
)

In [3]:
# Generate the listing pages that hold renting cars, starting from the base URL.
# NOTE(review): 226 is hard-coded — presumably the number of listing pages when the
# notebook was written; confirm before re-running.
rent_cars_pages = webpages_generator(
    base_rent_cars_url,
    init_pages=1,
    n_pages=226,
)

In [4]:
# Regex shared by all individual car links: an https URL with a `/renting-coches/`
# segment followed by text containing at least four hyphens (the car URLs shown in
# the next cell's output end in a UUID-like identifier).
rent_cars_reg_exp = (
    "https://.*/renting-coches/.*-.*-.*-.*-.*"
)

In [5]:
# Build the list of individual car links from the listing pages.
# The regex is handed to the helper as `reg_exp` (presumably used to filter links — see
# modules.car_scraping.cars_links_generator).
rent_cars_urls = cars_links_generator(
    rent_cars_pages,
    reg_exp=rent_cars_reg_exp,
)

# Preview the last five collected URLs
rent_cars_urls[-5:]

['https://www.coches.com/renting-coches/mercedes-clase+glb/2b605964-e9cf-43da-8c9c-980b0a9fdb8c',
 'https://www.coches.com/renting-coches/mercedes-clase+glb/1825157e-449a-4cba-884f-2d8d660bed19',
 'https://www.coches.com/renting-coches/mercedes-clase+glb/d4d40317-d6bd-4369-893a-a48ec6638352',
 'https://www.coches.com/renting-coches/mercedes-clase+glb/680e2cce-0512-4f3f-be8a-91c556a2ec58',
 'https://www.coches.com/renting-coches/mercedes-clase+glb/4030e9ed-fafc-44f3-b140-4e553fe58354']

In [6]:
# Sanity check: report how many car URLs were collected
n_rent_cars_urls = len(rent_cars_urls)
print(n_rent_cars_urls)

2699


In [None]:
# Scrape the data of every renting car, one URL at a time (handled by the helper)
renting_cars_data = scrape_renting_cars_data(rent_cars_urls)

# Quick look at both ends of the result: first five records, then last five
for preview in (renting_cars_data[:5], renting_cars_data[-5:]):
    print(preview)

Defining the DataFrame

In [8]:
# Column labels for the renting-cars DataFrame, in the order the fields are passed to it
cols_rent = [
    'title', 'price', 'contract_months', 'km_year',
    'fuel_type', 'color', 'warranty', 'maintenance', 'tires',
    'power', 'co2_emiss', 'doors', 'gear', 'status', 'chassis',
    'height', 'length', 'width', 'trunk_vol',
    'max_speed', 'seats',
    'urban_cons', 'xtrurban_cons', 'mixed_cons',
    'weight', 'tank_vol', 'acceleration',
]

In [16]:
# Build the DataFrame from the scraped records, labelling the columns with `cols_rent`.
# NOTE(review): the row labelled 0 is discarded, exactly as before — presumably a
# header/placeholder record returned by the scraper; confirm against
# scrape_renting_cars_data.
df_rent = (
    pd.DataFrame(renting_cars_data, columns=cols_rent)
    .drop(index=0)
)

# Fixed seed so the preview shows the same five rows on every Restart & Run All
df_rent.sample(5, random_state=42)

Unnamed: 0,title,price,contract_months,km_year,fuel_type,color,warranty,maintenance,tires,power,...,width,trunk_vol,max_speed,seats,urban_cons,xtrurban_cons,mixed_cons,weight,tank_vol,acceleration
1635,BMW Serie 1,395,60,40.0,Diesel,BLANCO,Incluido,Incluido,Incluido,116cv(85Kw),...,"179,9 cm",380 l,200 km/h,5,"4,3 l","3,5 l","3,8 l",1.385 kg,42 l,"10,0 s"
1510,MINI Countryman,453,36,20.0,Diesel,BLANCO,Incluido,Incluido,Incluido,150cv(110Kw),...,"182,2 cm",450 l,204 km/h,5,"4,8 l","3,8 l","4,2 l",1.475 kg,51 l,"9,0 s"
398,MERCEDES Clase GLA,446,48,10.0,Diesel,Consultar,Incluido,Incluido,No incluido,150cv(110Kw),...,"183,4 cm",425 l,208 km/h,5,"5,9 l","3,2 l","4,6 l",1.615 kg,43 l,"8,0 s"
1333,FIAT Talento,326,60,15.0,Diesel,BLANCO,Incluido,Incluido,No incluido,120cv(88Kw),...,"195,6 cm",0 l,166 km/h,3,"7,2 l","5,8 l","6,4 l",1.726 kg,80 l,"0,0 s"
450,MERCEDES Clase GLC,1006,24,50.0,Diesel,Consultar,Incluido,Incluido,Incluido,194cv(143Kw),...,"189,0 cm",550 l,215 km/h,5,"6,8 l","3,6 l","5,2 l",1.835 kg,66 l,"7,0 s"


In [17]:
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly: wrapping it in print() would append a stray "None" line to the output.
df_rent.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2372 entries, 1 to 2372
Data columns (total 27 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   title            2372 non-null   object
 1   price            2372 non-null   object
 2   contract_months  2372 non-null   object
 3   km_year          2372 non-null   object
 4   fuel_type        2372 non-null   object
 5   color            2372 non-null   object
 6   warranty         2372 non-null   object
 7   maintenance      2372 non-null   object
 8   tires            2372 non-null   object
 9   power            2372 non-null   object
 10  co2_emiss        2372 non-null   object
 11  doors            2372 non-null   object
 12  gear             2372 non-null   object
 13  status           2372 non-null   object
 14  chassis          2372 non-null   object
 15  height           2372 non-null   object
 16  length           2372 non-null   object
 17  width            2372 non-null   

Exporting the output to a CSV file

In [18]:
# Destination of the exported CSV (relative path, resolved against the current working directory)
output_rent = (
    'data/renting_cars.csv'
)

In [19]:
# Write the frame to disk without the index column, then report where it went
df_rent.to_csv(
    output_rent,
    index=False,
)
print(
    f"Renting cars exported into: {output_rent}"
)

Renting cars exported into: data/renting_cars.csv
