First of all lets grab the data from the website and save it to a file. We will use the `requests` library to do this. We will also use the `BeautifulSoup` library to parse the HTML and extract the data we want.

In [14]:
import requests
from bs4 import BeautifulSoup
import re

In [55]:
# Create a function that takes a url and returns a dictionary with the data we have collected
def get_car_data(url):
    raw_html = requests.get(url)
    soup = BeautifulSoup(raw_html.content, 'html.parser')

    price_html = soup.find_all(name='span', attrs={'class': re.compile('PriceInfo_price__.*')})
    price = price_html[0].text
    price = price[2:-2]
    price = float(price.replace(',', ''))
    
    mileage_html = soup.find(name='dt', string='Mileage').find_next_sibling('dd').text
    mileage_html = mileage_html.replace('km', '')
    mileage_html = mileage_html.replace(',', '')
    mileage_html = int(mileage_html)
    
    first_registration_html = soup.find(name='div', string='First registration')\
                                  .find_next_sibling('div')\
                                  .find_next_sibling('div').text
    make_model_html = soup.find(name='div', attrs={'class': re.compile('StageTitle_makeModelContainer__.*')})
    make = make_model_html.find_all('span')[0].text
    model = make_model_html.find_all('span')[1].text
    
    # Find with soup the A tag with the class LocationWithPin_locationItem__.* and get the text
    location_html = soup.find(name='a', attrs={'class': re.compile('LocationWithPin_locationItem__.*')})
    location = location_html.text
    
    
    return {
        'price': price,
        'mileage': mileage_html,
        'first_registration': first_registration_html,
        'make': make,
        'model': model,
        'location': location,
        'url': url
    }

In [56]:
cars_to_analyze = [
  'https://www.autoscout24.com/offers/audi-a5-coupe-3-2-fsi-quattro-s-line-plus-xenon-b-o-gasoline-black-e6193db6-e5a4-4ef3-a179-8a73a93fec2b?ipc=recommendation&ipl=homepage-bestresult-listings&position=1&source_otp=t10&source=homepage_last-search',
  'https://www.autoscout24.com/offers/mercedes-benz-cls-350-7g-tronic-gasoline-silver-c1b46f5f-d7a6-4a63-88ec-cb5baf02413c?ipc=recommendation&ipl=homepage-engine-itemBased&position=3&source_otp=nfm&source=homepage_recommender'
]
for car in cars_to_analyze:
    print(get_car_data(car))

{'price': 9990.0, 'mileage': 200000, 'first_registration': '06/2008', 'make': 'Audi ', 'model': 'A5', 'location': 'Hannover, DE'}
{'price': 12990.0, 'mileage': 149000, 'first_registration': '10/2005', 'make': 'Mercedes-Benz ', 'model': 'CLS 350', 'location': 'Bad Kreuznach / Planig, DE'}


In [None]:
# Expose the data for prometheus
from prometheus_client import start_http_server, Gauge

# Expose the car data with the prometheus client
def expose_car_data(car_data):
    price_gauge = Gauge('car_price', 'Price of the car', ['make', 'model', 'location', 'url'])
    mileage_gauge = Gauge('car_mileage', 'Mileage of the car', ['make', 'model', 'location', 'url'])
    first_registration_gauge = Gauge('car_first_registration', 'First registration of the car', ['make', 'model', 'location', 'url'])
    price_gauge.labels(car_data['make'], car_data['model'], car_data['location'], car_data['url']).set(car_data['price'])
    mileage_gauge.labels(car_data['make'], car_data['model'], car_data['location'], car_data['url']).set(car_data['mileage'])
    first_registration_gauge.labels(car_data['make'], car_data['model'], car_data['location'], car_data['url']).set(car_data['first_registration'])

# start http server for prometheus
start_http_server(8000)