In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [62]:
def scrape_listings(url):
    # Open the url
    r = requests.get(url)
    # Parse the html
    soup = BeautifulSoup(r.text, 'html.parser')
    
    # Find all div-elements with class 
    listings = soup.find_all('div', {'class': 'ListItem_wrapper__J_a_C'})
    # Iterate over the listing
    rows = []
    for element in listings:
        row = {}
        # Find the a-element (model of car)
        headers = element.find('a', {'class': 'ListItem_title__znV2I ListItem_title_new_design__lYiAv Link_link__pjU1l'})
        row['url'] = 'https://www.autoscout24.de' + headers.get('href')
        model = headers.find('h2')
        span_elements = model.find_all('span')
        for span_element in span_elements:
            span_element.decompose() 
        row['Marke'] = "Audi"
        row['Model'] = model.text.replace("Audi", "")
        
        # Find all details about car
        details = element.find('div', {'class': 'VehicleDetailTable_container__mUUbY'})
        for detail in details:
            text = detail.text
            if(text.endswith('km')):
                row['Kilometerstand'] = text.split(' ')[0].replace('.', '')
            elif(text == "Automatik" or text == "Schaltgetriebe"):
                row['Antrieb'] = text
            elif(len(text.split('/')) == 2):
                row['Baujahr'] = text.split('/')[1]
            elif(text.endswith('PS)')):
                row['Motorleistung'] = text.split(' ')[0]
    
        # Find price of car
        price = element.find('p', {'class': 'Price_price__WZayw PriceAndSeals_current_price__XscDn'})
        if(price == None):
            continue
        first_index_find = price.text.find('€') + 2
        last_index_find = price.text.find(',')
        row['Preis'] = price.text[first_index_find : last_index_find]
        
        rows.append(row)
        
    return rows

In [30]:
df = scrape_listings("https://www.autoscout24.de/lst/audi?atype=C&cy=D&damaged_listing=exclude&desc=0&fregfrom=2010&fregto=2023&ocs_listing=include&powertype=kw&search_id=sphlth5ci4&sort=standard&source=homepage_search-mask&ustate=N%2CU!")

In [31]:
audi_df = pd.DataFrame(df)
audi_df.head()

Unnamed: 0,url,Marke,Model,Kilometerstand,Antrieb,Baujahr,Motorleistung,Preis
0,https://www.autoscout24.de/angebote/audi-a5-co...,Audi,A5,117981,Schaltgetriebe,2013,125,12.95
1,https://www.autoscout24.de/angebote/audi-s3-sp...,Audi,S3,186500,Automatik,2012,195,12.499
2,https://www.autoscout24.de/angebote/audi-a5-3-...,Audi,A5,237520,Automatik,2016,160,15.98
3,https://www.autoscout24.de/angebote/audi-a4-2-...,Audi,A4,252333,Automatik,2011,105,5.75
4,https://www.autoscout24.de/angebote/audi-a6-al...,Audi,A6 allroad,216394,Automatik,2013,180,15.99


In [32]:
def scrape_details(df):
    all_details = []  # Tüm detayları depolamak için bir liste
    
    # Iterate over the rows of df
    for i in range(len(df)):
        # Open the URL
        r = requests.get(df['url'].iloc[i])
        # Parse the HTML
        soup = BeautifulSoup(r.text, 'html.parser')
        # Find the div-element with class 'DetailPage_slicesContainer__wHHae false'
        sections = soup.find_all('section')
        details = {}
        for section in sections:
            detail = {}
            details_grid = section.find('div', {'class': 'DetailsSection_childrenSection__NQLD7'})
            if details_grid == None:
                continue
            # Bilgileri ekrana bastır
            dl_elements = details_grid.find_all('dl', {'class': 'DataGrid_defaultDlStyle__969Qm'})
            if(len(dl_elements) == 0):
                continue
            for dl in dl_elements:
                dt_elements = dl.find_all('dt', {'class': 'DataGrid_defaultDtStyle__yzRR_'})
                dd_elements = dl.find_all('dd', {'class': 'DataGrid_defaultDdStyle__29SKf'})
                for dt, dd in zip(dt_elements, dd_elements):
                    detail[dt.text] = dd.text
                
            details.update(detail)
        all_details.append(details)
    details_df = pd.DataFrame(all_details)
    df = pd.concat([df, details_df], axis=1)
    return df

In [33]:
audi_df = scrape_details(audi_df)

In [34]:
audi_df.to_csv('audi_uncleaned.csv')

## Data Preprocessing

In [35]:
df_audi = pd.read_csv('audi_uncleaned.csv', index_col=0)
df_audi.head()

Unnamed: 0,url,Marke,Model,Kilometerstand,Antrieb,Baujahr,Motorleistung,Preis,Barzahlungspreis,Anzahlung,...,Energieeffizienzklasse,CO₂-Effizienz,Schlussrate,Schlüsselnummer,Baujahr.1,Zylinder,Gänge,Leergewicht,Bruttodarlehensbetrag,Bearbeitungsgebühren
0,https://www.autoscout24.de/angebote/audi-a5-co...,Audi,A5,117981,Schaltgetriebe,2013,125,12.95,"€ 12.950,-","€ 0,-",...,,,,,,,,,,
1,https://www.autoscout24.de/angebote/audi-s3-sp...,Audi,S3,186500,Automatik,2012,195,12.499,"€ 12.499,-","€ 0,-",...,C,Auf der Grundlage der gemessenen CO₂-Emissione...,"€ 18.525,60",,,,,,,
2,https://www.autoscout24.de/angebote/audi-a5-3-...,Audi,A5,237520,Automatik,2016,160,15.98,,"€ 0,-",...,,,"€ 44.373,12",,,,,,,
3,https://www.autoscout24.de/angebote/audi-a4-2-...,Audi,A4,252333,Automatik,2011,105,5.75,,"€ 0,-",...,,,,0588/AHA,2011.0,4.0,,,,
4,https://www.autoscout24.de/angebote/audi-a6-al...,Audi,A6 allroad,216394,Automatik,2013,180,15.99,,"€ 0,-",...,,,,,,,,,,


In [36]:
def drop_features(df):
    feature_list = ['url', 'Marke', 'Model', 'Kilometerstand', 'Preis', 'Motorleistung',  
                'Antrieb', 'Baujahr', 'Sitzplätze', 'Türen', 'Hubraum', 'Außenfarbe']
    drop_list = [col for col in df.columns if col not in feature_list]
    df.drop(drop_list, inplace=True, axis=1)
    return df

In [37]:
df_audi = drop_features(df_audi)

In [38]:
df_audi.head()

Unnamed: 0,url,Marke,Model,Kilometerstand,Antrieb,Baujahr,Motorleistung,Preis,Sitzplätze,Türen,Hubraum,Außenfarbe
0,https://www.autoscout24.de/angebote/audi-a5-co...,Audi,A5,117981,Schaltgetriebe,2013,125,12.95,4,2,1.798 cm³,Schwarz
1,https://www.autoscout24.de/angebote/audi-s3-sp...,Audi,S3,186500,Automatik,2012,195,12.499,5,4,1.984 cm³,Schwarz
2,https://www.autoscout24.de/angebote/audi-a5-3-...,Audi,A5,237520,Automatik,2016,160,15.98,4,4,2.967 cm³,Weiß
3,https://www.autoscout24.de/angebote/audi-a4-2-...,Audi,A4,252333,Automatik,2011,105,5.75,5,5,1.968 cm³,Grau
4,https://www.autoscout24.de/angebote/audi-a6-al...,Audi,A6 allroad,216394,Automatik,2013,180,15.99,5,4,2.967 cm³,Grün


In [58]:
def preprocessing(df):
    df["Preis"] = df["Preis"].astype(str).str.replace('.', '').astype('float')
    df["Hubraum"] = df["Hubraum"].astype(str).str.replace(' cm³', '').astype('float')
    df["Hubraum"] = df["Hubraum"].astype(str).str.replace('.', '')
    return df

In [59]:
df_audi = preprocessing(df_audi)

In [60]:
df_audi.head()

Unnamed: 0,url,Marke,Model,Kilometerstand,Antrieb,Baujahr,Motorleistung,Preis,Sitzplätze,Türen,Hubraum,Außenfarbe
0,https://www.autoscout24.de/angebote/audi-a5-co...,Audi,A5,117981,Schaltgetriebe,2013,125,12950000.0,4,2,1798,Schwarz
1,https://www.autoscout24.de/angebote/audi-s3-sp...,Audi,S3,186500,Automatik,2012,195,124990000.0,5,4,1984,Schwarz
2,https://www.autoscout24.de/angebote/audi-a5-3-...,Audi,A5,237520,Automatik,2016,160,15980000.0,4,4,2967,Weiß
3,https://www.autoscout24.de/angebote/audi-a4-2-...,Audi,A4,252333,Automatik,2011,105,5750000.0,5,5,1968,Grau
4,https://www.autoscout24.de/angebote/audi-a6-al...,Audi,A6 allroad,216394,Automatik,2013,180,15990000.0,5,4,2967,Grün


In [69]:
df = scrape_listings("https://www.autoscout24.de/lst/audi?atype=C&cy=D&desc=0&fregfrom=2010&ocs_listing=include&page=2&search_id=j3aqupn9ns&sort=standard&source=listpage_pagination&ustate=N%2CU")
audi_df = pd.DataFrame(df)
audi_df = scrape_details(audi_df)
audi_df = drop_features(audi_df)
audi_df = preprocessing(audi_df)

In [70]:
audi_df

Unnamed: 0,url,Marke,Model,Kilometerstand,Antrieb,Baujahr,Motorleistung,Preis,Sitzplätze,Türen,Kilometerstand.1,Hubraum,Außenfarbe,Baujahr.1
0,https://www.autoscout24.de/angebote/audi-a6-av...,Audi,A6,193300,Automatik,km),200,19811.0,5,5,193.300 km,2967,Grau,
1,https://www.autoscout24.de/angebote/audi-a6-av...,Audi,A6,158500,Automatik,km),180,16700.0,5,4,158.500 km,2967,Grau,
2,https://www.autoscout24.de/angebote/audi-s5-sp...,Audi,S5,150000,Automatik,2010,245,18999.0,4,4,150.000 km,2995,Blau,
3,https://www.autoscout24.de/angebote/audi-a8-4-...,Audi,A8,68788,Automatik,2013,309,22800.0,5,4,68.788 km,3993,Grau,
4,https://www.autoscout24.de/angebote/audi-a3-2-...,Audi,A3,149700,Schaltgetriebe,2011,103,9980.0,5,4,149.700 km,1968,Grau,
5,https://www.autoscout24.de/angebote/audi-a4-av...,Audi,A4,128000,Automatik,2014,150,17998.0,5,4,128.000 km,2967,Weiß,
6,https://www.autoscout24.de/angebote/audi-a3-qu...,Audi,A3,49986,Schaltgetriebe,2018,110,22990.0,5,4,49.986 km,1968,Blau,2018.0
7,https://www.autoscout24.de/angebote/audi-a4-2-...,Audi,A4,99978,Schaltgetriebe,2010,155,11980.0,5,4,99.978 km,1984,Bronze,
8,https://www.autoscout24.de/angebote/audi-a6-av...,Audi,A6,366000,Automatik,2014,150,8490.0,5,5,366.000 km,2773,Schwarz,2014.0
9,https://www.autoscout24.de/angebote/audi-a5-sp...,Audi,A5,104000,Automatik,2016,180,21990.0,4,5,104.000 km,2967,Braun,
