In [217]:
from mobilede_utils import request_car_detail_page

# Download one car detail page and keep a local copy for offline inspection.
html = request_car_detail_page('358672814')

# Write as UTF-8 explicitly: the platform default encoding may be unable to
# represent characters found in scraped HTML.
with open('car.html', 'w', encoding='utf-8') as f:
    f.write(html)

In [2]:
from car_mapping import CAR_MAKE_MAP, CAR_MAKE_MODEL_MAP
from mobilede_utils import request_search_page

# Download one search result page and keep a local copy for offline inspection.
search_page = request_search_page('volkswagen', 'id.3')

# Write as UTF-8 explicitly: the platform default encoding may be unable to
# represent characters found in scraped HTML.
with open('search.html', 'w', encoding='utf-8') as f:
    f.write(search_page)

In [67]:
from mobilede_utils import request_car_detail_page

# Fetch the car detail page for ID '361833685'; `html` is the input of the parsing cell.
html = request_car_detail_page('361833685')

In [68]:
from mobilede_parser import extract_car_data

# Parse the fetched detail-page HTML into a dict of car attributes.
data = extract_car_data(html)
# Bare expression so the notebook displays the parsed dict.
data

{'firstRegistrationMonth': 3,
 'firstRegistrationYear': 2022,
 'make': 'Audi',
 'model': 'Q4',
 'modelTypeName': '50 e-tron quattro AHK Standhz. HUD Navi Matri',
 'color': 'Grey',
 'metallicColor': False,
 'kilometers': '26686',
 'price': '56880',
 'registration': '03/2022',
 'electric': True,
 'power': '220',
 'batteryCapacity': None,
 'automatic': True,
 'undamaged': None,
 'seats': '5',
 'leather_upholstery': True,
 'isSUV': True,
 'airconditioning': True,
 'elec_cons': None,
 'n_prev_owners': '1',
 'features': ['ABS',
  'Adaptive cornering lights',
  'Adaptive Cruise Control',
  'Alloy wheels',
  'Ambient lighting',
  'Android Auto',
  'Apple CarPlay',
  'Arm rest',
  'Autom. dimming interior mirror',
  'Auxiliary heating',
  'Blind spot assist',
  'Bluetooth',
  'Cargo barrier',
  'Central locking',
  'DAB radio',
  'Digital cockpit',
  'Electric seat adjustment',
  'Electric side mirror',
  'Electric tailgate',
  'Electric windows',
  'Emergency brake assist',
  'Emergency call s

In [69]:
# Maps a feature label (an entry of data['features']) to the name of the boolean
# column that represents it. Several labels may share one column: the three
# trailer-coupling variants all map to 'tow_bar'.
#
# TODO(review): this literal used to start with
#     'Adaptive Cruise Control': 'parking_sensors'
# which was silently overwritten by the duplicate 'Adaptive Cruise Control' key,
# so no label produces a 'parking_sensors' column. The dead entry is removed here;
# add the correct parking-sensor label once it has been confirmed.
FEATURES_TO_COLUMN_NAME = {
    'Adaptive Cruise Control': 'cruise_control_adaptive',
    'Navigation system': 'satellite_navigator',
    'Parking sensor: Camera': 'parking_camera_simple_camera',
    'Heated seats': 'seat_heaters',
    'Alloy wheels': 'alloy_wheels',
    'Electric side mirror': 'electric_mirrors',
    'Lane change assist': 'lane_departure_warning_system',
    'Electric tailgate': 'electrically_operated_tailgate',
    'Emergency brake assist': 'emergency_brake_assist',
    'Collision avoidance system': 'collision_avoidance_system',
    'Cruise control: Traditional': 'cruise_control_traditional',
    'Parking sensor: Self-steering systems': 'parking_assistant',
    'Trailer coupling, swiveling': 'tow_bar',  # done
    'Trailer coupling, detachable': 'tow_bar',  # done
    'Trailer coupling, fix': 'tow_bar',  # done
    'Heated steering wheel': 'heated_steering_wheel',  # done
    'Sunroof': 'sunroof',  # done
    'Panoramic roof': 'sun_hatch_with_panorama',  # done
    'Adaptive lighting': 'adaptive_headlights',  # done
    'Sport seats': 'sport_seats',  # done
    'Auxiliary heating': 'fuel_battery_powered_heater',  # done
    'Parking sensor: 360° camera': 'parking_camera_360-degree_camera',  # will be changed to just a camera
    'Battery preheating': 'battery_preheating',
    'Electric seat adjustment': 'electric_seats_without_memory',  # check
    'Air suspension': 'air_suspension',  # done
    'Adaptive cornering lights': 'curve_lights',  # done
    'Head-up display': 'head_up_display',  # done
    'Sports suspension': 'sport_base',  # done
}


In [70]:
import pandas as pd
# Reference date that get_age subtracts the first-registration date from.
CURRENT_MONTH = 10
CURRENT_YEAR = 2023

# Drive-type table; get_drive_type filters it on 'make' and 'model' and reads
# the 'driveType' column.
drivetype_df = pd.read_csv('./drivetype.csv')

def get_age(reg_y, reg_m, current_year=None, current_month=None):
    """Return the age of a car in years relative to a reference date.

    Args:
        reg_y: First-registration year.
        reg_m: First-registration month, or a falsy value (None, 0) when unknown.
        current_year: Reference year; defaults to the module-level CURRENT_YEAR.
        current_month: Reference month; defaults to the module-level CURRENT_MONTH.

    Returns:
        The age in years. It includes a fractional month part when both reg_y
        and reg_m are truthy, otherwise it is the plain year difference.

    Raises:
        ValueError: If reg_y is None. Previously this surfaced as an opaque
            TypeError from ``CURRENT_YEAR - None``.
    """
    if reg_y is None:
        raise ValueError("Registration year is missing; cannot compute the age.")

    year_now = CURRENT_YEAR if current_year is None else current_year
    if reg_y and reg_m:
        month_now = CURRENT_MONTH if current_month is None else current_month
        return year_now - reg_y + (month_now - reg_m) / 12

    # Month unknown: fall back to whole-year precision.
    return year_now - reg_y

def get_drive_type(make, model, features, drive_types=None):
    """Return the drive type for a car.

    Args:
        make: Make to look up in the 'make' column.
        model: Model to look up in the 'model' column.
        features: Container of feature labels; 'Four wheel drive' short-circuits
            the lookup.
        drive_types: Optional DataFrame with 'make', 'model' and 'driveType'
            columns; defaults to the module-level drivetype_df.

    Returns:
        '4wd' when the features contain 'Four wheel drive'. Otherwise the
        'driveType' of the matching row; when several rows match, the value that
        sorts first in descending order wins (e.g. 'rwd' sorts ahead of '4wd').

    Raises:
        IndexError: If no row matches make/model. The exception type is the one
            ``.iloc[0]`` raised before; it now carries a descriptive message.
    """
    if 'Four wheel drive' in features:
        return '4wd'

    table = drivetype_df if drive_types is None else drive_types
    matches = table[(table['make'] == make) & (table['model'] == model)]
    if matches.empty:
        raise IndexError(f"No drive type known for make={make!r}, model={model!r}.")

    # .iloc is positional, so no index reset is needed after sorting.
    return matches.sort_values(by='driveType', ascending=False)['driveType'].iloc[0]


# Quick manual check of the lookup, one printed line per sample:
# a plain lookup, the 'Four wheel drive' short-circuit, and a second make.
for sample_make, sample_model, sample_features in (
    ('tesla', 'model 3', []),
    ('tesla', 'model 3', ['Four wheel drive']),
    ('bmw', 'ix', []),
):
    print(get_drive_type(sample_make, sample_model, sample_features))

rwd
4wd
4wd


In [96]:
import re

# Rule table read by get_capacity: rows are selected by 'make'/'model' and each
# row provides a 'regex', an 'inverse' flag and a 'batteryCapacity'.
capacities = pd.read_csv('capacities.csv')

# Fallback used by get_capacity: mean 'batteryCapacity' per (make, model) group.
nettiauto_data = pd.read_csv("../nettiauto/nettiauto_dataset.csv")
make_model_mean_capacities = (
    nettiauto_data
    .groupby(by=["make", "model"])["batteryCapacity"]
    .mean()
)

def get_capacity(make, model, modelTypeName, capacity_rules=None, mean_capacities=None):
    """Deduce the battery capacity of a car from its make, model and type name.

    The rule rows for (make, model) are tried in table order. A row applies when
    its 'regex' matches modelTypeName (case-insensitive), or when it does NOT
    match and the row's 'inverse' flag is set. If no rule applies, the mean
    capacity of the (make, model) group is used.

    Args:
        make: Make to look up.
        model: Model to look up.
        modelTypeName: Free-text type name the rule regexes are searched in.
            None is treated as an empty string.
        capacity_rules: Optional DataFrame with 'make', 'model', 'regex',
            'inverse' and 'batteryCapacity' columns; defaults to the
            module-level capacities.
        mean_capacities: Optional Series indexed by (make, model); defaults to
            the module-level make_model_mean_capacities.

    Returns:
        The battery capacity as a float.

    Raises:
        LookupError: If no rule applies and no usable mean exists. LookupError
            is a subclass of Exception, so existing ``except Exception``
            handlers keep working.
    """
    rules = capacities if capacity_rules is None else capacity_rules
    means = make_model_mean_capacities if mean_capacities is None else mean_capacities
    type_name = modelTypeName or ''

    candidates = rules[(rules["make"] == make) & (rules["model"] == model)]
    for _, rule in candidates.iterrows():
        matched = re.search(str(rule["regex"]), type_name, re.I) is not None
        # A blank 'inverse' cell is read as NaN and bool(NaN) is True, which
        # would silently invert the rule; treat a missing flag as "not inverted".
        inverse_flag = rule["inverse"]
        inverted = False if pd.isna(inverse_flag) else bool(inverse_flag)
        if matched ^ inverted:
            return float(rule["batteryCapacity"])

    if (make, model) in means.index:
        mean_capacity = float(means[make, model])
        # The mean is NaN when the whole group lacks capacities; that is not a
        # usable answer, so fall through to the error below.
        if not pd.isna(mean_capacity):
            return mean_capacity

    raise LookupError(
        "Battery capacity missing and it cannot be deduced with given model. "
        f"(make={make!r}, model={model!r}, modelTypeName={modelTypeName!r})"
    )

In [97]:

def _value_or(record, key, default):
    """Return record[key], or `default` when the key is absent or its value is None or ''."""
    value = record.get(key)
    return default if value is None or value == '' else value


# The parsed dict carries keys whose value is None (e.g. 'batteryCapacity'), so
# a plain data.get(key, default) would hand None to float()/int()/.lower().
make = _value_or(data, 'make', '').lower()
model = _value_or(data, 'model', '').lower().replace('id.', 'id')
# Falls back to 1 owner when the value is missing or falsy.
totalOwenrs_string = data['n_prev_owners'] if 'n_prev_owners' in data and data['n_prev_owners'] else 1

features_set = set(data.get('features') or [])

# Several feature labels can share one column (the trailer-coupling variants all
# map to 'tow_bar'), so OR the flags together instead of letting the last label
# overwrite the earlier ones.
feature_columns = {}
for feature_label, column_name in FEATURES_TO_COLUMN_NAME.items():
    feature_columns[column_name] = feature_columns.get(column_name, False) or feature_label in features_set

result = {
    'make': make,
    'model': model,
    'color': _value_or(data, 'color', '').lower(),
    'driveType': get_drive_type(make, model, features_set),
    # -1 marks a missing numeric value.
    'price': float(_value_or(data, 'price', '-1')),
    'totalOwners': int(totalOwenrs_string),
    'kilometers': int(_value_or(data, 'kilometers', '-1')),
    'seats': int(_value_or(data, 'seats', '5')),
    'power': float(_value_or(data, 'power', '-1')),
    # Use the listed capacity when present, otherwise deduce it from the type name.
    'batteryCapacity': float(c) if (c := data.get('batteryCapacity')) else get_capacity(make, model, data.get('modelTypeName')),
    'age': get_age(data['firstRegistrationYear'], data['firstRegistrationMonth']),
    'isSuv': data['isSUV'],
    'metallicColor': data['metallicColor'],
    'airconditioning': data['airconditioning'],
    **feature_columns,
}

result

Exception: Battery capacity missing and it cannot be deduced with given model.