# Create Logging Data

In this script we create logging data frames so that we avoid as many mistakes as possible in our data-retrieval process and stay as transparent as possible.

## Preamble
Loading the necessary libraries and setting the `user` that should be used.

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
%run Pipeline_Functions.ipynb
%run Detailed_Scraping_Config.ipynb
%run Logging_Functions.ipynb

In [2]:
# User whose logging CSV ("logging/logging_df_<user>.csv") is written and read
# by the cells in this notebook.
user = "chris"

## Instantiating the Logging DF

In [3]:
# Year range handed to instantiate_logging_df for every logging row.
start_year = 1990
end_year = 2010
# NOTE(review): `runner` is not used in the visible cells; kept in case one of
# the %run notebooks reads it — confirm before removing.
runner = 0

# Users handed to instantiate_logging_df.
user_list = ["chris", "christopher", "janik", "chris2"]

# `model_dictionaries` is not defined in this notebook; presumably it comes
# from one of the %run notebooks — confirm.
logging_df = instantiate_logging_df(
    start_year=start_year,
    end_year=end_year,
    user_list=user_list,
    model_dictionaries=model_dictionaries,
)
logging_df

Unnamed: 0,user,brand,model,start_year,curr_year,end_year,last_scraped
0,chris,mercedes-benz,170,1990.0,1990.0,2010.0,not yet scraped
0,chris,mercedes-benz,180,1990.0,1990.0,2010.0,not yet scraped
0,chris,mercedes-benz,190,1990.0,1990.0,2010.0,not yet scraped
0,chris,mercedes-benz,200,1990.0,1990.0,2010.0,not yet scraped
0,chris,mercedes-benz,208,1990.0,1990.0,2010.0,not yet scraped
...,...,...,...,...,...,...,...
0,chris2,renault,twizy,1990.0,1990.0,2010.0,not yet scraped
0,chris2,renault,vel-satis,1990.0,1990.0,2010.0,not yet scraped
0,chris2,renault,wind,1990.0,1990.0,2010.0,not yet scraped
0,chris2,renault,zoe,1990.0,1990.0,2010.0,not yet scraped


## Saving new Logging DFs

In [4]:
# Safety switch: set to True only when the freshly instantiated logging frame
# should be written to this user's logging CSV.
save_new_logging_dfs = False

if save_new_logging_dfs:
    logging_df.to_csv(f"logging/logging_df_{user}.csv", index=False)

# Adjusting the Logging DF

## Resetting the Curr Year of a Model

In [6]:
# Toggle for this cell: when True, the current year of the listed models is
# reset for `adj_user` via reset_current_year.
do_21 = True

if do_21:
    adj_user = "christopher"
    # NOTE(review): the logging output further down shows end_year 2024 for
    # these rows, so 2025 lies past the end of the range — presumably this
    # marks the models as finished; confirm.
    year_to_reset_to = 2025
    # NOTE(review): models are identified by name only. The logging output
    # further down also shows fiat/130 and volvo/944 at 2025, which suggests
    # same-named models of other brands are reset as well — confirm whether
    # that is intended.
    models_to_reset = [
        # Skoda models (brand as shown in the logging output below).
        "105", "120", "130", "135", "citigo", "enyaq", "fabia", "favorit",
        "felicia", "forman", "kamiq", "karoq", "kodiaq", "octavia",
        "rapid%2Fspaceback", "roomster", "scala",
        # Presumably Opel models — confirm.
        "adam", "agila", "ampera", "antara", "arena", "ascona", "astra",
        "calibra", "campo", "cascada", "combo", "combo-life", "combo-e",
        "combo-e-life", "commodore", "corsa", "corsa-e", "crossland",
        "crossland-x", "diplomat", "frontera", "grandland-x", "gt",
        "insignia", "kadett", "karl", "manta", "meriva", "mokka", "mokka-x",
        "mokka-e", "monterey", "monza", "movano", "movano-e", "omega",
        "rekord", "rocks-e", "signum", "sintra", "speedster", "tigra",
        "vectra", "vivaro", "vivaro-e", "zafira", "zafira-life",
        "zafira-tourer",
        # Presumably Porsche models — confirm.
        "356", "550", "718-(alle)", "911er-(alle)", "912", "914", "918",
        "924", "928", "944", "959", "962", "968", "boxster", "carrera-gt",
        "cayenne", "cayman", "macan", "panamera", "targa", "taycan",
        # Toyota models (brand as shown in the logging output below).
        "4-runner", "allion", "alphard", "altezza", "aristo", "auris",
        "avalon", "avensis", "avensis-verso", "aygo", "aygo-x", "bb",
        "belta", "bZ4X", "c-hr", "caldina", "cami", "camry", "carina",
        "celica", "chaser", "coaster", "corolla", "corolla-cross",
        "corolla-verso", "corona", "corsa", "cressida", "cresta", "crown",
        "duet", "dyna", "estima", "fj-cruiser", "fj40", "fortuner",
        "fun_cruiser", "funcargo", "gaia", "gr86", "harrier", "hdj",
        "hiace", "highlander", "hilux", "ipsum", "iq", "ist", "kj",
        "land-cruiser", "land-cruiser-prado", "lite-ace", "mark-ii",
        "mark-x", "mirai", "model-f", "noah", "opa", "paseo", "passo",
        "pick-up", "picnic", "platz", "premio", "previa", "prius", "prius+",
        "proace-city", "ractis", "raum", "rav-4", "seqoia", "sienna",
        "solara", "sprinter", "starlet", "supra", "tacoma", "tercel",
        "town-ace", "tundra", "urban-cruiser", "venza", "verossa", "verso",
        "verso-s", "vista", "vitz", "voxy", "will", "windom", "wish",
        "yaris", "yaris-cross",
    ]
    reset_current_year(
        user=adj_user,
        year_to_reset_to=year_to_reset_to,
        models_to_reset=models_to_reset,
    )

## Adding New Models

Since some models are named "e-klasse-(alle)", we needed to disentangle them for a more transparent view of our data. This is done here.

In [7]:
# Toggle for this cell: when True, the models below are added to the logging
# data of `adj_user2` via add_models_to_logging.
do_22 = False
adj_user2 = "chris"
# Year values passed along for every added model.
# NOTE: these reuse (and overwrite) the notebook-level `start_year` /
# `end_year` names set in the instantiation cell.
start_year = 1990
curr_year = 1990
end_year = 2024
brand = "mercedes-benz"
# Individual variants to add. Every entry must be unique so that no model is
# handed over twice.
# NOTE(review): the previous version listed "glc-300" twice; if the second
# entry was meant to be a different variant, add it here.
models_to_add = [
    # E-class variants
    "e-200", "e-220", "e-230", "e-240", "e-250", "e-260", "e-270", "e-280", "e-290",
    "e-300", "e-320", "e-350", "e-36-amg", "e-400", "e-420", "e-43-amg", "e-430", "e-450",
    "e-50-amg", "e-500", "e-53-amg", "e-55-amg", "e-550", "e-60-amg", "e-63-amg",
    # GLC variants
    "glc-200", "glc-220", "glc-250", "glc-300", "glc-350", "glc-400", "glc-43-amg",
    "glc-450", "glc-63-amg",
    # GLE variants
    "gle-250", "gle-300", "gle-350", "gle-400", "gle-43-amg", "gle-450", "gle-500",
    "gle-53-amg", "gle-580", "gle-63-amg",
]
if do_22:
    add_models_to_logging(user = adj_user2,
                          start_year = start_year,
                          curr_year = curr_year,
                          end_year = end_year,
                          brand = brand,
                          models_to_add = models_to_add)

## Sorting the Logging DF

In [8]:
# Toggle for this cell: when True, the models below are handed to
# prioritize_models for `adj_user3`.
do_23 = False
adj_user3 = "chris"
# Models to prioritize. Every entry must be unique.
# NOTE(review): the previous version listed "glc-300" twice; if the second
# entry was meant to be a different variant, add it here.
models_to_sort = [
    # E-class variants
    "e-200", "e-220", "e-230", "e-240", "e-250", "e-260", "e-270", "e-280", "e-290",
    "e-300", "e-320", "e-350", "e-36-amg", "e-400", "e-420", "e-43-amg", "e-430", "e-450",
    "e-50-amg", "e-500", "e-53-amg", "e-55-amg", "e-550", "e-60-amg", "e-63-amg",
    # GLC variants
    "glc-200", "glc-220", "glc-250", "glc-300", "glc-350", "glc-400", "glc-43-amg",
    "glc-450", "glc-63-amg",
    # GLE variants
    "gle-250", "gle-300", "gle-350", "gle-400", "gle-43-amg", "gle-450", "gle-500",
    "gle-53-amg", "gle-580", "gle-63-amg",
]
if do_23:
    prioritize_models(models_to_sort = models_to_sort,
                      user = adj_user3)


## Checking on Logging DFs

In [7]:
# Re-read this user's logging CSV from disk and show the rows of that user
# whose current year is 2025.
logging_df_test = pd.read_csv(f"logging/logging_df_{user}.csv")
logging_df_test.loc[
    lambda df: (df["user"] == user) & (df["curr_year"] == 2025)
]

Unnamed: 0,user,brand,model,start_year,curr_year,end_year,last_scraped
307,christopher,skoda,105,1990.0,2025.0,2024.0,not yet scraped
308,christopher,skoda,120,1990.0,2025.0,2024.0,not yet scraped
309,christopher,skoda,130,1990.0,2025.0,2024.0,not yet scraped
310,christopher,skoda,135,1990.0,2025.0,2024.0,not yet scraped
311,christopher,skoda,citigo,1990.0,2025.0,2024.0,not yet scraped
...,...,...,...,...,...,...,...
485,christopher,toyota,yaris,1990.0,2025.0,2024.0,not yet scraped
486,christopher,toyota,yaris-cross,1990.0,2025.0,2024.0,not yet scraped
492,christopher,fiat,130,1990.0,2025.0,2024.0,not yet scraped
608,christopher,volvo,944,1990.0,2025.0,2024.0,not yet scraped


In [None]:
# Display the full logging frame read from disk for manual inspection.
logging_df_test