# Create Logging Data

In this script we create logging data frames to avoid as many mistakes as possible in our data retrieval process and to be as transparent as possible.

## Preamble
Loading the necessary libraries and setting the `user` that should be used.

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
%run Logging_Dictionary.ipynb
%run Logging_Functions.ipynb

In [2]:
# user = "chris" handled below now!

## Instantiating the Logging DF

In [2]:
# Year bounds handed to the logging frame builder.
start_year = 2005
end_year = 2024
runner = 0

# User names passed to the builder; the displayed frame carries a `user` column.
user_list = [
    "chris",
    "christopher",
    "janik",
    "chris2",
    "recent_data",
]

# `instantiate_logging_df` and `model_dictionaries` are not defined in this
# notebook; presumably they come from the notebooks executed via %run above.
logging_df = instantiate_logging_df(
    start_year=start_year,
    end_year=end_year,
    user_list=user_list,
    model_dictionaries=model_dictionaries,
)
logging_df

Unnamed: 0,user,brand,model,start_year,curr_year,end_year,last_scraped
0,chris,mercedes-benz,170,2005.0,2005.0,2024.0,not yet scraped
0,chris,mercedes-benz,180,2005.0,2005.0,2024.0,not yet scraped
0,chris,mercedes-benz,190,2005.0,2005.0,2024.0,not yet scraped
0,chris,mercedes-benz,200,2005.0,2005.0,2024.0,not yet scraped
0,chris,mercedes-benz,208,2005.0,2005.0,2024.0,not yet scraped
...,...,...,...,...,...,...,...
0,recent_data,bmw,z3,2005.0,2005.0,2024.0,not yet scraped
0,recent_data,bmw,z3-m,2005.0,2005.0,2024.0,not yet scraped
0,recent_data,bmw,z4,2005.0,2005.0,2024.0,not yet scraped
0,recent_data,bmw,z4-m,2005.0,2005.0,2024.0,not yet scraped


## Saving new Logging DFs

In [3]:
# Switch: set to False to skip writing the CSV.
save_new_logging_dfs = True
# Name used in the output file name.
saving_user = "christopher"

if save_new_logging_dfs:
    # Writes the whole logging frame without the index column.
    # NOTE: an existing file of the same name is overwritten.
    logging_df.to_csv(f"logging_data/logging_df_{saving_user}.csv", index=False)

# Adjusting the Logging DF

## Resetting the Curr Year of a Model

In [14]:
# Switch: nothing below runs unless this is set to True.
do_21 = False

if do_21:
    adj_user = "janik"
    # 2025 lies past the end_year of 2024 shown in the logging frames;
    # presumably this marks the listed models as finished -- TODO confirm.
    year_to_reset_to = 2025
    # NOTE(review): only model names are passed on, no brand. The check output
    # further down shows a mazda "pick-up" row at curr_year 2025, so the reset
    # may also hit same-named models of other brands -- verify in
    # reset_current_year.
    models_to_reset = [
        # --- section 1 (looks like Ford models) ---
        "aerostar", "b-max", "bronco", "c-max", "capri", "connect-elektro",
        "consul", "cougar", "courier", "crown", "customline", "econoline",
        "econovan", "ecosport", "edge", "escape", "escort", "excursion",
        "expedition", "explorer", "express",
        "f-1", "f-100", "f-150", "f-250", "f-350", "f-360", "f-450", "f-550",
        "f-650", "f-super-duty",
        "fairlane", "falcon", "fiesta", "flex", "focus", "focus-c-max",
        "focus-cc", "freestar", "freestyle", "fusion", "galaxy", "gran-torino",
        "granada", "grand-c-max", "grand-tourneo", "gt", "ka%2Fka+", "kuga",
        "m", "maverick", "mercury", "mondeo", "mustang", "mustang-mach-e",
        "orion", "probe", "puma", "ranger", "ranger-raptor", "rs-200", "s-max",
        "scorpio", "sierra", "sportka", "streetka", "taunus", "taurus",
        "thunderbird", "torino", "tourneo", "tourneo-connect",
        "tourneo-courier", "tourneo-custom", "e-transit", "transit",
        "transit-bus", "transit-connect", "transit-courier", "transit-custom",
        "windstar",
        # --- section 2 (looks like Dacia models) ---
        "1310", "berlina", "break", "dokker", "double-cap", "drop-side",
        "duster", "jogger", "lodgy", "logan", "nova", "pick-up", "sandero",
        "solenza", "spring",
        # --- section 3 (looks like Kia models) ---
        "besta", "carens", "carnival", "ceed-%2F-cee'd", "ceed-sw-%2F-cee'd-sw",
        "cerato", "clarus", "e-niro", "elan", "ev6", "ev9", "joice", "k2500",
        "k2700", "k2900", "leo", "magentis", "mentor", "mohave%2Fborrego",
        "niro", "opirus", "optima", "picanto", "pregio", "pride",
        "proceed-%2F-pro_cee'd", "retona", "rio", "roadster", "rocsta",
        "sephia", "shuma", "sorento", "soul", "spectra", "sportage", "stinger",
        "stonic", "venga", "xceed",
        # --- section 4 (looks like Smart models) ---
        "brabus", "city-coupé%2Fcity-cabrio", "crossblade", "forfour",
        "fortwo", "roadster", "smart-%231", "smart-%233",
    ]
    reset_current_year(
        user=adj_user,
        year_to_reset_to=year_to_reset_to,
        models_to_reset=models_to_reset,
    )

## Adding New Models

Since some models are named "e-klasse-(alle)", we needed to disentangle them for a more transparent view of our data. This is done here.

In [7]:
# Switch: the helper below is only called when this is set to True.
do_22 = False
adj_user2 = "chris"
# NOTE: these rebind the notebook-level `start_year` / `end_year` that the
# instantiation cell also assigns, even while `do_22` is False.
start_year = 1990
curr_year = 1990
end_year = 2024
brand = "mercedes-benz"
# Model names to add for `brand`. Each name appears exactly once: the earlier
# version listed "glc-300" twice, which would pass the same model to
# add_models_to_logging twice.
models_to_add = ["e-200", "e-220", "e-230", "e-240", "e-250", "e-260", "e-270", "e-280", "e-290",
                 "e-300", "e-320", "e-350", "e-36-amg", "e-400", "e-420", "e-43-amg", "e-430", "e-450",
                 "e-50-amg", "e-500", "e-53-amg", "e-55-amg", "e-550", "e-60-amg", "e-63-amg", "glc-200", "glc-220", "glc-250",
                 "glc-300", "glc-350", "glc-400", "glc-43-amg", "glc-450", "glc-63-amg", "gle-250",
                 "gle-300", "gle-350", "gle-400", "gle-43-amg", "gle-450", "gle-500", "gle-53-amg", "gle-580",
                 "gle-63-amg"]
if do_22:
    # `add_models_to_logging` is not defined in this notebook; presumably it
    # comes from the helper notebooks executed via %run in the preamble.
    add_models_to_logging(user = adj_user2,
                          start_year = start_year,
                          curr_year = curr_year,
                          end_year = end_year,
                          brand = brand,
                          models_to_add = models_to_add)

## Sorting the Logging DF

In [8]:
# Switch: the helper below is only called when this is set to True.
do_23 = False
adj_user3 = "chris"
# Model names handed to prioritize_models. Each name appears exactly once: the
# earlier version listed "glc-300" twice.
models_to_sort = ["e-200", "e-220", "e-230", "e-240", "e-250", "e-260", "e-270", "e-280", "e-290",
                 "e-300", "e-320", "e-350", "e-36-amg", "e-400", "e-420", "e-43-amg", "e-430", "e-450",
                 "e-50-amg", "e-500", "e-53-amg", "e-55-amg", "e-550", "e-60-amg", "e-63-amg", "glc-200", "glc-220", "glc-250",
                 "glc-300", "glc-350", "glc-400", "glc-43-amg", "glc-450", "glc-63-amg", "gle-250",
                 "gle-300", "gle-350", "gle-400", "gle-43-amg", "gle-450", "gle-500", "gle-53-amg", "gle-580",
                 "gle-63-amg"]
if do_23:
    # `prioritize_models` is not defined in this notebook; presumably it comes
    # from the helper notebooks executed via %run in the preamble.
    prioritize_models(models_to_sort = models_to_sort,
                      user = adj_user3)


## Checking on Logging DFs

In [15]:
# Load the logging CSV stored under this user's name, then show the rows that
# belong to the user and have curr_year equal to 2025.
user = "janik"
logging_df_test = pd.read_csv(f"logging_data/logging_df_{user}.csv")
logging_df_test.loc[
    logging_df_test["user"].eq(user) & logging_df_test["curr_year"].eq(2025)
]

Unnamed: 0,user,brand,model,start_year,curr_year,end_year,last_scraped
774,janik,volkswagen,181,1990,2025,2024,2024-05-18 14 18
775,janik,volkswagen,amarok,1990,2025,2024,2024-05-18 14 26
776,janik,volkswagen,anfibio,1990,2025,2024,2024-05-18 14 29
777,janik,volkswagen,arteon,1990,2025,2024,2024-05-18 19 37
778,janik,volkswagen,atlas,1990,2025,2024,2024-05-18 19 40
...,...,...,...,...,...,...,...
1093,janik,smart,roadster,1990,2025,2024,not yet scraped
1094,janik,smart,smart-%231,1990,2025,2024,not yet scraped
1095,janik,smart,smart-%233,1990,2025,2024,not yet scraped
1127,janik,mazda,pick-up,1990,2025,2024,not yet scraped


In [16]:
# Display the complete logging frame read from the CSV, without any filter.
logging_df_test

Unnamed: 0,user,brand,model,start_year,curr_year,end_year,last_scraped
0,chris,mercedes-benz,170,1990,1990,2024,not yet scraped
1,chris,mercedes-benz,180,1990,1990,2024,not yet scraped
2,chris,mercedes-benz,190,1990,1990,2024,not yet scraped
3,chris,mercedes-benz,200,1990,1990,2024,not yet scraped
4,chris,mercedes-benz,208,1990,1990,2024,not yet scraped
...,...,...,...,...,...,...,...
1387,chris2,renault,twizy,1990,1990,2024,not yet scraped
1388,chris2,renault,vel-satis,1990,1990,2024,not yet scraped
1389,chris2,renault,wind,1990,1990,2024,not yet scraped
1390,chris2,renault,zoe,1990,1990,2024,not yet scraped


In [17]:
# Rows of the selected user whose curr_year is still 1990.
logging_df_test.loc[
    logging_df_test["user"].eq(user) & logging_df_test["curr_year"].eq(1990)
]

Unnamed: 0,user,brand,model,start_year,curr_year,end_year,last_scraped
1030,janik,ford,sonstige,1990,1990,2024,not yet scraped
1046,janik,dacia,sonstige,1990,1990,2024,not yet scraped
1087,janik,kia,sonstige,1990,1990,2024,not yet scraped
1096,janik,smart,sonstige,1990,1990,2024,not yet scraped
1097,janik,mazda,121,1990,1990,2024,not yet scraped
...,...,...,...,...,...,...,...
1211,janik,land-rover,range-rover-evoque,1990,1990,2024,not yet scraped
1212,janik,land-rover,range-rover-sport,1990,1990,2024,not yet scraped
1213,janik,land-rover,range-rover-velar,1990,1990,2024,not yet scraped
1214,janik,land-rover,series,1990,1990,2024,not yet scraped


In [18]:
# All rows belonging to the selected user.
logging_df_test.loc[logging_df_test["user"].eq(user)]

Unnamed: 0,user,brand,model,start_year,curr_year,end_year,last_scraped
774,janik,volkswagen,181,1990,2025,2024,2024-05-18 14 18
775,janik,volkswagen,amarok,1990,2025,2024,2024-05-18 14 26
776,janik,volkswagen,anfibio,1990,2025,2024,2024-05-18 14 29
777,janik,volkswagen,arteon,1990,2025,2024,2024-05-18 19 37
778,janik,volkswagen,atlas,1990,2025,2024,2024-05-18 19 40
...,...,...,...,...,...,...,...
1211,janik,land-rover,range-rover-evoque,1990,1990,2024,not yet scraped
1212,janik,land-rover,range-rover-sport,1990,1990,2024,not yet scraped
1213,janik,land-rover,range-rover-velar,1990,1990,2024,not yet scraped
1214,janik,land-rover,series,1990,1990,2024,not yet scraped
