# Logging Functions

This script defines the logging functions used to create and maintain the logging data frames.

## Instantiation

Instantiation of the logging data frames.

In [None]:
def instantiate_logging_df(start_year, end_year, user_list, model_dictionaries):
    """Build the initial logging DataFrame, one row per (user, brand, model).

    Args:
        start_year: Value stored in both the "start_year" and "curr_year"
            columns of every row.
        end_year: Value stored in the "end_year" column of every row.
        user_list: Sequence of user labels. The i-th label is written to the
            "user" column for all models found under the i-th key of
            ``model_dictionaries`` (in iteration order).
        model_dictionaries: Nested mapping of the form
            ``{user_key: {brand: iterable_of_models}}``.

    Returns:
        A DataFrame with the columns user, brand, model, start_year,
        curr_year, end_year and last_scraped. Every "last_scraped" entry is
        the string "not yet scraped". Rows are numbered 0..n-1.

    Raises:
        IndexError: If ``user_list`` has no label for a user key that has at
            least one model.
    """
    columns = ["user", "brand", "model", "start_year", "curr_year", "end_year", "last_scraped"]

    # Collect plain records first and build the frame once; concatenating
    # inside the loop copies the whole frame on every iteration.
    rows = []
    for user_idx, brands in enumerate(model_dictionaries.values()):
        for curr_brand, models in brands.items():
            for curr_model in models:
                rows.append({
                    "user": user_list[user_idx],
                    "brand": curr_brand,
                    "model": curr_model,
                    "start_year": start_year,
                    # Scraping has not started yet, so the current year
                    # begins at the start year.
                    "curr_year": start_year,
                    "end_year": end_year,
                    "last_scraped": "not yet scraped",
                })

    # Passing `columns` keeps the expected schema even when there are no rows.
    return pd.DataFrame(rows, columns=columns)

#### Logging DF sorting (prioritizing) functions

In [7]:
# This function prioritizes a specific brand in the logging DataFrame for a given user.
def prioritize_brand(brand_to_sort, user):
    """Move all rows of one brand to the top of a user's logging CSV.

    Reads "logging_data/logging_df_<user>.csv", places the rows whose
    "brand" equals ``brand_to_sort`` first (relative order of rows is kept
    within both groups) and writes the result back to the same file.

    Args:
        brand_to_sort: Brand value to move to the top.
        user: User identifier used to build the CSV file name.

    Returns:
        None. If the brand does not occur in the file, a message is printed
        and the file is left untouched.
    """
    # Read and write the SAME file; writing elsewhere would leave the
    # logging file that was read unchanged.
    csv_path = "logging_data/logging_df_" + str(user) + ".csv"
    logging_df = pd.read_csv(csv_path)

    is_brand = logging_df["brand"] == brand_to_sort
    if not is_brand.any():
        print("Brand '" + str(brand_to_sort) + "' not found!")
        return

    # Matching rows first, everything else afterwards.
    logging_df = pd.concat([logging_df[is_brand], logging_df[~is_brand]])
    logging_df.to_csv(csv_path, index=False)

In [None]:
# This function prioritizes specific models in the logging DataFrame for a given user.
def prioritize_models(models_to_sort, user):
    """Move the rows of the given models to the top of a user's logging CSV.

    Reads "logging_data/logging_df_<user>.csv" and, for each model in
    ``models_to_sort`` in turn, moves the rows whose "model" equals it to
    the top. Because each model is moved above the previous result, the
    LAST model in ``models_to_sort`` ends up first in the file.

    Args:
        models_to_sort: Iterable of model values to prioritize.
        user: User identifier used to build the CSV file name.

    Returns:
        None. For every model that does not occur in the file a message is
        printed. The file is only rewritten if at least one model was found.
    """
    csv_path = "logging_data/logging_df_" + str(user) + ".csv"
    logging_df = pd.read_csv(csv_path)

    reordered = False
    for curr_model_to_sort in models_to_sort:
        is_model = logging_df["model"] == curr_model_to_sort
        if is_model.any():
            # Matching rows first, everything else afterwards.
            logging_df = pd.concat([logging_df[is_model], logging_df[~is_model]])
            reordered = True
        else:
            print("Model '" + str(curr_model_to_sort) + "' not found!")

    # Write once after all models are processed instead of on every iteration.
    if reordered:
        logging_df.to_csv(csv_path, index=False)

#### Setting the start, end and current year in the logging DF

In [None]:
# This function sets the start and end year for scraping for a given user in the logging DataFrame.
def set_start_and_end_year(user, start_year, end_year):
    """Overwrite the start and end year of every row in a user's logging CSV.

    Args:
        user: User identifier used to build the CSV file name.
        start_year: Value written to the "start_year" column of all rows.
        end_year: Value written to the "end_year" column of all rows.

    Returns:
        None. "logging_data/logging_df_<user>.csv" is rewritten in place.
    """
    csv_path = "logging_data/logging_df_" + str(user) + ".csv"
    frame = pd.read_csv(csv_path)

    # Apply both column updates to the whole frame.
    for column, value in (("start_year", start_year), ("end_year", end_year)):
        frame[column] = value

    frame.to_csv(csv_path, index=False)

In [None]:
# This function resets the current year for specified models to a given year in the logging DataFrame for a given user.
def reset_current_year(user, year_to_reset_to, models_to_reset):
    """Set the current year of the given models in a user's logging CSV.

    Args:
        user: User identifier used to build the CSV file name.
        year_to_reset_to: Value written to the "curr_year" column of every
            row whose "model" is one of ``models_to_reset``.
        models_to_reset: Iterable of model values to reset.

    Returns:
        None. For every model that does not occur in the file a message is
        printed. "logging_data/logging_df_<user>.csv" is only rewritten if
        at least one model was found.
    """
    csv_path = "logging_data/logging_df_" + str(user) + ".csv"
    logging_df = pd.read_csv(csv_path)

    # The "model" column is never modified below, so the set of known
    # models can be computed once up front.
    known_models = logging_df["model"].unique()

    changed = False
    for curr_model_to_reset in models_to_reset:
        if curr_model_to_reset in known_models:
            logging_df.loc[logging_df["model"] == curr_model_to_reset, "curr_year"] = year_to_reset_to
            changed = True
        else:
            print("Model '" + str(curr_model_to_reset) + "' not found!")

    # Write once after all models are processed instead of on every iteration.
    if changed:
        logging_df.to_csv(csv_path, index=False)

In [None]:
# This function adds models to the logging DataFrame for a given user.
def add_models_to_logging(user, start_year, curr_year, end_year, brand, models_to_add):
    """Append new models of one brand to a user's logging CSV.

    A model is only added if the file does not already contain a row with
    the same "brand" and "model"; a model repeated within ``models_to_add``
    is added once.

    Args:
        user: User identifier; used to build the CSV file name and written
            to the "user" column of the new rows.
        start_year: Value for the "start_year" column of the new rows.
        curr_year: Value for the "curr_year" column of the new rows.
        end_year: Value for the "end_year" column of the new rows.
        brand: Brand the models belong to.
        models_to_add: Iterable of model values to add.

    Returns:
        None. "logging_data/logging_df_<user>.csv" is rewritten in place;
        new rows get "not yet scraped" in the "last_scraped" column.
    """
    csv_path = "logging_data/logging_df_" + str(user) + ".csv"
    logging_df = pd.read_csv(csv_path)

    # Models already logged for this brand; extended as rows are queued so
    # repeated entries in `models_to_add` are not inserted twice.
    known_models = list(logging_df[logging_df["brand"] == brand]["model"].unique())

    new_rows = []
    for curr_model_to_add in models_to_add:
        if curr_model_to_add in known_models:
            continue
        known_models.append(curr_model_to_add)
        new_rows.append({
            "user": user,
            "brand": brand,
            "model": curr_model_to_add,
            "start_year": start_year,
            "curr_year": curr_year,
            "end_year": end_year,
            "last_scraped": "not yet scraped",
        })

    # One concatenation for all new rows instead of one per model.
    if new_rows:
        logging_df = pd.concat([logging_df, pd.DataFrame(new_rows)], ignore_index=True)

    logging_df.to_csv(csv_path, index=False)