# Dictionary Structure for User Engagement Charts

In this notebook, I create a dictionary named `user_actions_dictionaries` which stores, term by term, the DataFrames behind the engagement charts of each course and behind the overall engagement charts.

## Structure of the Dictionary

```python
{
    "t1-2025": {
        "overall": {
            "raw_metrics": "raw_df",
            "unnormalized_scores": "unnormalized_df",
            "log_normalized_scores": "log_normalized_df"
        },
        "subject_1": {
            "raw_metrics": "raw_df",
            "unnormalized_scores": "unnormalized_df",
            "log_normalized_scores": "log_normalized_df"
        },
        "subject_2": {
            "raw_metrics": "raw_df",
            "unnormalized_scores": "unnormalized_df",
            "log_normalized_scores": "log_normalized_df"
        }
    },
    "t3-2024": {
        "overall": {
            "raw_metrics": "raw_df",
            "unnormalized_scores": "unnormalized_df",
            "log_normalized_scores": "log_normalized_df"
        },
        "subject_1": {
            "raw_metrics": "raw_df",
            "unnormalized_scores": "unnormalized_df",
            "log_normalized_scores": "log_normalized_df"
        }
    },
    "t2-2024": "...same",
    "t1-2024": "...same"
}
```

In [None]:
import pandas as pd
import numpy as np

# Imports from other programs
from global_functions_1 import sanitize_filepath, get_current_trimester, get_previous_trimesters,get_all_course_specific_df, get_overall_engagement_df
from fetch_category_IDs_107 import df_map_category_to_id

params={"page": "0"}
params={"page": "1"}


In [2]:
# Keep only the first two trimesters returned for the current trimester;
# each entry of this list becomes a top-level key of the dictionary.
current_trimester = get_current_trimester()
curr_plus_prev_trimesters = get_previous_trimesters(current_trimester)[:2]
print(curr_plus_prev_trimesters)

['t1-2025', 't3-2024']


In [3]:
def get_trimester_dates(trimester):
    """Return the first and last day of a trimester as ``dd/mm/yyyy`` strings.

    The result is used to build the ``start_date`` / ``end_date`` query
    parameters sent with the engagement queries.

    Parameters
    ----------
    trimester : str
        Term label of the form ``"<t1|t2|t3>-<year>"``, e.g. ``"t3-2025"``.

    Returns
    -------
    tuple of (str, str)
        ``(start_date, end_date)``, e.g. ``("01/09/2025", "31/12/2025")``.

    Raises
    ------
    ValueError
        If the label does not split into exactly two parts on ``'-'``.
    KeyError
        If the term part is not one of ``t1``, ``t2`` or ``t3``.
    """
    term_code, year = trimester.split('-')
    # Day/month boundaries of each trimester; the year is appended below.
    day_month_bounds = dict(
        t1=('01/01', '30/04'),
        t2=('01/05', '31/08'),
        t3=('01/09', '31/12'),
    )
    first_day, last_day = day_month_bounds[term_code]
    return ("/".join((first_day, year)), "/".join((last_day, year)))
get_trimester_dates("t3-2025")

('01/09/2025', '31/12/2025')

In [4]:
# Start with one empty sub-dictionary per trimester; the cells below fill them in.
user_actions_dictionaries = {trimester: {} for trimester in curr_plus_prev_trimesters}
user_actions_dictionaries

{'t1-2025': {}, 't3-2024': {}}

In [None]:
# Fetch the three engagement frames (raw metrics, unnormalized scores,
# log-normalized scores) for every category in every term and store them under
# user_actions_dictionaries[term][category_name].
#
# Failures are recorded in error_list as (term, category_name, exception)
# tuples and the loop moves on, so one bad category/term never aborts the run.
# `term` is None when the failure happened before any term was processed.
error_list = []
for row in df_map_category_to_id.itertuples():
    # Reset per row so an early failure never reports the previous row's name.
    category_name = None
    try:
        category_id = row.category_id
        category_name = sanitize_filepath(row.name).lower()  # Proper filenames without :," " etc
    except Exception as err:
        print(f"Error; {err} for subject: {category_name}")
        # list.append takes a single argument, so the record is one tuple.
        error_list.append((None, category_name, err))
        continue

    for term in user_actions_dictionaries:  # keys are the terms, like "t1-2025", "t3-2024"
        try:
            # Create empty placeholders first so the entry exists (with empty
            # frames) even when the fetch below fails.
            if category_name not in user_actions_dictionaries[term]:
                user_actions_dictionaries[term][category_name] = {
                    "raw_metrics": pd.DataFrame(),
                    "unnormalized_scores": pd.DataFrame(),
                    "log_normalized_scores": pd.DataFrame(),
                }

            start_date, end_date = get_trimester_dates(term)
            params = {"category_id": str(category_id), "start_date": start_date, "end_date": end_date}

            # The params are passed as a tuple of (key, value) pairs.
            raw_metrics_df, unnormalized_scores_df, log_normalized_scores_df = get_all_course_specific_df(
                query_params=tuple(params.items())
            )

            # TODO: a minimum-size filter (skip frames that are empty or have
            # 75 rows or fewer) is to be implemented later, after discussion.

            subject_frames = user_actions_dictionaries[term][category_name]
            subject_frames["raw_metrics"] = raw_metrics_df
            subject_frames["unnormalized_scores"] = unnormalized_scores_df
            subject_frames["log_normalized_scores"] = log_normalized_scores_df

        except Exception as err:
            print(f"Error; {err} for subject: {category_name} for term: {term}")
            error_list.append((term, category_name, err))
            continue

In [6]:
# for key in user_actions_dictionaries.keys():
#     print(*list(user_actions_dictionaries[key].keys()), sep="\n", end="\n*****\n")

In [7]:
# user_actions_dictionaries["t1-2025"]["database_management_systems"].keys()

In [8]:
# user_actions_dictionaries["t1-2025"]["database_management_systems"]["raw_metrics"].head()
# user_actions_dictionaries["t1-2025"]["database_management_systems"]["unnormalized_scores"].head()
# user_actions_dictionaries["t1-2025"]["database_management_systems"]["log_normalized_scores"].head()

In [9]:
# Add the "overall" entry (all users, not tied to one category) for every term.
for term in user_actions_dictionaries:
    start_date, end_date = get_trimester_dates(term)
    params = {"start_date": start_date, "end_date": end_date}

    # Register empty placeholder frames before fetching.
    overall_frames = {
        frame_name: pd.DataFrame()
        for frame_name in ("raw_metrics", "unnormalized_scores", "log_normalized_scores")
    }
    user_actions_dictionaries[term]["overall"] = overall_frames

    # The helper is expected to return exactly three frames, in this order.
    (
        raw_metrics_all_users_df,
        unnormalized_scores_all_users_df,
        log_normalized_scores_all_users_df,
    ) = get_overall_engagement_df(query_params=tuple(params.items()))

    overall_frames.update(
        raw_metrics=raw_metrics_all_users_df,
        unnormalized_scores=unnormalized_scores_all_users_df,
        log_normalized_scores=log_normalized_scores_all_users_df,
    )
    

params={"page": "0", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "1", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "2", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "3", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "4", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "0", "start_date": "01/09/2024", "end_date": "31/12/2024"}
params={"page": "1", "start_date": "01/09/2024", "end_date": "31/12/2024"}
params={"page": "2", "start_date": "01/09/2024", "end_date": "31/12/2024"}
params={"page": "3", "start_date": "01/09/2024", "end_date": "31/12/2024"}
params={"page": "4", "start_date": "01/09/2024", "end_date": "31/12/2024"}


In [12]:
# Sanity check: list which frames are stored under the "overall" entry of one term.
term = "t1-2025"
user_actions_dictionaries[term]["overall"].keys()

dict_keys(['raw_metrics', 'unnormalized_scores', 'log_normalized_scores'])

In [15]:
# Preview the first rows of the overall log-normalized scores for the term
# selected in the previous cell (relies on `term` still being set).
user_actions_dictionaries[term]["overall"]["log_normalized_scores"].head()

Unnamed: 0,user_id,likes_given,likes_received,topics_created,posts_created,days_visited,solutions,initial_score,z_score
802,933,5.268,5.858,3.466,5.313,4.745,2.89,27.54,11.31
14,11,5.46,6.729,3.219,5.298,4.736,1.609,27.051,11.11
3335,4025,4.625,4.575,3.434,4.745,4.71,2.89,24.979,10.23
303,348,4.111,5.142,1.946,5.631,4.466,3.219,24.515,10.03
2,-2,3.466,4.22,8.242,8.42,0.0,0.0,24.348,9.96
