In [43]:
import pandas as pd
import altair as alt
import os
import IPython.display

In [None]:
def create_stacked_bar_chart(raw_metrics, subject, term, path_to_static_folder):
    """Save a stacked bar chart of per-user activity counts as an HTML file.

    One horizontal bar is drawn per ``acting_username``; each bar is split by
    activity type. Activity types for which every user has a count of 0 are
    left out of the chart entirely so they do not clutter the legend.

    Args:
        raw_metrics: DataFrame with an ``acting_username`` column and any
            subset of the activity-count columns listed in ``known_metrics``.
        subject: Course name; used in the chart title and as the output
            file name (``<subject>.html``).
        term: Term identifier; used as the output sub-folder name.
        path_to_static_folder: Root folder under which
            ``visualizations/course_specific_charts/<term>/`` is created.

    Returns:
        None. The chart is written to disk; if there is no non-zero metric a
        message is printed and no file is written.
    """
    known_metrics = ['created_new_topic', 'likes_given', 'likes_received', 'replied', 'solved_a_topic']
    # Only use metrics that exist in the DataFrame
    present_metrics = [m for m in known_metrics if m in raw_metrics.columns]

    # Keep a metric only when at least one user has a non-zero value for it.
    # (With zero rows `.any()` is False for every column, so nothing is kept.)
    has_activity = (raw_metrics[present_metrics] != 0).any()
    active_metrics = [m for m in present_metrics if has_activity[m]]

    if not active_metrics:
        print("No non-zero metrics to display.")
        return

    # Convert to long format for Altair, using only the metrics that survived
    # the filter above so all-zero activity types never reach the chart.
    raw_metrics_long = raw_metrics.melt(id_vars=['acting_username'],
                                        value_vars=active_metrics,
                                        var_name='Activity Type',
                                        value_name='Count')

    # Create Altair stacked bar chart; sort='-x' orders users by descending
    # total count along the x axis.
    chart = alt.Chart(raw_metrics_long).mark_bar().encode(
        x=alt.X('Count:Q', title="Total User Interactions",
                axis=alt.Axis(titleFontSize=14)),
        y=alt.Y('acting_username:N', title="Username", sort='-x',
                axis=alt.Axis(titleFontSize=14)),
        color=alt.Color('Activity Type:N', title="Activity Type"),
        tooltip=['acting_username', 'Activity Type', 'Count']
    ).properties(
        title=f'Most Active Users ({subject})',
        width=600,
        height=400
    ).configure_legend(
        orient='right'
    )

    # Save the chart as an interactive HTML file
    viz_folder = os.path.join(path_to_static_folder, "visualizations", "course_specific_charts", term)
    os.makedirs(viz_folder, exist_ok=True)
    chart.save(os.path.join(viz_folder, f"{subject}.html"))

In [None]:
def create_empty_chart():
    """Build the placeholder chart used when a course has no data to plot.

    Returns:
        An Altair text chart (500x300) showing a single gray, centered
        message explaining why no real chart is available.
    """
    message = """Course was either not offered this term OR it had extremely less interactions on discourse"""

    # Altair needs a data source, so wrap the message in a one-row frame.
    placeholder_frame = pd.DataFrame({'x': [0], 'y': [0], 'text': [message]})

    text_style = dict(align='center', baseline='middle', fontSize=18, color='gray')
    # Fixed pixel position: the middle of the 500x300 canvas set below.
    pixel_position = dict(x=alt.value(250), y=alt.value(150))

    message_mark = alt.Chart(placeholder_frame).mark_text(**text_style)
    positioned_mark = message_mark.encode(text='text:N', **pixel_position)
    return positioned_mark.properties(width=500, height=300)

In [46]:
# Generate one HTML chart per course workbook, grouped by term folder.
scores_data_path = "../data/scores"
path_to_static_folder = "../static"

# Every sub-folder of the scores directory is treated as a term (e.g. t1_2024),
# except "overall_scores". Filtering (instead of list.remove) avoids a
# ValueError when that folder is absent, and the isdir check skips stray files.
term_scores_folder_list = sorted(
    entry for entry in os.listdir(scores_data_path)
    if entry != "overall_scores" and os.path.isdir(os.path.join(scores_data_path, entry))
)

# Placeholder saved for courses whose "log_normalized_scores" sheet is empty.
empty_chart = create_empty_chart()

for term in term_scores_folder_list:    # e.g. term = t1_2024
    term_score_folder_fullpath = os.path.join(scores_data_path, term)  # ../data/scores/t1_2024
    viz_folder = os.path.join(path_to_static_folder, "visualizations", "course_specific_charts", term)

    for file in sorted(os.listdir(term_score_folder_fullpath)):
        # Only course workbooks are processed; "~$name.xlsx" files are the
        # lock files Excel creates while a workbook is open.
        if not file.endswith(".xlsx") or file.startswith("~$"):
            continue

        full_path = os.path.join(term_score_folder_fullpath, file)  # ../data/scores/t1_2024/Advanced_Algorithms.xlsx
        subject = file.removesuffix(".xlsx").lower()

        # Open the workbook once and read both sheets from the same handle.
        with pd.ExcelFile(full_path) as workbook:
            # The first 10 rows of "log_normalized_scores" are taken as the top users.
            log_normalized_df = pd.read_excel(workbook, sheet_name="log_normalized_scores")
            if not log_normalized_df.empty:
                raw_metrics = pd.read_excel(workbook, sheet_name="raw_metrics")

        if not log_normalized_df.empty:
            top_10 = log_normalized_df.head(10).acting_username.to_list()

            # Restrict "raw_metrics" to these top 10 users
            raw_metrics = raw_metrics[raw_metrics.acting_username.isin(top_10)]
            # NOTE(review): when every metric is zero, create_stacked_bar_chart
            # only prints a message, so no HTML file is written for this course.
            create_stacked_bar_chart(raw_metrics=raw_metrics,
                                     subject=subject,
                                     term=term,
                                     path_to_static_folder=path_to_static_folder)
        else:
            # No scored users for this course: save the placeholder chart instead.
            os.makedirs(viz_folder, exist_ok=True)
            empty_chart.save(os.path.join(viz_folder, f"{subject}.html"))

Saved visualization: ai__search_methods_for_problem_solving.html
Saved visualization: big_data_and_biological_networks.html
Saved visualization: business_analytics.html
Saved visualization: business_data_management.html
Saved visualization: computational_thinking.html
Saved visualization: computer_system_design.html
Saved visualization: database_management_systems.html
Saved visualization: data_visualization_design.html
Saved visualization: deep_learning.html
Saved visualization: deep_learning_for_computer_vision.html
Saved visualization: english_i.html
Saved visualization: english_ii.html
Saved visualization: large_language_models.html
Saved visualization: machine_learning_foundations.html
Saved visualization: machine_learning_practice.html
Saved visualization: machine_learning_techniques.html
Saved visualization: managerial_economics.html
Saved visualization: mathematics_for_data_science_i.html
Saved visualization: mathematics_for_data_science_ii.html
Saved visualization: modern_appl