In [4]:
import pandas as pd
import altair as alt
import os
import IPython.display

In [5]:


def create_stacked_bar_chart(raw_metrics, subject, path_to_static_folder, metrics=None):
    """Plot, save and display a stacked bar chart of per-user activity counts.

    Each user in ``raw_metrics`` gets one horizontal bar, split into coloured
    segments by activity type. Metrics whose value is zero (or missing) for
    every user are left out of the chart so they do not clutter the legend.

    Parameters
    ----------
    raw_metrics : pandas.DataFrame
        One row per user. Must contain an ``acting_username`` column and one
        column per entry in ``metrics``.
    subject : str
        Label used in the chart title and, lower-cased with spaces replaced by
        underscores, in the output file name.
    path_to_static_folder : str
        Base folder; the chart is written to its ``visualizations`` subfolder,
        which is created if it does not exist.
    metrics : list of str, optional
        Column names to plot. Defaults to the five activity columns
        ``created_new_topic``, ``likes_given``, ``likes_received``,
        ``replied`` and ``solved_a_topic``.

    Returns
    -------
    None
        The chart is saved to disk and displayed as a side effect. If no metric
        has a non-zero value, a message is printed and nothing is written.

    Raises
    ------
    KeyError
        If ``raw_metrics`` lacks ``acting_username`` or one of ``metrics``.
    """
    if metrics is None:
        metrics = ['created_new_topic', 'likes_given', 'likes_received', 'replied', 'solved_a_topic']

    # Keep only the metrics that at least one user actually has activity for.
    # NaN is treated as "no activity" so an all-missing column is dropped too.
    active_metrics = [
        metric for metric in metrics
        if raw_metrics[metric].fillna(0).ne(0).any()
    ]

    if not active_metrics:
        print("No non-zero metrics to display.")
        return

    # Convert to long format for Altair, using only the active metrics so the
    # filter above is reflected in the plotted data and the legend.
    raw_metrics_long = raw_metrics.melt(
        id_vars=['acting_username'],
        value_vars=active_metrics,
        var_name='Activity Type',
        value_name='Count',
    )

    # Horizontal stacked bars; users are ordered by descending x value.
    chart = alt.Chart(raw_metrics_long).mark_bar().encode(
        x=alt.X('Count:Q', title="Total User Interactions",
                axis=alt.Axis(titleFontSize=14)),
        y=alt.Y('acting_username:N', title="Users", sort='-x',
                axis=alt.Axis(titleFontSize=14)),
        color=alt.Color('Activity Type:N', title="Activity Type"),
        tooltip=['acting_username', 'Activity Type', 'Count']
    ).properties(
        title=f'Most Active Users ({subject})',
        width=600,
        height=400
    ).configure_legend(
        orient='right'
    )

    # Build the file name once so the saved path and the log line cannot drift apart.
    file_name = f"most_active_users_{subject.lower().replace(' ', '_')}.html"

    # Save the chart as an interactive HTML file
    viz_folder = os.path.join(path_to_static_folder, "visualizations")
    os.makedirs(viz_folder, exist_ok=True)
    chart.save(os.path.join(viz_folder, file_name))
    print(f"Saved visualization: {file_name}")

    # Display the chart in the Jupyter Notebook
    IPython.display.display(chart)

In [6]:
# Build one "most active users" chart per course workbook in the export folder.
file_path = "../subject_wise_engagement/course_excel_data_t1_2024"
path_to_static_folder = "../static"

# os.listdir() returns entries in arbitrary order; sorting keeps runs reproducible.
for file in sorted(os.listdir(file_path)):
    if "all_users" in file:
        continue  # Skip files that contain "all_users"
    if not file.endswith(".xlsx") or file.startswith("~$"):
        continue  # Skip anything that is not a workbook (e.g. Excel lock files, subfolders)

    full_path = os.path.join(file_path, file)

    # Read both sheets in a single pass so the workbook is opened only once.
    sheets = pd.read_excel(full_path, sheet_name=["log_normalized_scores", "raw_metrics"])

    # Take the first 5 usernames from "log_normalized_scores".
    # NOTE(review): this assumes the sheet is already sorted by score, best first — confirm.
    log_normalized_df = sheets["log_normalized_scores"]
    top_5 = log_normalized_df.head(5).acting_username.to_list()

    # Filter "raw_metrics" for these top 5 users
    raw_metrics = sheets["raw_metrics"]
    raw_metrics = raw_metrics[raw_metrics.acting_username.isin(top_5)]

    create_stacked_bar_chart(raw_metrics, file.removesuffix(".xlsx"), path_to_static_folder)

Saved visualization: most_active_users_database_management_systems.html


Saved visualization: most_active_users_english_ii.html


Saved visualization: most_active_users_machine_learning_foundations.html


Saved visualization: most_active_users_modern_application_development_i.html


Saved visualization: most_active_users_programming_in_python.html
