# This notebook creates some basic visualizations of the data for the whole Discourse forum. The data is NOT specific to any category.

In [None]:
import altair as alt
import pandas as pd
import numpy as np
import os
import IPython.display

def create_stacked_bar_chart(raw_metrics, term, path_to_static_folder):
    """Build, save and display a stacked bar chart of per-user activity.

    Args:
        raw_metrics: DataFrame holding ``user_id``, ``username`` and the six
            activity columns listed in ``activity_columns`` below.
        term: Label used in the chart title and in the output file name.
        path_to_static_folder: Root folder; the chart is written to
            ``<root>/visualizations/overall_discourse_charts/``.

    Returns:
        None. When every activity column is entirely zero, a message is
        printed and nothing is saved or displayed.
    """
    # Activity columns to plot ('topics_viewed' and 'posts_read' are left out)
    activity_columns = ['likes_received', 'likes_given', 'days_visited', 'solutions',
                        "topics_created", "posts_created"]

    # Index by user so both identifiers survive the column filtering below
    per_user = raw_metrics.set_index(['user_id', 'username'])[activity_columns]

    # Drop any activity column that has no non-zero value at all
    has_nonzero_value = (raw_metrics[activity_columns] != 0).any()
    kept_columns = per_user.loc[:, has_nonzero_value]

    # One row per (user, metric) pair, as Altair expects long-form data
    tidy = kept_columns.reset_index().melt(
        id_vars=["user_id", "username"],
        var_name="metric",
        value_name="count",
    )

    # Guard clause: nothing to draw
    if tidy.empty:
        print("No non-zero metrics to display.")
        return

    # Encodings are built separately to keep the chart expression readable
    x_encoding = alt.X(
        "count:Q",
        title="Total User Interactions",
        axis=alt.Axis(format="~s", titleFontSize=14),
    )
    y_encoding = alt.Y(
        "username:N",
        title="Username",
        sort="-x",
        axis=alt.Axis(titleFontSize=14),
    )
    color_encoding = alt.Color("metric:N", title="Activity Type")

    bars = alt.Chart(tidy).mark_bar().encode(
        x=x_encoding,
        y=y_encoding,
        color=color_encoding,
        tooltip=["username", "metric", "count"],
    )
    chart = bars.properties(
        title=f"Most Active Users for ({term})",
        width=600,
        height=400,
    )

    # Save as interactive HTML in the folder for overall-engagement charts
    viz_folder = os.path.join(path_to_static_folder, "visualizations", "overall_discourse_charts")
    os.makedirs(viz_folder, exist_ok=True)
    chart.save(f'{viz_folder}/most_active_users_{term}.html')

    # Render inline in the Jupyter notebook
    IPython.display.display(chart)


In [2]:
# Workbook holding the overall scores for a single term
full_path = "../data/scores/overall_scores/t1_2024.xlsx"

# Load the unnormalized scores sheet
unnormalized_df = pd.read_excel(full_path, sheet_name="unnormalized_scores")

# Keep only rows with a positive user_id
# (presumably this drops system/bot accounts -- TODO confirm)
unnormalized_df = unnormalized_df.loc[unnormalized_df["user_id"].gt(0)]

# Take the first 10 remaining rows.
# NOTE(review): this is only the "top 10" if the sheet is already sorted by score -- confirm.
top_10_users = unnormalized_df.iloc[:10]
top_10_users

Unnamed: 0,user_id,likes_received,likes_given,days_visited,solutions,topics_created,posts_created,initial_score,z_score
1,348,661,266,115,120,27,893,2505.6,66.27
2,933,613,326,121,18,58,571,1260.0,33.29
4,11,584,148,121,11,16,424,975.9,25.76
5,15826,526,200,121,4,15,418,875.7,23.11
6,17066,97,79,115,38,44,305,754.8,19.91
7,23071,27,85,120,42,51,120,616.0,16.23
8,3024,160,240,120,23,19,151,603.3,15.9
9,12082,228,57,119,3,6,401,554.0,14.59
10,25214,22,12,79,39,71,115,545.0,14.35
11,6616,125,322,119,12,23,164,508.5,13.38


In [3]:
# Lookup table from user_id to username, read from CSV
id_username_mapping = pd.read_csv("../data/id_username_mapping.csv")

# Preview a fixed (seeded) random sample of 5 rows
id_username_mapping.sample(n=5, random_state=42)

Unnamed: 0,user_id,username,name,email
6129,7859,USHA,Usha Bandyopadhyay,21f2000230@ds.study.iitm.ac.in
5170,6677,21f2000456,ATCHAYA A,21f2000456@ds.study.iitm.ac.in
17191,20870,moulik,Moulik Laddha,23f2002041@ds.study.iitm.ac.in
22591,26297,24dp1000010,YOGESH pANDEY,24dp1000010@ds.study.iitm.ac.in
6633,8397,21f3002127,Vanapalli Shanmukha Siva Narendra,21f3002127@ds.study.iitm.ac.in


In [4]:
# Attach the mapping columns to the top users; rows without a matching
# user_id in the mapping are dropped (inner join is the merge default).
top_10_users_2 = pd.merge(top_10_users, id_username_mapping, how="inner", on="user_id")

In [5]:
# Generate one "most active users" Altair chart per score workbook
path_to_static_folder = "../static"
all_users_scores_folder = "../data/scores/overall_scores"

# os.listdir returns every directory entry in arbitrary order, so keep only
# Excel workbooks (skipping the "~$..." lock files Excel creates while a
# workbook is open) and sort them for a reproducible processing order.
score_files = sorted(
    entry
    for entry in os.listdir(all_users_scores_folder)
    if entry.endswith(".xlsx") and not entry.startswith("~$")
)

for data_file in score_files:
    full_path = os.path.join(all_users_scores_folder, data_file)
    unnormalized_df = pd.read_excel(full_path, sheet_name="unnormalized_scores")

    # Keep only rows with a positive user_id, then take the first 10 rows.
    # NOTE(review): relies on the sheet already being sorted by score -- confirm.
    unnormalized_df = unnormalized_df[unnormalized_df["user_id"] > 0]
    top_10_users = unnormalized_df.head(10)

    # Add usernames; users missing from the mapping are dropped by the inner join
    top_10_users_2 = top_10_users.merge(id_username_mapping, on="user_id")

    # The term label is the file name without its extension
    term = data_file.removesuffix(".xlsx")
    create_stacked_bar_chart(top_10_users_2, term=term, path_to_static_folder=path_to_static_folder)