In [None]:
import ast
import textwrap

import matplotlib.pyplot as plt
import pandas as pd
from wordcloud import WordCloud

In [None]:
# Load the turn-, topic- and report-level tables from the working directory.
turn_level_df = pd.read_csv('./turn_level_data.csv')
topic_level_df = pd.read_csv('./topic_level_data.csv')
report_level_df = pd.read_csv('./report_level_data.csv')

# Lookup from concept name to its prompt text, built from the distinct
# (concept_name, concept_prompt) pairs found in the topic-level table.
concept_pairs = topic_level_df[['concept_name', 'concept_prompt']].drop_duplicates()
concept_map = concept_pairs.set_index('concept_name')['concept_prompt'].to_dict()

In [None]:
# Average share of each concept across students:
#   1. total score per (user, concept_name),
#   2. divided by that user's total score (the division lines the two series
#      up on their common 'user' index level),
#   3. averaged over users for every concept_name.
score_by_user_topic = topic_level_df.groupby(['user', 'concept_name'])['score'].sum()
score_by_user = topic_level_df.groupby('user')['score'].sum()
topic_share_by_user = (score_by_user_topic / score_by_user).reset_index()
topic_score = topic_share_by_user.groupby('concept_name')['score'].mean()

In [None]:
# Student whose conversation this report describes.
user = 'cchen'

# Restrict the turn- and topic-level tables to that student; the slices are
# copied so they are independent of the full tables.
selected_turn_rows = turn_level_df['user'] == user
selected_topic_rows = topic_level_df['user'] == user
user_turn_level_df = turn_level_df.loc[selected_turn_rows].copy()
user_topic_level_df = topic_level_df.loc[selected_topic_rows].copy()

# Share of the student's total score that falls on each concept.
user_score_by_topic = user_topic_level_df.groupby('concept_name')['score'].sum()
user_topic_score = user_score_by_topic / user_topic_level_df['score'].sum()

# Report-level rows belonging to the same student.
user_report_df = report_level_df.loc[report_level_df['user'] == user]

## Conversation Behavior 

In [None]:
# Summary table of the selected student's conversation behavior.
# Averages are taken over the student's own messages only (role == 'user').
student_turns = user_turn_level_df.loc[user_turn_level_df['role'] == 'user']
score_columns = ['relevance', 'specificity', 'clarity', 'constructiveness', 'politeness', 'sentiment', 'overall']

data = [
    {'Metric': 'Total Turns', 'Value': len(user_turn_level_df)},
    {'Metric': 'Number of AI Turns', 'Value': (user_turn_level_df['role'] == 'assistant').sum()},
    {'Metric': 'Number of User Turns', 'Value': (user_turn_level_df['role'] == 'user').sum()},
    {'Metric': 'Avg Number of Words in User Response', 'Value': f"{student_turns['word_count'].mean():,.2f}"},
    {'Metric': 'Avg Number of Minutes Spent for User Response', 'Value': f"{student_turns['time_spent'].mean():,.2f}"},
]
# One row per quality score, in the order listed in score_columns.
data.extend(
    {'Metric': f"Avg {name.capitalize()} Score", 'Value': f"{student_turns[name].mean():,.2f}"}
    for name in score_columns
)
# Topic rows: the concept with the largest share, and the spread of the shares.
data.extend([
    {'Metric': 'Most Discussed Topic', 'Value': f"{user_topic_score.idxmax()} ({user_topic_score.max():,.2%})"},
    {'Metric': 'Topic Variation', 'Value': f"{user_topic_score.std():,.2%}"},
])
pd.DataFrame(data)

In [None]:
# Horizontal bar chart of the student's topic shares, sorted ascending.
data = user_topic_score.sort_values().copy()
# Optional: label the bars with the wrapped concept prompt instead of the concept name.
# data.index = [textwrap.fill(concept_map[item].strip(), width=40) for item in data.index]

fig, ax = plt.subplots(figsize=(10, 8), facecolor='w')
data.plot.barh(ax=ax)
ax.set(xlabel='Percent of Topic Coverage', ylabel='Topic')
# Shares are fractions; render the tick labels as whole percentages.
ax.xaxis.set_major_formatter("{x:,.0%}")

In [None]:
# Per-concept summary of the student's rows whose concept score equals 1:
# the number of non-null content ids plus the mean of each metric column.
mean_columns = [
    'word_count', 'time_spent', 'relevance', 'specificity', 'clarity',
    'constructiveness', 'politeness', 'sentiment', 'overall', 'semantic_similarity',
]
scored_rows = user_topic_level_df.loc[user_topic_level_df['score'] == 1]
scored_rows.groupby('concept_name').agg(
    num_turns=('content_id', 'count'),
    # (column, aggfunc) tuples are the shorthand for pd.NamedAgg.
    **{f"avg_{name}": (name, 'mean') for name in mean_columns},
)

In [None]:
# Word cloud of the keywords attached to the selected student's own turns.
# Each 'keywords' cell is expected to hold the text form of a Python list of
# strings. It is parsed with ast.literal_eval instead of eval so that text
# coming from the data file can only ever be read as a literal, never executed.
# Missing keyword cells are skipped rather than crashing the parse.
user_keywords = user_turn_level_df.loc[user_turn_level_df['role'] == 'user', 'keywords'].dropna()
keyword_text = ' '.join(' '.join(ast.literal_eval(kw_list)) for kw_list in user_keywords)

wordcloud = WordCloud(
    background_color='white',
    colormap='coolwarm'
).generate(keyword_text)
fig, ax = plt.subplots(figsize=(10, 8), facecolor='w')
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
plt.show()

## Comparison to Peers

In [None]:
# Side-by-side summary: the selected student vs. the average student.
# Peer values are computed per student first and then averaged across
# students, so every student carries the same weight.
score_columns = ['relevance', 'specificity', 'clarity', 'constructiveness', 'politeness', 'sentiment', 'overall']

student_turns = user_turn_level_df.loc[user_turn_level_df['role'] == 'user']
student_ai_turns = user_turn_level_df.loc[user_turn_level_df['role'] == 'assistant']
peer_user_turns = turn_level_df.loc[turn_level_df['role'] == 'user'].groupby('user')
peer_ai_turns = turn_level_df.loc[turn_level_df['role'] == 'assistant'].groupby('user')

# Turn counts use groupby size (row counts) so the peer side is measured the
# same way as len()/sum() on the student side; counting the 'time' column
# would silently leave out rows whose 'time' is missing.
data = [
    {'Metric': 'Total Turns', 'User Value': len(user_turn_level_df), 'Avg Student Value': turn_level_df.groupby('user').size().mean()},
    {'Metric': 'Number of AI Turns', 'User Value': (user_turn_level_df['role'] == 'assistant').sum(), 'Avg Student Value': peer_ai_turns.size().mean()},
    {'Metric': 'Number of User Turns', 'User Value': (user_turn_level_df['role'] == 'user').sum(), 'Avg Student Value': peer_user_turns.size().mean()},
]
data.append({
    'Metric': 'Avg Number of Words in User Response',
    'User Value': f"{student_turns['word_count'].mean():,.2f}",
    'Avg Student Value': f"{peer_user_turns['word_count'].mean().mean():,.2f}"
})
data.append({
    'Metric': 'Avg Number of Words in AI Response',
    'User Value': f"{student_ai_turns['word_count'].mean():,.2f}",
    'Avg Student Value': f"{peer_ai_turns['word_count'].mean().mean():,.2f}"
})
data.append({
    'Metric': 'Avg Number of Minutes Spent for User Response',
    'User Value': f"{student_turns['time_spent'].mean():,.2f}",
    'Avg Student Value': f"{peer_user_turns['time_spent'].mean().mean():,.2f}"
})
# One row per quality score, in the order listed in score_columns.
for col in score_columns:
    data.append({
        'Metric': f"Avg {col.capitalize()} Score",
        'User Value': f"{student_turns[col].mean():,.2f}",
        'Avg Student Value': f"{peer_user_turns[col].mean().mean():,.2f}"
    })
data.append({
    'Metric': 'Most Discussed Topic',
    'User Value': f"{user_topic_score.idxmax()} ({user_topic_score.max():,.2%})",
    'Avg Student Value': f"{topic_score.idxmax()} ({topic_score.max():,.2%})"
})
data.append({
    'Metric': 'Topic Variation',
    'User Value': f"{user_topic_score.std():,.2%}",
    'Avg Student Value': f"{topic_score.std():,.2%}"
})
pd.DataFrame(data)

In [None]:
# Word cloud of the keywords attached to every student's own turns.
# Each 'keywords' cell is expected to hold the text form of a Python list of
# strings. It is parsed with ast.literal_eval instead of eval so that text
# coming from the data file can only ever be read as a literal, never executed.
# Missing keyword cells are skipped rather than crashing the parse.
all_keywords = turn_level_df.loc[turn_level_df['role'] == 'user', 'keywords'].dropna()
keyword_text = ' '.join(' '.join(ast.literal_eval(kw_list)) for kw_list in all_keywords)

wordcloud = WordCloud(
    background_color='white',
    colormap='coolwarm'
).generate(keyword_text)
fig, ax = plt.subplots(figsize=(10, 8), facecolor='w')
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
plt.show()

In [None]:
# Grouped bar chart: the student's topic shares next to the all-student
# average, ordered by the student's own share (ascending).
user_sorted = user_topic_score.sort_values()
df = pd.DataFrame({
    'User': user_sorted,
    # Put the averages in the same concept order as the student's series.
    'Avg Student': topic_score.reindex(user_sorted.index),
})
# Optional: label the bars with the wrapped concept prompt instead of the concept name.
# df.index = [textwrap.fill(concept_map[item].strip(), width=40) for item in df.index]

fig, ax = plt.subplots(figsize=(10, 8), facecolor='w')
df.plot.barh(ax=ax)

ax.set(xlabel='Percent of Topic Coverage', ylabel='Topic')
ax.legend(title='Data Series')
# Shares are fractions; render the tick labels as whole percentages.
ax.xaxis.set_major_formatter('{x:,.0%}')

In [None]:
# Gap between the student's topic share and the all-student average;
# a positive bar means the student's share of that topic is above average.
data = user_topic_score - topic_score
# Optional: label the bars with the wrapped concept prompt instead of the concept name.
# data.index = [textwrap.fill(concept_map[item].strip(), width=40) for item in data.index]

fig, ax = plt.subplots(figsize=(10, 6), facecolor='w')
data.plot.barh(ax=ax)
ax.set(xlabel='User Percent of Topic Coverage Difference from Average', ylabel='Topic')
# Differences are fractions; render the tick labels as whole percentages.
ax.xaxis.set_major_formatter("{x:,.0%}")

## Correlation with Final Report Quality 

In [None]:
# Per-student means of the metrics on the student's own turns (role == 'user').
# The columns get a "user_" prefix so they stay distinct from report-level
# columns of the same name, such as 'overall'.
turn_metric_columns = [
    'word_count', 'time_spent', 'relevance', 'specificity', 'clarity',
    'constructiveness', 'politeness', 'sentiment', 'overall', 'semantic_similarity',
]
user_turn_scores = (
    turn_level_df.loc[turn_level_df['role'] == 'user']
    .groupby('user')[turn_metric_columns]
    .mean()
)
user_turn_scores.columns = [f"user_{col}" for col in user_turn_scores.columns]

# This cell overwrites report_level_df with the merged result. Drop any
# "user_*" columns left by a previous run before merging, so that re-running
# the cell does not produce duplicated *_x / *_y columns (which would make the
# correlation cell fail with a KeyError).
report_level_df = (
    report_level_df
    .drop(columns=user_turn_scores.columns, errors='ignore')
    .merge(user_turn_scores.reset_index(), on='user')
)

In [None]:
# Correlation of every per-student turn metric with the report's 'overall'
# score ('overall' itself is included, so its own entry is the self-correlation).
correlation_columns = [
    'user_word_count', 'user_time_spent', 'user_relevance', 'user_specificity',
    'user_clarity', 'user_constructiveness', 'user_politeness', 'user_sentiment',
    'user_overall', 'user_semantic_similarity', 'overall',
]
correlation_matrix = report_level_df[correlation_columns].corr()
correlation_matrix['overall']