In [None]:
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

In [None]:
# data preprocessing
df = pd.read_csv('formresponses.csv')

# Source column headers and the display names they are renamed to; the two
# lists are paired by position.
columns = [
    'ALIAS', 'viz', 'stats', 'math', 'art', 'ui', 'code',
    'graph', 'hci', 'eval', 'comm', 'collab', 'GIT', 'avg',
]
names = [
    'Alias', 'Visualization', 'Statistics', 'Math', 'Art', 'UI', 'Coding',
    'Graphics', 'HCI', 'Evaluation', 'Communication', 'Collaboration', 'GIT', 'Average',
]

# Select the columns above, drop rows with a missing value in any column
# other than the alias, then apply the display names.
df2 = (
    df[columns]
    .dropna(subset=columns[1:])
    .rename(columns=dict(zip(columns, names)))
)

# Free-text / background questions, kept under their original headers.
columns2 = [
    'ALIAS',
    'If you are studying at a university other than KTH, which is it?',
    'In what year did you start your university degree ?',
    'What year and month you expect to graduate?',
    'MAJOR',
    'What degree are you pursuing?',
    'Please, tell me about yourself. What interest you? Do you have any hobbies?',
    'What do you expect to learn in Information Visualization? How do expect to use what you learn?',
    'What courses have you completed that are relevant to Information Visualization, where and when?',
    'Do you use KTH Canvas?',
]
df3 = df[columns2].copy()

In [None]:
# sort (descending)
def sort_dataframe(column_name):
    """Display df2 ordered from highest to lowest value of `column_name`.

    Passing 'Alias' displays df2 in its existing row order without sorting.
    """
    if column_name == 'Alias':
        display(df2)
        return
    display(df2.sort_values(column_name, ascending=False))
# Selector for the sort column; every display name except the alias is offered.
sortable_columns = [name for name in names if name != 'Alias']
column_dropdown = widgets.Dropdown(
    options=sortable_columns,
    description='Sort by:',
    layout=widgets.Layout(width='300px'),
)
# Re-run sort_dataframe each time the selection changes.
widgets.interact(sort_dataframe, column_name=column_dropdown)

# box plot
# One box per column of df2 other than the alias, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df2[names[1:]], ax=ax)
# Slant the column labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', fontsize=8)
ax.set_ylabel('Value')
plt.show()

# scatter plot
def scatterplot(column_name):
    """Scatter `column_name` against 'Average' for every row of df2.

    Each point is annotated with the alias of its row. Rows that share the
    same (x, y) coordinates get a single annotation listing all of their
    aliases, comma-separated. Nothing is drawn when `column_name` is
    'Average'.

    Args:
        column_name: Name of the df2 column to place on the x axis.
    """
    if column_name == 'Average':
        return

    plt.figure(figsize=(12, 9))
    plt.scatter(df2[column_name], df2['Average'], alpha=0.5)
    plt.xlabel(column_name)
    plt.ylabel('Average')
    plt.title(f'{column_name} vs Average')
    plt.grid(True)

    # Walk the three columns in lockstep instead of indexing with a 0..n-1
    # counter: df2 keeps its original row labels after dropna(), so
    # df2[col][i] is a label lookup that raises KeyError or reads a different
    # row than the alias being enumerated once any row has been dropped.
    aliases_by_point = {}
    for x, y, alias in zip(df2[column_name], df2['Average'], df2['Alias']):
        # str() keeps the join below safe if an alias is missing (NaN).
        aliases_by_point.setdefault((x, y), []).append(str(alias))

    for position, aliases in aliases_by_point.items():
        plt.annotate(', '.join(aliases), position, fontsize=8)
    plt.show()

# Selector for the scatter plot's x-axis column. 'Average' is excluded because
# it is always the y axis, and 'Alias' because it is not a plotted value.
scatter_dropdown = widgets.Dropdown(
    options=[col for col in names if col not in ('Alias', 'Average')],
    # This control picks the plotted column; it does not sort anything.
    description='X axis:',
    layout=widgets.Layout(width='300px')
)
# Trailing semicolon: interact() returns the wrapped function, and as the last
# expression of the cell its repr would otherwise be echoed under the plot.
widgets.interact(scatterplot, column_name=scatter_dropdown);

In [None]:
# compare potential groupmates
# Five alias selectors, numbered 1-5, each offering every alias in df3.
alias_dropdowns = [
    widgets.Dropdown(
        options=df3['ALIAS'],
        description=f'Select Alias {slot}:',
    )
    for slot in range(1, 6)
]

# Scrollable area that holds the comparison table.
output_layout = {'max_height': '800px', 'overflow': 'scroll'}
output = widgets.Output(layout=output_layout)

def on_alias_dropdown_change(change):
    """Render the df3 rows for the currently selected aliases into `output`.

    Rows appear in the order of the dropdowns. An alias chosen in more than
    one dropdown is shown once, at the position of its first occurrence.

    Args:
        change: Change notification passed by the widget observer; unused,
            the current state is read from `alias_dropdowns` directly.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order. Every
    # dropdown starts on the same alias, so without this the table would
    # repeat that row once per dropdown still showing it.
    selected_aliases = list(dict.fromkeys(
        dropdown.value for dropdown in alias_dropdowns if dropdown.value
    ))
    # .loc with a list of labels both filters and orders the rows.
    selected_rows = df3.set_index('ALIAS').loc[selected_aliases].reset_index()
    with output:
        # wait=True swaps the old table for the new one without a blank frame.
        output.clear_output(wait=True)
        display(HTML(selected_rows.to_html(index=False)))

# Refresh the comparison table whenever any selector's value changes.
for selector in alias_dropdowns:
    selector.observe(on_alias_dropdown_change, names='value')

# Selectors side by side, with the table area underneath.
selector_row = widgets.HBox(alias_dropdowns)
display(widgets.VBox([selector_row, output]))