In [None]:
import streamlit as st
import pandas as pd
from collections import defaultdict
from google.cloud import firestore
from google.oauth2 import service_account
import plotly.express as px
import matplotlib.pyplot as plt

In [None]:
# Build a Firestore client from the service-account info stored under the
# "firebase" key of the Streamlit secrets.
fb_credentials = st.secrets["firebase"]
creds = service_account.Credentials.from_service_account_info(
    fb_credentials
)
db = firestore.Client(
    project="feature-annotation",
    credentials=creds,
)

In [None]:
# Group the ids of all "stats" documents by their annotation_count value:
# cnt_dict maps annotation_count -> list of document ids.
stats = db.collection("stats").stream()
cnt_dict = defaultdict(list)
for doc in stats:
    n_annotations = doc.to_dict()["annotation_count"]
    cnt_dict[n_annotations].append(doc.id)

# Report how many documents share each annotation count.
for n_annotations, doc_ids in cnt_dict.items():
    n_samples = len(doc_ids)
    print(f'{n_annotations} annotations: {n_samples} samples')

In [None]:
# Fetch every document of the "annotations" collection into one DataFrame.
annotations = db.collection("annotations").stream()
records = [doc.to_dict() for doc in annotations]

# user_timestamp is parsed as seconds since the epoch; afterwards every cell
# equal to 'Please' is turned into pd.NA (presumably an unanswered-field
# placeholder -- TODO confirm against the annotation app).
df = (
    pd.DataFrame(records)
    .assign(user_timestamp=lambda frame: pd.to_datetime(frame['user_timestamp'], unit='s'))
    .replace('Please', pd.NA)
)

df.head()

In [None]:
# Number of annotation rows whose user_interp value is missing
df['user_interp'].isna().sum()

In [None]:
# Mean user_interp score per component_set_name, drawn as a bar chart
# (highest mean first).
n_missing = df['user_interp'].isna().sum()
print(f'Dropping {n_missing} rows with missing user_interp')

# .copy() makes df_interp an independent frame, so the dtype assignment below
# is never a write into a derived slice of df (no SettingWithCopyWarning).
df_interp = df.dropna(subset=['user_interp']).copy()
df_interp['user_interp'] = df_interp['user_interp'].astype(float)

(
    df_interp.groupby('component_set_name')['user_interp']
    .mean()
    .sort_values(ascending=False)
    .plot(kind='bar', ylabel="Mean interpretability score in %", title="Mean interpretability score by component_set_name")
)

In [None]:
# For each component set: print the mean and standard deviation of user_interp,
# then show a bar chart of how often each score value was given.
for set_name in df_interp['component_set_name'].unique():
    print(f'Component set: {set_name}')
    in_set = df_interp['component_set_name'] == set_name
    dfg = df_interp[in_set]
    scores = dfg['user_interp']
    print(scores.mean())
    print(scores.std())
    score_counts = dfg.groupby('user_interp')['user_interp'].count()
    score_counts.plot(kind='bar', ylabel="Count", title=f"Interpretability score distribution for {set_name}")
    plt.show()


In [None]:
# Standard deviation of user_interp within each component set, largest first
interp_std = df_interp.groupby('component_set_name')['user_interp'].std()
interp_std.sort_values(ascending=False)

### User annotation times

In [None]:
# Number of annotation rows per user_name, most active user first
df['user_name'].value_counts()

In [None]:
# Histogram of the annotation timestamps of a single annotator
user = "Can"
user_times = df.loc[df['user_name'] == user, 'user_timestamp']
# NOTE(review): despite its name, `fig` holds whatever Series.hist returns
# (an Axes-like object: set_title is called on it) -- name kept for compatibility.
fig = user_times.hist(bins=50, xlabelsize=10, ylabelsize=10, figsize=(10,5))
fig.set_title(f"{user}'s annotation timestamps")
# Alternative view, kept for reference: hour-of-day histogram for another annotator
# df[df.user_name == "cocolugoj"].user_timestamp.dt.hour.hist(bins=50, xlabelsize=10, ylabelsize=10, figsize=(10,5))

### Inter-annotator agreement

In [None]:
# Invert cnt_dict (annotation_count -> [ids]) into cnt_dict_rev (id -> [annotation_count]).
cnt_dict_rev = defaultdict(list)
for count, sample_ids in cnt_dict.items():
    for sample_id in sample_ids:
        cnt_dict_rev[sample_id].append(count)

# Attach the annotation count of each row's sample as the column sample_cnt.
# A plain dict lookup through Series.map yields NaN for a sample_id that has no
# entry, instead of the IndexError that indexing an empty defaultdict list raised.
sample_cnt_by_id = {sample_id: counts[0] for sample_id, counts in cnt_dict_rev.items()}
df['sample_cnt'] = df['sample_id'].map(sample_cnt_by_id)

In [None]:
# Inter-annotator agreement: Pearson correlation (the default of Series.corr)
# between the two ratings of every component that was labeled twice, reported
# separately for each component_set_name.
#
# df itself is narrowed here (rated rows only, numeric user_interp / sample_id,
# sample_cnt == 2), so any cell that runs afterwards sees the narrowed frame.
df = df.dropna(subset=['user_interp'])
# .assign builds a new frame instead of writing columns into the dropna result.
df = df.assign(
    user_interp=df['user_interp'].astype(float),
    sample_id=df['sample_id'].astype(float),
)
df = df[df.sample_cnt == 2]
print('Pearson correlation of components labeled twice per component set\n')
for comp in sorted(df['component_set_name'].unique()):
    dfg = df[df["component_set_name"] == comp]
    # sample_cnt == 2 does not guarantee two usable rows: one of the two ratings
    # may have been dropped above for a missing user_interp. Keep only samples
    # with exactly two rated rows; a lone row would shift the even/odd split
    # below and pair up ratings that belong to different samples.
    ratings_per_sample = dfg.groupby('sample_id')['user_interp'].transform('count')
    dfg = dfg[ratings_per_sample == 2].sort_values(by=['sample_id'])
    # After sorting, rows 0/1, 2/3, ... are the two ratings of the same sample.
    annotations1 = dfg.iloc[::2].reset_index(drop=True)
    annotations2 = dfg.iloc[1::2].reset_index(drop=True)
    r = annotations1.user_interp.corr(annotations2.user_interp)
    print(f'{comp}:\tr = {r:.2f}  (n={len(annotations1)})')


In [None]:
# Rows annotated by user "cocolugoj" in component set "sparse_RC".
# Both conditions are combined into one boolean mask; chaining two [] filters
# applied a mask built from the full frame to an already-filtered frame, which
# makes pandas reindex the mask and emit a UserWarning.
df[(df.user_name == "cocolugoj") & (df.component_set_name == "sparse_RC")]