In [None]:
import streamlit as st
import pandas as pd
from collections import defaultdict
from google.cloud import firestore
from google.oauth2 import service_account
import plotly.express as px

In [None]:
# Open the Firestore client used by every cell below.
# The service-account info is read from the "firebase" entry of Streamlit's
# secrets store, so no key material lives in the notebook itself.
fb_credentials = st.secrets["firebase"]
creds = service_account.Credentials.from_service_account_info(
    fb_credentials,
)
db = firestore.Client(
    project="feature-annotation",
    credentials=creds,
)

In [None]:
# Group the documents of the "stats" collection by their annotation_count:
# cnt_dict maps each count value to the list of document ids that have it.
stats = db.collection("stats").stream()
cnt_dict = defaultdict(list)
for stat_doc in stats:
    fields = stat_doc.to_dict()
    cnt_dict[fields["annotation_count"]].append(stat_doc.id)

# One summary line per distinct count, in order of first appearance in the stream.
for n_annotations, sample_ids in cnt_dict.items():
    n_samples = len(sample_ids)
    print(f'{n_annotations} annotations: {n_samples} samples')

In [None]:
# Pull every document of the "annotations" collection into one DataFrame,
# then normalise it in a single chain:
#   * user_timestamp is parsed as seconds since the Unix epoch (unit='s')
#   * cells equal to the string 'Please' are turned into missing values
annotations = db.collection("annotations").stream()
df = (
    pd.DataFrame([doc.to_dict() for doc in annotations])
    .assign(user_timestamp=lambda frame: pd.to_datetime(frame['user_timestamp'], unit='s'))
    .replace('Please', pd.NA)
)

df.head()

In [None]:
# Number of annotation rows whose user_interp value is missing.
df['user_interp'].isna().sum()

In [None]:
# Mean user_interp score per component_set_name: `dd` holds the table
# (sorted from highest to lowest mean) and the bar chart is drawn from it.
n_missing = df['user_interp'].isna().sum()
print(f'Dropping {n_missing} rows with missing user_interp')

# Build df_interp as a new frame via .assign instead of writing a column into
# the result of dropna(); the latter raises SettingWithCopyWarning on pandas
# versions without copy-on-write. `df` itself is left untouched.
df_interp = (
    df.dropna(subset=['user_interp'])
    .assign(user_interp=lambda frame: frame['user_interp'].astype(float))
)

# Aggregate once and reuse the result for the plot (previously the same
# groupby/mean/sort pipeline was evaluated twice and `dd` was never used).
dd = df_interp.groupby('component_set_name')['user_interp'].mean().sort_values(ascending=False)
dd.plot(kind='bar', ylabel="Mean interpretability score in %", title="Mean interpretability score by component_set_name")

In [None]:
# Number of annotation rows per user_name (most frequent first); used to pick
# which user's timestamps to inspect in the next cell.
df['user_name'].value_counts()

In [None]:
# Histogram of one user's annotation timestamps (50 bins).
# To look at time of day instead, histogram `user_timestamp.dt.hour` for the
# same selection.
user = "Can"
user_timestamps = df.loc[df['user_name'] == user, 'user_timestamp']
# Series.hist returns the matplotlib Axes it drew on; it is kept under the
# name `fig` as before.
fig = user_timestamps.hist(bins=50, xlabelsize=10, ylabelsize=10, figsize=(10, 5))
fig.set_title(f"{user}'s annotation timestamps")