In [1]:
import pandas as pd
import firebase_admin
from firebase_admin import credentials
from firebase_admin import db
from pathlib import Path
from firebase_admin import firestore
import datetime
import numpy as np

In [2]:
# Fetch the service account key JSON file contents
filename = './key/digital-education-b7441-firebase-adminsdk-o3tx7-26bb9a352d.json'
cred = credentials.Certificate(filename)
default_app = firebase_admin.initialize_app(cred)
db = firestore.client()

In [3]:
def load_df(name):
    col = list(db.collection(name).stream())
    col_dict = list(map(lambda x: x.to_dict(), col))
    df = pd.DataFrame(col_dict)
    return df

In [4]:
df_users = load_df('users')
df_users = df_users.rename(columns={'user':'uuid', 'timestamp':'time'})
df_users.head()

Unnamed: 0,age,language,uuid,username,time
0,23,DE,1f46a7c6-dc11-4f4c-8c22-6f2081e42629,Arne,
1,23,EN,e70c0a67-906c-4e67-95ba-000640f02168,FDNJK,
2,21,EN,257199b2-db6c-43aa-8ec3-57cadf2aaf2b,Bonjez,1669555000000.0
3,23,EN,f76b3145-4588-497b-bb2c-ca5a4e27e300,asdf,1669558000000.0
4,12,EN,f53a4df6-8e30-48da-814c-bd44dc77ddbb,ffds,1669556000000.0


In [5]:
df_logs = load_df('logs')
df_logs = df_logs.rename(columns={'user':'uuid', 'timestamp':'time'})

df_logs.head()

Unnamed: 0,currentactivity,cause,timespent,part,uuid,time,nextactivity,group
0,3.0,screenChange,0.126,Problem Solving,db55aeb4-c195-4626-afd9-69d340affd89,1669549050242,4,
1,15.0,screenChange,0.072,Instruction,c0eff1b8-d3d6-4cb5-a10e-aee380d8796c,1669573947216,16,
2,39.0,screenChange,2.129,Instruction,3ee60990-2dfd-4e6b-8890-02b673b9495e,1669558356144,40,
3,9.0,screenChange,0.064,Instruction,e1449c07-56a9-43ed-936a-6cd0db5a9143,1669575184832,10,
4,24.0,screenChange,0.24,Instruction,7c4e5ae8-9c49-4c49-88db-06bc175970e9,1669543840617,25,


In [6]:
df_boards = load_df('boards')
df_boards.head()

Unnamed: 0,height,uuid,time,id,width,part,slide,gridString
0,1,765f54c0-2d09-4736-a4fb-1e6c202f8bfb,1669573688679,0,4,ps,15,0000
1,1,3b09cd4a-2fc1-4673-9ec8-2a25e3532711,1669573856910,1,9,ps,10,000000000
2,1,c0eff1b8-d3d6-4cb5-a10e-aee380d8796c,1669573963434,0,9,ps,10,000000000
3,8,e1449c07-56a9-43ed-936a-6cd0db5a9143,1669575239444,0,8,assessment,7,0000000000000000000000000000010000000000000000...
4,1,04997195-3320-4372-ad30-6400988107c2,1669573466775,0,9,ps,11,000000000


In [7]:
df_feedback = load_df('feedbacks')
df_feedback.head()

Unnamed: 0,part,message,uuid,time,slide
0,assessment,adf,04997195-3320-4372-ad30-6400988107c2,1669573489004,4
1,assessment,adsf,04997195-3320-4372-ad30-6400988107c2,1669573486029,3
2,ps,Ez,4a893c00-09e0-4df3-8256-745a2f5056a4,1669558720213,5
3,ps,Haha,54619c2f-fda5-4391-a12b-423070a17d15,1669561253981,12
4,ps,gcd,a5590fdc-7172-4b79-ba5a-4d4b06647330,1669558477287,16


In [8]:
df_logs.groupby('uuid').head()

Unnamed: 0,currentactivity,cause,timespent,part,uuid,time,nextactivity,group
0,3.0,screenChange,0.126,Problem Solving,db55aeb4-c195-4626-afd9-69d340affd89,1669549050242,4,
1,15.0,screenChange,0.072,Instruction,c0eff1b8-d3d6-4cb5-a10e-aee380d8796c,1669573947216,16,
2,39.0,screenChange,2.129,Instruction,3ee60990-2dfd-4e6b-8890-02b673b9495e,1669558356144,40,
3,9.0,screenChange,0.064,Instruction,e1449c07-56a9-43ed-936a-6cd0db5a9143,1669575184832,10,
4,24.0,screenChange,0.240,Instruction,7c4e5ae8-9c49-4c49-88db-06bc175970e9,1669543840617,25,
...,...,...,...,...,...,...,...,...
2955,1.0,screenChange,0.466,Problem Solving,f23aa3c4-f367-4ba1-841d-40abb619a5bb,1669564318024,2,
2966,2.0,screenChange,0.093,Problem Solving,dd5506dc-abda-4075-b12b-95ae00324d36,1669549538794,3,
2968,2.0,screenChange,0.165,Problem Solving,a27da6ba-0e1c-4457-a82a-bba376ed6a93,1669550319354,3,
2981,,screenChange,0.263,,a3f3d41a-d5d5-497e-9ea0-43c12dbbc3ec,1669543522017,2,Problem Solving


In [9]:
def convert_timestamp(x):
    if not np.isnan(x):
        return datetime.datetime.fromtimestamp(x/1000)
    else:
        return x

In [10]:
def query_user(uuid):
    user_data = df_users[df_users.uuid == uuid]

    username = user_data.username.iloc[0]
    registered_at = convert_timestamp(user_data.time.iloc[0])
    language = user_data.language.iloc[0]
    age = user_data.age.iloc[0]

    print(f'querying data for user {username}, uuid: {uuid}, age {age}, language: {language}, registered at {registered_at}')
    user_logs = df_logs[df_logs.uuid == uuid]
    user_feedbacks = df_feedback[df_feedback.uuid == uuid]
    user_boards = df_boards[df_boards.uuid == uuid]
    print(user_boards.groupby('slide'))
    user_history = pd.concat([user_logs, user_feedbacks, user_boards],sort=False).sort_values('time')
    user_history['time'] = user_history['time'].apply(lambda x: convert_timestamp(x))
    user_history = user_history.set_index('time')

    return user_data, user_history

In [11]:
## ENTER USERNAME YOU WISH TO QUERY HERE
user_to_query = 'Rosine'

test_uuid = df_users[df_users.username == user_to_query]['uuid'].iloc[0]

data, hist = query_user(test_uuid)
hist.head(64)
hist.to_csv('hist.csv')

querying data for user Turniphead, uuid: e1449c07-56a9-43ed-936a-6cd0db5a9143, age 23, language: EN, registered at 2022-11-27 19:53:02.347000
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001B39FE7A520>
