In [215]:
from pathlib import Path
from copy import deepcopy
from typing import cast, List, Literal, TypedDict

import pandas as pd
from ulid import ULID
from google.api_core.datetime_helpers import DatetimeWithNanoseconds

# Relative path of the CSV that a later cell loads with pd.read_csv(DF_PATH).
DF_PATH = Path('data/final_df_250603.csv')
# Fail fast, before any loading cell runs, if the file is not where expected.
assert DF_PATH.exists(), f"Data file not found at {DF_PATH}"

In [216]:
# Allowed values for the `eventType` field of EventData and SessionData.
EventType = Literal["new-session", "interaction"]

class EventData(TypedDict):
    """Typed shape of one interaction event record.

    The field names match the columns of the interaction CSV loaded in this
    notebook (correctTag, targetWord, eventType, orderingId, sessionId,
    draggedTag).
    """
    # Tag the analysis cell treats as the right answer for `targetWord`.
    correctTag: str
    # Tag recorded for the event; compared against `correctTag` downstream.
    draggedTag: str
    eventType: EventType
    # Position of the event; the analysis cell asserts it is strictly
    # increasing while iterating over one session's rows.
    orderingId: int
    # Identifier used to match events with their session row.
    sessionId: str
    # Word the event refers to; combined with `correctTag` to build lookup keys.
    targetWord: str

class Word(TypedDict):
    """One word of a session's sentence together with its tag.

    In the sample output the entries look like
    ``{'tag': 'Verbo', 'word': 'Será'}``.
    """
    # Surface form of the word (punctuation marks also appear as words in the sample data).
    word: str
    # Tag label attached to the word, e.g. 'Verbo' or 'Artigo' in the sample data.
    tag: str

class SessionData(TypedDict):
    """Typed shape of one "new-session" record.

    The field names match the columns of the new-session CSV loaded in this
    notebook (startTime, serie, words, eventType, sessionId).
    """
    eventType: EventType
    # Values such as '5-ensino-fundamental' appear in the sample output.
    serie: str
    # Identifier shared with the interaction events of the same session.
    sessionId: str
    # NOTE(review): typed as a Firestore-style timestamp here; after a CSV
    # round-trip the column is presumably a plain string - confirm.
    startTime: DatetimeWithNanoseconds
    # Words of the session's sentence, each with its tag.
    words: List[Word]

In [287]:
# Session inspected by the single-session cells below, which filter on the
# `sessionId` column with this value.
SESSION_ID = 'rZvC0RiJEH'
# Alternative session id kept for quick switching:
# SESSION_ID = 'SX-gw0Y4ER'

In [298]:
# Load the new-session export. The `words` column is read as text and is
# parsed into Python objects by cast_words right below.
new_session_df = pd.read_csv('data/new_session_data_250603.csv')

def cast_words(words: str) -> List[Word]:
    """Parse the textual form of a session's word list into Python objects.

    Args:
        words: Text of a Python list literal of ``{'tag': ..., 'word': ...}``
            dicts, as stored in the ``words`` column of the CSV.

    Returns:
        The parsed list, typed as ``List[Word]``. ``cast`` only informs the
        type checker; the entries themselves are not validated.

    Raises:
        AssertionError: If ``words`` is not a ``str`` (e.g. the column was
            already parsed, or the cell is NaN).
        ValueError, SyntaxError: If the text is not a valid Python literal.
    """
    import ast  # function-scope import so this cell does not depend on the imports cell

    assert isinstance(words, str), f"Expected str, got {type(words)}"
    # SECURITY: this used to call eval(), which executes any expression found
    # in the CSV cell. literal_eval accepts only literals (lists, dicts,
    # strings, numbers, ...) and raises on anything else.
    return cast(List[Word], ast.literal_eval(words))

# Turn the textual `words` column into real lists of {'tag', 'word'} dicts.
new_session_df['words'] = new_session_df['words'].apply(cast_words)

# One counter per word occurrence, initialised to 1 and keyed by
# "<tag>-<word>-<ulid>"; the fresh ULID keeps repeated words in the same
# sentence apart.
new_session_df['words_attempts'] = new_session_df['words'].map(
    lambda sentence: dict.fromkeys(
        (f"{entry['tag']}-{entry['word']}-{ULID()}" for entry in sentence),
        1,
    )
)

new_session_df.head(3)

# Handy inspection snippets for the selected session:
# display(new_session_df[new_session_df['sessionId'] == SESSION_ID]['words_attempts'].values[0])
# ' '.join([w['word'] for w in new_session_df[new_session_df['sessionId'] == SESSION_ID]['words'].values[0]])

Unnamed: 0,startTime,serie,words,eventType,sessionId,words_attempts
0,2025-05-27 00:49:01.547000+00:00,5-ensino-fundamental,"[{'tag': 'Verbo', 'word': 'Será'}, {'tag': 'Co...",new-session,xqEk5ZfKaG,"{'Verbo-Será-01JX8KAKG3RB9Z4B48WBR5Q3NP': 1, '..."
1,2025-05-27 01:01:42.509000+00:00,5-ensino-fundamental,"[{'tag': 'Nome Próprio', 'word': 'Ufa'}, {'tag...",new-session,uQY36KP907,{'Nome Próprio-Ufa-01JX8KAKG3DPAYPR09CY0RKFYS'...
2,2025-05-27 11:11:54.325000+00:00,6-ensino-fundamental,"[{'tag': 'Artigo', 'word': 'A'}, {'tag': 'Subs...",new-session,X2RK9EgWWZ,"{'Artigo-A-01JX8KAKG32D5RG28BWQDVAQBW': 1, 'Su..."


In [260]:
# Load the CSV at DF_PATH and peek at one randomly chosen sessionId.
# NOTE(review): `df` is rebound by the later cell that reads
# 'data/interaction_data_250603.csv', and sample() is unseeded, so the
# displayed id changes from run to run.
df = pd.read_csv(DF_PATH)
df['sessionId'].sample()

10244    soB5rFM9K4
Name: sessionId, dtype: object

In [289]:
# Load the interaction export into `df` (replacing whatever `df` held before)
# and peek at one randomly chosen sessionId. sample() is unseeded, so the
# displayed id changes from run to run.
df = pd.read_csv('data/interaction_data_250603.csv')
df['sessionId'].sample()

19756    Jswx-T3seR
Name: sessionId, dtype: object

In [290]:
# Rows of `df` that belong to SESSION_ID, copied so that later changes to
# `gp` cannot affect `df`.
gp = df[df['sessionId'] == SESSION_ID].copy()
gp

Unnamed: 0,correctTag,targetWord,eventType,orderingId,sessionId,draggedTag
8516,Artigo,Os,interaction,0,rZvC0RiJEH,Verbo


In [291]:
# Work on a private copy so the dict stored inside new_session_df is untouched.
words_attempts = deepcopy(new_session_df[new_session_df['sessionId'] == SESSION_ID]['words_attempts'].values[0])

# Group the session's ULIDs by their "<tag>-<word>" prefix.
words_attempts_dict = dict()

for i in words_attempts:
    # Keys are "<tag>-<word>-<ulid>". Split on the LAST hyphen only: a ULID
    # string never contains '-', but a tag or a word can (e.g. "disse-me"),
    # and a plain split('-') with a 3-way unpack raises ValueError on those.
    tag_word, ulid = i.rsplit('-', 1)
    words_attempts_dict.setdefault(tag_word, []).append(ulid)

# words_attempts_list['Artigo-A'].pop()
words_attempts_dict

{'Artigo-Os': ['01JX8GWW94AXAYAKP8S6P0KJYM'],
 'Substantivo-alunos': ['01JX8GWW94M769Y2SZ8Y8252TB'],
 'Verbo-estudaram': ['01JX8GWW94GNSMW9HFXVQ5TSZJ'],
 'Advérbio-bastante': ['01JX8GWW94PF5RSP0SC5SVC32W'],
 'Pontuação-.': ['01JX8GWW94MT2C4X6TB7WGH33Y']}

In [263]:
def create_words_attempts_list(words_attempts: dict) -> dict:
    """Group attempt-counter keys by their ``"<tag>-<word>"`` prefix.

    Args:
        words_attempts: Mapping whose keys have the form
            ``"<tag>-<word>-<ulid>"``. Only the keys are read; the mapping is
            not modified.

    Returns:
        A new dict mapping each ``"<tag>-<word>"`` prefix to the list of
        ULID suffixes found for it, in the iteration order of
        ``words_attempts``.

    Raises:
        ValueError: If a key does not contain at least a tag, a word and a
            ULID separated by hyphens.
    """
    words_attempts_dict: dict = {}

    for key in words_attempts:
        # Split on the LAST hyphen only: a ULID string never contains '-',
        # whereas a tag or a word may (e.g. "disse-me", or "-" as a token).
        # Splitting on every hyphen made such keys fail to unpack.
        tag_word, sep, ulid = key.rpartition('-')
        if not sep or '-' not in tag_word:
            raise ValueError(f"Malformed words_attempts key: {key!r}")
        words_attempts_dict.setdefault(tag_word, []).append(ulid)

    return words_attempts_dict

In [None]:
def fix_words_attempts(words_attempts: dict):
    """Placeholder for the per-session attempts correction (not implemented).

    For now it only prints the type and the value of its argument, so the
    shape of one ``words_attempts`` cell can be inspected, and then raises.

    Args:
        words_attempts: One value of the ``words_attempts`` column.

    Raises:
        NotImplementedError: Always, after printing. It is a subclass of
            ``Exception``, so existing ``except Exception`` handlers still
            catch it.
    """
    # The body used to read an undefined name `x` instead of the parameter,
    # which only worked when a stray global `x` was left in the kernel.
    print(type(words_attempts))
    print(words_attempts)
    raise NotImplementedError("This is a placeholder for the fix_words_attempts function.")
    
# NOTE(review): fix_words_attempts raises unconditionally, so this call
# aborts on the first row (see the traceback in the cell output).
new_session_df['words_attempts'].apply(fix_words_attempts)

<class 'dict'>
{'Verbo-Será-01JX8KAKG3RB9Z4B48WBR5Q3NP': 1, 'Conjunção Subordinativa-que-01JX8KAKG3CGDK2W2S55P5RRFJ': 1, 'Pronome-alguém-01JX8KAKG3DP4TBKMDN58XTTJ3': 1, 'Verbo-explicou-01JX8KAKG3CSXXANS955PSTAME': 1, 'Pronome-o-01JX8KAKG31RZRZAD97B06B9X9': 1, 'Pronome-que-01JX8KAKG3K0PW3XF2JWHQCM3Y': 1, 'Verbo-aconteceu-01JX8KAKG33WZYBXRK2S7TK476': 1, 'Preposição+Artigo-na-01JX8KAKG3BFTMWY7MV57GYTX4': 1, 'Substantivo-aula-01JX8KAKG3DE95T00QAV7BQXH8': 1, 'Adjetivo-passada-01JX8KAKG3PW8C21RTV580W588': 1, 'Pontuação-?-01JX8KAKG3PX5SB8HP28E17V8P': 1}


Exception: This is a placeholder for the fix_words_attempts function.

In [None]:
# Possible de-duplication step, currently disabled:
# gp.drop_duplicates(subset=['correctTag', 'targetWord', 'sessionId', 'draggedTag', 'serie'])

# Private copy of the selected session's counters, so the dict stored in
# new_session_df is not mutated by the bookkeeping below.
session_rows = new_session_df[new_session_df['sessionId'] == SESSION_ID]
words_attempts = deepcopy(session_rows['words_attempts'].values[0])

# "<tag>-<word>" -> ULIDs of the occurrences not yet answered correctly.
words_attempts_dict = create_words_attempts_list(words_attempts)

aux_idx = -1  # last orderingId seen so far
for _, row in gp.iterrows():
    # Guarantees the events are processed in ascending order.
    assert row['orderingId'] > aux_idx
    aux_idx = row['orderingId']

    k = f"{row['correctTag']}-{row['targetWord']}"
    assert k in words_attempts_dict, f"Key {k} not found in words_attempts"

    if row['draggedTag'] == row['correctTag']:
        # Right answer: the oldest pending occurrence of this word is done.
        words_attempts_dict[k].pop(0)
    else:
        # Wrong answer: charge one more attempt to the oldest pending occurrence.
        words_attempts[f'{k}-{words_attempts_dict[k][0]}'] += 1

# Occurrences still pending were never answered correctly: take back the
# initial 1 so only the wrong attempts remain counted. The lists are drained
# in place, so words_attempts_dict ends up with empty lists.
for k, v in words_attempts_dict.items():
    while v:
        ulid_ = v.pop(0)
        words_attempts[f'{k}-{ulid_}'] -= 1

words_attempts, words_attempts_dict


({'Artigo-Os-01JX8GWW94AXAYAKP8S6P0KJYM': 1,
  'Substantivo-alunos-01JX8GWW94M769Y2SZ8Y8252TB': 0,
  'Verbo-estudaram-01JX8GWW94GNSMW9HFXVQ5TSZJ': 0,
  'Advérbio-bastante-01JX8GWW94PF5RSP0SC5SVC32W': 0,
  'Pontuação-.-01JX8GWW94MT2C4X6TB7WGH33Y': 0},
 {'Artigo-Os': [],
  'Substantivo-alunos': [],
  'Verbo-estudaram': [],
  'Advérbio-bastante': [],
  'Pontuação-.': []})

Gosto de ler livros de aventura e de mistério

Prep-de-ulid1: 1
Prep-de-ulid2: 3
Prep-de-ulid3: 2

Prep-de: [ulid3] 