# MoodMorph Assessment Evaluation Analysis

Jonas de Araújo Luz Junior and
Maria Andréia Formico Rodrigues

PPGIA & Gira Lab, Universidade de Fortaleza (Unifor)
Av. Washington Soares, 1321, Fortaleza, CE, Brazil, 60811-341.
___
# Data Preparation Notebook

In [1]:
import pandas as pd

from local.db_manager import DBManager

## Source data

The source data is read from the `Assessment.sqlite` SQLite database, which contains the raw performance data of the animation generation operations.

In [2]:
# Open the assessment database and load the two source tables.
db = DBManager('data/Assessment.sqlite')
df_chars, df_tests = (
    db.load_dataframe(table) for table in ('characters', 'tests')
)

# Show the index name and the column list of each source dataframe.
display(
    f"Index of df_chars: {df_chars.index.name}",
    f"Index of df_tests: {df_tests.index.name}",
    f"Columns of df_chars: {df_chars.columns.tolist()}",
    f"Columns of df_tests: {df_tests.columns.tolist()}",
)

'Index of df_chars: id'

'Index of df_tests: id'

"Columns of df_chars: ['family', 'name', 'blendshapes']"

"Columns of df_tests: ['character_id', 'model', 'emotion', 'level', 'date_time', 'duration', 'animation', 'input_tokens', 'output_tokens', 'total_tokens']"

## Work data

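The source data must be transformed before it is useful for analysis or presentation: the test records are joined with their character metadata, and several derived fields are calculated.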

### Calculated fields

In [3]:
# Preview the first two rows of the raw tests dataframe before deriving new fields.
df_tests.head(2)

Unnamed: 0_level_0,character_id,model,emotion,level,date_time,duration,animation,input_tokens,output_tokens,total_tokens
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,314290e3a591271bd6499ad68b008e29788a27a60bc344...,openai/gpt-4o,anger,M,2025-10-12 13:58:53,5.957047,"{\r\n ""timeline"": [\r\n {\r\n ""name"":...",14814,405,15219
2,314290e3a591271bd6499ad68b008e29788a27a60bc344...,openai/gpt-4o,contempt,M,2025-10-12 13:59:29,3.803284,"{\r\n ""timeline"": [\r\n {\r\n ""name"":...",14815,159,14974


In [4]:
# Enrich each test record with the family and name of its character.
# `character_id` in df_tests is matched against the index of df_chars; the
# joined columns are renamed with a `character_` prefix, and the first letter
# of the character name is stored in `character_letter`.
#
df_work = (
    df_tests
    .join(df_chars[['family', 'name']], on='character_id')
    .rename(columns={
        'family': 'character_family',
        'name': 'character_name',
    })
    .assign(character_letter=lambda df: df['character_name'].str[0])
)

# The join is a left join: a test whose character_id has no match in df_chars
# would silently get NaN character fields. Fail early instead.
assert df_work['character_name'].notna().all(), \
    "Some tests have no matching character name in df_chars."

display(f"Index of df_work: {df_work.index.name}")
display(f"Columns of df_work: {df_work.columns.tolist()}")
df_work[['character_id', 'character_name', 'character_letter']].head(2)

'Index of df_work: id'

"Columns of df_work: ['character_id', 'model', 'emotion', 'level', 'date_time', 'duration', 'animation', 'input_tokens', 'output_tokens', 'total_tokens', 'character_family', 'character_name', 'character_letter']"

Unnamed: 0_level_0,character_id,character_name,character_letter
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,314290e3a591271bd6499ad68b008e29788a27a60bc344...,Atticus,A
2,314290e3a591271bd6499ad68b008e29788a27a60bc344...,Atticus,A


In [5]:
# Derive a short model name by keeping only the text after the last '/'
# (e.g. 'openai/gpt-4o' -> 'gpt-4o').
#
df_work['model_name'] = df_work['model'].str.rsplit('/', n=1).str[-1]

display(df_work.loc[:, ['model', 'model_name']].head(2))

Unnamed: 0_level_0,model,model_name
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,openai/gpt-4o,gpt-4o
2,openai/gpt-4o,gpt-4o


In [6]:
# TOKENS COST :: Compare the reported total with input + output tokens, and
# express the difference both in tokens and as a percentage of the total.
#
df_work['calculated_tokens'] = df_work['input_tokens'].add(df_work['output_tokens'])
df_work['delta_tokens'] = df_work['total_tokens'].sub(df_work['calculated_tokens'])
df_work['percent_delta_tokens'] = (
    df_work['delta_tokens'].mul(100).div(df_work['total_tokens'])
)

display(df_work.columns)
df_work.loc[:, [
    'input_tokens',
    'output_tokens',
    'total_tokens',
    'calculated_tokens',
    'delta_tokens',
    'percent_delta_tokens',
]].head(2)

Index(['character_id', 'model', 'emotion', 'level', 'date_time', 'duration',
       'animation', 'input_tokens', 'output_tokens', 'total_tokens',
       'character_family', 'character_name', 'character_letter', 'model_name',
       'calculated_tokens', 'delta_tokens', 'percent_delta_tokens'],
      dtype='object')

Unnamed: 0_level_0,input_tokens,output_tokens,total_tokens,calculated_tokens,delta_tokens,percent_delta_tokens
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,14814,405,15219,15219,0,0.0
2,14815,159,14974,14974,0,0.0


In [7]:
# List the models whose reported total differs from input + output tokens.
#
df_work.loc[df_work['delta_tokens'].ne(0), 'model'].unique()

array(['xai/grok-4-fast'], dtype=object)

We can see that only Grok-4 Fast (`xai/grok-4-fast`) reports a total token count that differs from the sum of its input and output tokens.

In [8]:
# Count how many emotions each label combines. Labels join emotions with the
# word "and" (e.g. "fear and sadness"), so the count is the number of
# standalone "and" words plus one. The word boundaries (\b) keep an emotion
# name that merely contains the letters "and" from inflating the count, which
# a plain substring count would do.
#
df_work['emotion_count'] = (
    df_work['emotion'].str.lower().str.count(r'\band\b') + 1
)

display(df_work.columns)
df_work[['emotion', 'emotion_count']].head(10)

Index(['character_id', 'model', 'emotion', 'level', 'date_time', 'duration',
       'animation', 'input_tokens', 'output_tokens', 'total_tokens',
       'character_family', 'character_name', 'character_letter', 'model_name',
       'calculated_tokens', 'delta_tokens', 'percent_delta_tokens',
       'emotion_count'],
      dtype='object')

Unnamed: 0_level_0,emotion,emotion_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,anger,1
2,contempt,1
3,disgust,1
4,fear,1
5,happiness,1
6,sadness,1
7,surprise,1
8,happiness and surprise,2
9,anger and contempt,2
10,fear and sadness,2


### Work data info and description

In [9]:
# Summarise the columns, non-null counts and dtypes of the work dataframe.
df_work.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1020 entries, 1 to 1020
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   character_id          1020 non-null   object 
 1   model                 1020 non-null   object 
 2   emotion               1020 non-null   object 
 3   level                 1020 non-null   object 
 4   date_time             1020 non-null   object 
 5   duration              1020 non-null   float64
 6   animation             1020 non-null   object 
 7   input_tokens          1020 non-null   int64  
 8   output_tokens         1020 non-null   int64  
 9   total_tokens          1020 non-null   int64  
 10  character_family      1020 non-null   object 
 11  character_name        1020 non-null   object 
 12  character_letter      1020 non-null   object 
 13  model_name            1020 non-null   object 
 14  calculated_tokens     1020 non-null   int64  
 15  delta_tokens          1020

In [10]:
# Descriptive statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df_work.describe()

Unnamed: 0,duration,input_tokens,output_tokens,total_tokens,calculated_tokens,delta_tokens,percent_delta_tokens,emotion_count
count,1020.0,1020.0,1020.0,1020.0,1020.0,1020.0,1020.0,1020.0
mean,22.578789,13668.201961,1728.398039,15793.032353,15396.6,396.432353,2.521119,1.764706
std,14.478585,1964.471645,1898.725363,3029.722252,3155.044273,955.161835,5.988312,0.730339
min,0.202103,11319.0,33.0,11391.0,11391.0,0.0,0.0,1.0
25%,10.975021,11358.0,356.75,13490.25,12473.75,0.0,0.0,1.0
50%,20.1427,13879.5,960.0,15275.5,15175.0,0.0,0.0,2.0
75%,30.711487,14817.0,2541.25,17286.25,16933.0,0.0,0.0,2.0
max,111.804688,16722.0,21712.0,38431.0,38431.0,5154.0,29.630767,3.0


## Save work dataframe

In [11]:
# Persist the work dataframe through DBManager under the name 'work', passing index=True.
# NOTE(review): presumably this writes a 'work' table to the same SQLite database and
# keeps the `id` index as a column — confirm against DBManager.save_dataframe,
# including what happens when the table already exists.
db.save_dataframe(df_work, 'work', index=True)