In [None]:
import polars as pl

In [None]:
# Read the pipe-delimited journal export into a polars DataFrame.
journal_data = pl.read_csv(
    './journal_entries.csv',
    separator='|',
)

In [None]:
# Preview the first rows to check that the columns were parsed as expected.
journal_data.head()

doc_id,date,year,month,day,hour,sentence,text
str,str,i64,i64,i64,i64,i64,str
"""2024-12-02_16-13-13-235.txt""","""2024-12-02""",2024,12,2,16,1,"""Why do you think that could be…"
"""2024-12-02_16-13-13-235.txt""","""2024-12-02""",2024,12,2,16,2,"""There are plenty of other reas…"
"""2024-12-02_16-13-13-235.txt""","""2024-12-02""",2024,12,2,16,3,"""Today has been such a great da…"
"""2024-12-02_19-20-00-554.txt""","""2024-12-02""",2024,12,2,19,1,"""I guess I've got to break the …"
"""2024-12-02_19-20-00-554.txt""","""2024-12-02""",2024,12,2,19,2,"""Somebody hit my truck on it's …"


In [None]:
# Pull the `text` column out as a plain Python list, one entry per row.
sentences = journal_data.get_column('text').to_list()

In [None]:
# Display the extracted list to eyeball the text that will be scored.
sentences

['Why do you think that could be the case?',
 'There are plenty of other reasons why probably.',
 "Today has been such a great day so far that there's really nothing on my mind but success, sweet, sweet success and nothing else.",
 "I guess I've got to break the bad news to you all...",
 "Somebody hit my truck on it's left side panel while I was eating a delicious chimichanga from Walmart.",
 "I've never cried more over a bnuch of beans and rice then when I saw the metal get dented like a foldable crepe.",
 'This is a part of the process.',
 'What more could there be really?',
 "I like how there's so much to see and do!",
 'What a brand new world that makes you really start to think about things really?',
 "But aren't you worried?",
 'No, not really.',
 "I've thought about a lot things recently like if there's sense of time space beyond comprehension.",
 'What if there really were other types of reality in the great cosmos?',
 'How could I be so blind as to not see any of it!',
 'Where

In [None]:
from tokenizers import Tokenizer
import onnxruntime as ort

from os import cpu_count
import numpy as np  # only used for the postprocessing sigmoid

# `sentences` is expected to be a list of strings built earlier in the notebook;
# a single-item batch such as ["hello world"] works too.

# Emotion names from the go_emotions dataset. Order matters: later code looks
# labels up by position.
labels = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring',
    'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval',
    'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
    'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization',
    'relief', 'remorse', 'sadness', 'surprise', 'neutral',
]

tokenizer = Tokenizer.from_pretrained("SamLowe/roberta-base-go_emotions")

# Optional: reuse the tokenizer's existing padding settings but clear the fixed
# length, so each batch is padded only to its longest entry. Without this the
# model runs slower, especially on short inputs.
params = dict(tokenizer.padding, length=None)
tokenizer.enable_padding(**params)

# Tokenize every sentence in a single batch.
tokens_obj = tokenizer.encode_batch(sentences)

def load_onnx_model(model_filepath):
    """Create a CPU-only ONNX Runtime inference session for a model file.

    Args:
        model_filepath: Location of the ONNX model; forwarded to
            ``ort.InferenceSession`` as ``path_or_bytes``.

    Returns:
        An ``ort.InferenceSession`` limited to ``CPUExecutionProvider``, with
        both inter-op and intra-op thread counts set from the CPU count.
    """
    # os.cpu_count() returns None when the count cannot be determined. The
    # thread-count options require an int, so fall back to 0, which ONNX
    # Runtime treats as "let the runtime choose".
    _num_threads = cpu_count() or 0

    _options = ort.SessionOptions()
    _options.inter_op_num_threads = _num_threads
    _options.intra_op_num_threads = _num_threads
    _providers = ["CPUExecutionProvider"]  # could use ort.get_available_providers()
    return ort.InferenceSession(path_or_bytes=model_filepath, sess_options=_options, providers=_providers)

# Open the model file and record the name of its first output.
model = load_onnx_model("model_quantized.onnx")
output_names = [model.get_outputs()[0].name]  # E.g. ["logits"]

# Collect the token ids and attention masks of every encoded sentence.
input_feed_dict = dict(
    input_ids=[encoding.ids for encoding in tokens_obj],
    attention_mask=[encoding.attention_mask for encoding in tokens_obj],
)

# Run the model once over the whole batch and keep the first (only requested)
# output: a numpy array with one row per input item and one column per label.
logits = model.run(input_feed=input_feed_dict, output_names=output_names)[0]
# produces a numpy array, one row per input item, one col per label

def sigmoid(x):
  """Element-wise logistic function 1 / (1 + exp(-x)), computed stably.

  Args:
    x: A scalar or array-like of numbers (e.g. a numpy array of logits).

  Returns:
    Values in the range 0 to 1 with the same shape as ``x``. Floating-point
    array inputs keep their dtype; a scalar input yields a numpy scalar.
  """
  x = np.asarray(x)
  # exp() is only ever taken of a non-positive number, so it cannot overflow
  # however large the magnitude of x is.
  decay = np.exp(-np.abs(x))
  # x >= 0: 1 / (1 + e^-x).  x < 0: e^x / (1 + e^x), the same value rewritten.
  result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
  # Indexing with () turns a 0-d array back into a scalar and leaves n-d
  # arrays untouched.
  return result[()]

# Post-processing: pass every logit through the sigmoid defined above, giving
# each label its own score in the range 0 to 1.
# Transformers' pipeline does this automatically, but with ORT we must do it manually.
model_outputs = sigmoid(logits)

In [None]:
# Report the top-scoring label for each input item.
for probas in model_outputs:
    top_result_index = np.argmax(probas)
    print(labels[top_result_index], "with score:", probas[top_result_index])

# Build the score table in one step: one row per input item, one column per
# label. `orient="row"` is given explicitly so the array is never read as
# column-major, even when the number of rows equals the number of labels.
# (Growing a frame row by row with vstack re-copies it on every iteration.)
df = pl.DataFrame(model_outputs, schema=labels, orient="row")

df

curiosity with score: 0.62250173
neutral with score: 0.72867113
joy with score: 0.61382854
sadness with score: 0.6804175
neutral with score: 0.6172108
sadness with score: 0.53383315
neutral with score: 0.8897027
curiosity with score: 0.57709944
love with score: 0.79359126
curiosity with score: 0.6504733
curiosity with score: 0.5080614
disapproval with score: 0.8499824
neutral with score: 0.4668462
curiosity with score: 0.7443946
neutral with score: 0.24542819
curiosity with score: 0.45311204
neutral with score: 0.57190543
confusion with score: 0.87994885
neutral with score: 0.9510493
love with score: 0.74739504
neutral with score: 0.35738194
neutral with score: 0.9375534
admiration with score: 0.78257483
neutral with score: 0.7206731
neutral with score: 0.9605323


admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,disappointment,disapproval,disgust,embarrassment,excitement,fear,gratitude,grief,joy,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
0.00197,0.001594,0.001824,0.007722,0.008916,0.002216,0.367905,0.622502,0.001434,0.00198,0.005644,0.001139,0.000597,0.001651,0.001646,0.001146,0.000255,0.000808,0.001471,0.00062,0.005584,0.000056,0.005514,0.000169,0.000457,0.001115,0.006525,0.26142
0.002054,0.002355,0.000723,0.004981,0.082778,0.001035,0.202407,0.007913,0.001052,0.002273,0.012508,0.001054,0.00048,0.000992,0.000724,0.001804,0.000249,0.0014,0.00115,0.000503,0.009656,0.00022,0.058477,0.000501,0.000416,0.000825,0.001471,0.728671
0.514721,0.005169,0.001948,0.008511,0.084323,0.008455,0.001237,0.001136,0.006148,0.002621,0.00525,0.000772,0.000513,0.092634,0.000587,0.05875,0.001005,0.613829,0.011455,0.000974,0.012538,0.026969,0.008357,0.026981,0.000334,0.001619,0.002806,0.018327
0.002316,0.00286,0.001906,0.010791,0.01419,0.056138,0.003089,0.004331,0.008424,0.129481,0.008022,0.002548,0.001987,0.003082,0.011483,0.001876,0.012272,0.006476,0.004132,0.03707,0.012924,0.000599,0.011076,0.006326,0.020937,0.680417,0.00191,0.069102
0.037635,0.004592,0.002067,0.012246,0.035002,0.002152,0.00055,0.000456,0.003131,0.009742,0.002254,0.005116,0.002089,0.027842,0.007848,0.000692,0.001991,0.079268,0.007747,0.002629,0.002861,0.00878,0.01885,0.006296,0.000577,0.008555,0.004123,0.617211
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
0.001832,0.000605,0.008093,0.034745,0.276817,0.039473,0.003499,0.001454,0.005761,0.036792,0.086785,0.005253,0.001289,0.000467,0.002077,0.000717,0.002336,0.002802,0.02802,0.002852,0.003881,0.000696,0.028706,0.003855,0.004733,0.083049,0.000243,0.357382
0.002789,0.001944,0.001001,0.005919,0.036392,0.006575,0.001252,0.000857,0.004097,0.002089,0.001905,0.001359,0.000355,0.001557,0.001256,0.0008,0.000366,0.004032,0.00285,0.00055,0.005498,0.000458,0.008818,0.000986,0.000428,0.001859,0.000347,0.937553
0.782575,0.000574,0.001254,0.004901,0.077349,0.011227,0.000948,0.001056,0.002414,0.001769,0.004483,0.000792,0.000413,0.003644,0.000383,0.2622,0.00054,0.002692,0.002925,0.000138,0.013707,0.006016,0.00861,0.001827,0.001122,0.001234,0.001286,0.056682
0.006976,0.005238,0.003202,0.004814,0.037277,0.00705,0.001896,0.002429,0.00885,0.001897,0.000849,0.003534,0.001159,0.018469,0.004139,0.00071,0.001095,0.018583,0.143564,0.001586,0.002214,0.001219,0.008574,0.001207,0.000828,0.003977,0.001915,0.720673


In [None]:
# The two frames are joined purely by row position, so they must have the same
# number of rows; otherwise scores would be paired with the wrong sentence
# (or padded with nulls) without any error.
assert journal_data.height == df.height, (
    f"row count mismatch: {journal_data.height} journal rows vs {df.height} score rows"
)

# Put the per-label scores next to the original journal columns.
df_combined = pl.concat([journal_data, df], how="horizontal")

df_combined

doc_id,date,year,month,day,hour,sentence,text,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,disappointment,disapproval,disgust,embarrassment,excitement,fear,gratitude,grief,joy,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
str,str,i64,i64,i64,i64,i64,str,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
"""2024-12-02_16-13-13-235.txt""","""2024-12-02""",2024,12,2,16,1,"""Why do you think that could be…",0.00197,0.001594,0.001824,0.007722,0.008916,0.002216,0.367905,0.622502,0.001434,0.00198,0.005644,0.001139,0.000597,0.001651,0.001646,0.001146,0.000255,0.000808,0.001471,0.00062,0.005584,0.000056,0.005514,0.000169,0.000457,0.001115,0.006525,0.26142
"""2024-12-02_16-13-13-235.txt""","""2024-12-02""",2024,12,2,16,2,"""There are plenty of other reas…",0.002054,0.002355,0.000723,0.004981,0.082778,0.001035,0.202407,0.007913,0.001052,0.002273,0.012508,0.001054,0.00048,0.000992,0.000724,0.001804,0.000249,0.0014,0.00115,0.000503,0.009656,0.00022,0.058477,0.000501,0.000416,0.000825,0.001471,0.728671
"""2024-12-02_16-13-13-235.txt""","""2024-12-02""",2024,12,2,16,3,"""Today has been such a great da…",0.514721,0.005169,0.001948,0.008511,0.084323,0.008455,0.001237,0.001136,0.006148,0.002621,0.00525,0.000772,0.000513,0.092634,0.000587,0.05875,0.001005,0.613829,0.011455,0.000974,0.012538,0.026969,0.008357,0.026981,0.000334,0.001619,0.002806,0.018327
"""2024-12-02_19-20-00-554.txt""","""2024-12-02""",2024,12,2,19,1,"""I guess I've got to break the …",0.002316,0.00286,0.001906,0.010791,0.01419,0.056138,0.003089,0.004331,0.008424,0.129481,0.008022,0.002548,0.001987,0.003082,0.011483,0.001876,0.012272,0.006476,0.004132,0.03707,0.012924,0.000599,0.011076,0.006326,0.020937,0.680417,0.00191,0.069102
"""2024-12-02_19-20-00-554.txt""","""2024-12-02""",2024,12,2,19,2,"""Somebody hit my truck on it's …",0.037635,0.004592,0.002067,0.012246,0.035002,0.002152,0.00055,0.000456,0.003131,0.009742,0.002254,0.005116,0.002089,0.027842,0.007848,0.000692,0.001991,0.079268,0.007747,0.002629,0.002861,0.00878,0.01885,0.006296,0.000577,0.008555,0.004123,0.617211
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""2024-12-04_11-23-48-808.txt""","""2024-12-04""",2024,12,4,11,3,"""I have six cats and all of the…",0.001832,0.000605,0.008093,0.034745,0.276817,0.039473,0.003499,0.001454,0.005761,0.036792,0.086785,0.005253,0.001289,0.000467,0.002077,0.000717,0.002336,0.002802,0.02802,0.002852,0.003881,0.000696,0.028706,0.003855,0.004733,0.083049,0.000243,0.357382
"""2024-12-04_11-23-48-808.txt""","""2024-12-04""",2024,12,4,11,4,"""I give them tuna every night w…",0.002789,0.001944,0.001001,0.005919,0.036392,0.006575,0.001252,0.000857,0.004097,0.002089,0.001905,0.001359,0.000355,0.001557,0.001256,0.0008,0.000366,0.004032,0.00285,0.00055,0.005498,0.000458,0.008818,0.000986,0.000428,0.001859,0.000347,0.937553
"""2024-12-04_11-23-48-808.txt""","""2024-12-04""",2024,12,4,11,5,"""I then tell them how much I ap…",0.782575,0.000574,0.001254,0.004901,0.077349,0.011227,0.000948,0.001056,0.002414,0.001769,0.004483,0.000792,0.000413,0.003644,0.000383,0.2622,0.00054,0.002692,0.002925,0.000138,0.013707,0.006016,0.00861,0.001827,0.001122,0.001234,0.001286,0.056682
"""2024-12-04_11-23-48-808.txt""","""2024-12-04""",2024,12,4,11,6,"""I first pet each of their head…",0.006976,0.005238,0.003202,0.004814,0.037277,0.00705,0.001896,0.002429,0.00885,0.001897,0.000849,0.003534,0.001159,0.018469,0.004139,0.00071,0.001095,0.018583,0.143564,0.001586,0.002214,0.001219,0.008574,0.001207,0.000828,0.003977,0.001915,0.720673


In [None]:
# Save the combined table as a pipe-delimited CSV with no byte-order mark.
df_combined.write_csv(
    'journal_emotions.csv',
    separator='|',
    include_bom=False,
)