In [1]:
from transformers import pipeline
import pandas as pd
import numpy as np
import plotly.express as px
import os

  from .autonotebook import tqdm as notebook_tqdm
2022-11-14 16:28:40.079851: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Build the Hugging Face pipeline from the pysentimiento/robertuito checkpoint. Only the
# model id is passed; no task name is given. The later cells read `result[0]["label"]`
# from each call and map the labels POS / NEU / NEG to numeric scores.
sentiment_pipeline = pipeline(model="pysentimiento/robertuito-sentiment-analysis")

Downloading: 100%|██████████| 1.35k/1.35k [00:00<00:00, 325kB/s]
2022-11-14 16:28:48.439921: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at pysentimiento/robertuito-sentiment-analysis.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.


In [3]:
# Metadata used in the figure titles and in the exported file names.
titul_cuento = "Lección de cocina"
autora = "Rosario Castellanos"

# Original table as read from disk; it is kept untouched as the source of the texts.
cuento_or = pd.read_csv("data/sample/RC-lecciondecocina.csv")

# Every column after the first one is treated as a sentence column.
cols = cuento_or.columns[1:].tolist()

# Working copy whose sentence columns get overwritten with sentiment labels.
cuento = cuento_or.copy()


In [4]:
def etiquetar_oracion(texto):
    """Return the label the sentiment pipeline assigns to `texto`.

    Cells that are not strings (missing values) are returned as NaN without
    calling the model.
    """
    if not isinstance(texto, str):
        return np.nan
    return sentiment_pipeline(texto)[0]["label"]


# Replace every sentence with its sentiment label.
# The text is always read from the untouched `cuento_or`, so the cell can be re-run:
# reading back from `cuento` would send the labels written by a previous run to the model.
for col in cols:
    print(f"Analizando columna {col}")
    cuento[col] = cuento_or[col].apply(etiquetar_oracion)



Analizando columna s001
Analizando columna s002
Analizando columna s003
Analizando columna s004
Analizando columna s005
Analizando columna s006
Analizando columna s007
Analizando columna s008
Analizando columna s009
Analizando columna s010
Analizando columna s011
Analizando columna s012
Analizando columna s013
Analizando columna s014
Analizando columna s015
Analizando columna s016
Analizando columna s017


In [5]:
# Convert the labels into numeric scores (POS -> 1, NEU -> 0, NEG -> -1, "" -> NaN),
# take the paragraph ids from the original frame, and put that column first,
# followed by the sentence columns.
sentimientos = (
    cuento
    .replace({"POS": 1, "NEU": 0, "NEG": -1, "": np.nan})
    .assign(parrafo=cuento_or["parrafo"])
    .loc[:, ["parrafo", *cols]]
)

sentimientos

Unnamed: 0,parrafo,s001,s002,s003,s004,s005,s006,s007,s008,s009,s010,s011,s012,s013,s014,s015,s016,s017
0,p001,0,-1.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,1.0,,,
1,p002,0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,0.0
2,p003,0,0.0,1.0,1.0,-1.0,,,,,,,,,,,,
3,p004,-1,0.0,,,,,,,,,,,,,,,
4,p005,0,-1.0,,,,,,,,,,,,,,,
5,p006,-1,0.0,,,,,,,,,,,,,,,
6,p007,0,0.0,,,,,,,,,,,,,,,
7,p008,-1,-1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,
8,p009,-1,-1.0,0.0,-1.0,0.0,,,,,,,,,,,,
9,p010,-1,-1.0,,,,,,,,,,,,,,,


In [6]:

# Keep only the sentence columns of `cuento` and cap every string at 150 characters.
# NOTE(review): when the cells run in order, `cuento` already holds the sentiment labels
# written by the labelling loop rather than the story text, so this cap does not shorten
# any sentence - confirm whether the original texts were meant to be truncated instead.
cuento = cuento[cols].apply(lambda serie: serie.str.slice(stop=150), axis=0)

In [7]:
# One colour per model label.
colors = {"POS": "#d93806", "NEU": "#a4fc3b", "NEG": "#4675ed"}

# Colour scale for the heatmap, five repeats of each colour so every sentiment gets a
# broad band. A continuous colour scale is read from the lowest value to the highest and
# the scores are NEG = -1, NEU = 0, POS = 1, so the list has to run NEG -> NEU -> POS;
# listing POS first would paint the negative cells with the POS colour and vice versa.
colors_list = [colors[etiqueta] for etiqueta in ("NEG", "NEU", "POS") for _ in range(5)]


In [8]:
# Heatmap of the numeric scores: one row per paragraph, one column per sentence.
fig = px.imshow(
    sentimientos.set_index("parrafo"),
    labels=dict(x="Oración", y="Párrafo", color="Sentimiento"),
    title=f"Sentimientos en el cuento '{titul_cuento}' de {autora}",
    width=800,
    height=800,
    color_continuous_scale=colors_list,
)

# All layout settings in a single call. The figure has a colour bar rather than a legend,
# so only the colour bar is configured: named ticks instead of -1/0/1, and a small bar
# placed inside the plot area.
fig.update_layout(
    xaxis_title="Oración",
    yaxis_title="Párrafo",
    plot_bgcolor="rgba(0, 0, 0, 0)",
    coloraxis_showscale=True,
    coloraxis_colorbar=dict(
        title="",
        tickvals=[-1, 0, 1],
        ticktext=["Negativo", "Neutro", "Positivo"],
        lenmode="pixels", len=100,
        yanchor="top", y=0.2,
        xanchor="left", x=0.8,
    ),
    hoverlabel=dict(font_size=12),
)
fig.update_xaxes(showgrid=False, zeroline=False, showticklabels=False)
fig.update_yaxes(showticklabels=False)

# Hover data, stacked so that every heatmap cell carries a (label, text) pair:
#   customdata[0] -> sentiment label, taken from `cuento`
#   customdata[1] -> sentence text, taken from the original frame `cuento_or`
etiquetas_hover = cuento.loc[:, cols].values
textos_hover = cuento_or.loc[:, cols].values
fig.update_traces(
    customdata=np.stack((etiquetas_hover, textos_hover), axis=2),
    hovertemplate="<b>Oración</b>: %{x}<br><b>Párrafo</b>: %{y}<br><b>Sentimiento</b>: %{customdata[0]}<br><b>Texto</b>: %{customdata[1]}",
)

fig.show()


In [9]:
# Export the heatmap as an HTML file inside "viz", creating the folder when it is missing.
ruta_heatmap = f"viz/{autora}-{titul_cuento}.html"
os.makedirs("viz", exist_ok=True)
fig.write_html(ruta_heatmap)

In [10]:
# Count every sentiment score over the sentence columns only. Flattening the whole frame
# would also count the paragraph ids stored in "parrafo", and selecting/reordering the
# result by position only works while the three sentiments happen to be the most frequent
# values in one particular ranking. Comparing against each score directly keeps the row
# order fixed (Positivo, Negativo, Neutro) and reports 0 for a sentiment that never occurs;
# NaN cells match none of the scores and are therefore not counted.
puntajes = sentimientos[cols]
frecuencia_sent = pd.DataFrame({
    "sentimiento": ["Positivo", "Negativo", "Neutro"],
    "frecuencia": [int((puntajes == valor).sum().sum()) for valor in (1, -1, 0)],
})
frecuencia_sent

Unnamed: 0,sentimiento,frecuencia
2,Positivo,17
1,Negativo,90
0,Neutro,197


In [11]:
# `colors` is keyed by the model labels (POS / NEU / NEG) while the bars are named with
# the Spanish display labels, so the keys are translated here; passing `colors` directly
# matches none of the bars and the colour map is silently ignored.
colores_barras = {
    "Positivo": colors["POS"],
    "Neutro": colors["NEU"],
    "Negativo": colors["NEG"],
}

# Bar chart with one bar per sentiment and its frequency in the story.
fig2 = px.bar(
    frecuencia_sent,
    x="sentimiento",
    y="frecuencia",
    color="sentimiento",
    title=f"Frecuencia de sentimientos en el cuento '{titul_cuento}' de {autora}",
    width=800,
    height=800,
    color_discrete_map=colores_barras,
)

fig2.show()

In [12]:
# Export the frequency bar chart as HTML with the "-frec" suffix, inside the same "viz"
# folder as the heatmap. No folder is created here; this assumes "viz" already exists
# (the earlier export cell calls os.makedirs for it).
fig2.write_html(f"viz/{autora}-{titul_cuento}-frec.html")