# Day13 - 30DayChartChallenge

corpus/

├── Marie_de_France_Lai_du_Chevrefeuille.txt

├── Anne_Comnene_Alexiade.txt

├── Hildegarde_O_Vis_Aeternitatis.txt


In [18]:
#IMPORT LIBRARIES
import pandas as pd
import numpy as np
import requests
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline


#COLLECT DATA
# Download the poem text from GitHub. A timeout keeps the cell from hanging
# forever if the server never answers.
url = "https://raw.githubusercontent.com/adeline-hub/medieval_poetess/main/python_code/Marie_de_France_Lai_du_Chevrefeuille.txt"
response = requests.get(url, timeout=30)
response.encoding = 'utf-8'

# Fail here, with the HTTP status in the message, instead of printing and
# carrying on: without a body `raw_poem` would never be assigned and the
# pre-processing step below would crash with an unrelated NameError.
if response.status_code != 200:
    raise RuntimeError(f"Erreur lors du chargement : {response.status_code}")

raw_poem = response.text

#PRE PROCESS DATA
# Remove every character that is not a word character, whitespace, or one of
# the punctuation marks listed in the character class.
cleaned_poem_MF = re.compile(r'[^\w\s,.\-:;!?«»]').sub('', raw_poem)

# Fit a TF-IDF model (vocabulary capped at 100 terms) on the poem as a single document.
vectorizer = TfidfVectorizer(max_features=100)
tfidf_matrix = vectorizer.fit_transform([cleaned_poem_MF])

# One entry per non-blank verse, with surrounding whitespace trimmed.
lines = [verse for verse in map(str.strip, cleaned_poem_MF.split('\n')) if verse]

#NLP
# French -> English translation pipeline; device=-1 is reported as CPU in the
# captured cell output.
translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-fr-en",
    device=-1,
)
# English emotion classifier, also created with device=-1 and asked for the
# single best label (top_k=1).
emotion_model = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=1,
    device=-1,
)

def translate_line(line):
    """Translate one verse with the module-level ``translator`` pipeline.

    Args:
        line: Text of a single verse.

    Returns:
        The ``'translation_text'`` field of the first translation result, or
        an empty string when ``line`` is empty or whitespace-only (the
        translator is not called in that case).
    """
    if line.strip() == "":
        return ""
    outputs = translator(line, max_length=200)
    return outputs[0]['translation_text']

# Translate each verse, classify the English text, and keep one record per verse.
emotion_data = []
for i, line in enumerate(lines):
    en_line = translate_line(line)
    prediction = emotion_model(en_line)[0]
    # The first element of the classifier output may itself be a list;
    # unwrap it so `prediction` is always the label/score mapping.
    if isinstance(prediction, list):
        prediction = prediction[0]
    emotion_data.append(dict(
        line_number=i,
        fr_text=line,
        en_text=en_line,
        label=prediction['label'],
        score=prediction['score'],
    ))

#STORE DATA IN DATAFRAME
# One row per verse, built from the records collected above.
df_emotions_MF = pd.DataFrame(emotion_data)
# Integer code for each emotion label, taken from the label column viewed as a categorical.
df_emotions_MF['label_code'] = df_emotions_MF['label'].astype('category').cat.codes
# Show three random rows as the cell output.
df_emotions_MF.sample(3)



Device set to use cpu
Device set to use cpu


Unnamed: 0,line_number,fr_text,en_text,label,score,label_code
0,0,Ils étaient tous deux,They were both,neutral,0.839244,4
1,1,comme le chèvrefeuille,like the honeysuckle,neutral,0.656485,4
10,10,"ni vous sans moi, ni moi sans vous! »","neither you without me, nor me without you!""",anger,0.918563,0


In [19]:
#PLOT CONNECTED SCATTERPLOT (LINE + SCATTERPLOT)
import plotly.graph_objects as go

#1 LINE
# Start the figure with the black connector line that joins the per-verse
# scores in verse order; it is excluded from the legend and from hover.
fig = go.Figure(data=[
    go.Scatter(
        x=df_emotions_MF['line_number'],
        y=df_emotions_MF['score'],
        mode='lines',
        line={'color': 'black', 'width': 2},
        showlegend=False,
        hoverinfo='skip',
    )
])

#2 SCATTERPLOT
# Code -> emotion-name lookup, built from the data itself, so the colorbar
# can show readable emotion names instead of bare integer codes.
emotion_legend = (
    df_emotions_MF[['label_code', 'label']]
    .drop_duplicates()
    .sort_values('label_code')
)

# One marker per verse, coloured by emotion code. The hover box shows the
# verse number, the score, the emotion label and the English translation.
fig.add_trace(go.Scatter(
    x=df_emotions_MF['line_number'],
    y=df_emotions_MF['score'],
    mode='markers',
    marker=dict(
        size=10,
        color=df_emotions_MF['label_code'],
        colorscale='Earth',
        cmin=0,
        cmax=df_emotions_MF['label_code'].max(),
        colorbar=dict(
            title="Emotion",
            # Put one tick on each category code and label it with the
            # emotion name it stands for.
            tickmode='array',
            tickvals=emotion_legend['label_code'].tolist(),
            ticktext=emotion_legend['label'].tolist(),
        ),
        line=dict(width=1, color='black')
    ),
    # customdata[0] = English text, customdata[1] = emotion label (used in the template below).
    customdata=df_emotions_MF[['en_text', 'label']],
    hovertemplate="""
    <b>Line %{x}</b><br>
    Emotion Score: %{y:.2f}<br>
    Emotion: %{customdata[1]}<br>
    <i>%{customdata[0]}</i><extra></extra>
    """
))

#3 DESIGN
# Titles and axis labels.
fig.update_layout(
    title="Trajectoire émotionnelle (connected scatterplot) - Marie de France",
    xaxis_title="Numéro de vers",
    yaxis_title="Score de l’émotion",
)
# Theme, canvas size and background colours; the legend stays hidden.
fig.update_layout(
    template="plotly_white",
    width=800,
    height=500,
    showlegend=False,
    plot_bgcolor='oldlace',
    paper_bgcolor='oldlace',
)

# Render the figure as the cell output.
fig.show()

*Combien faut-il déplorer et regretter*

*que, pour une faute commise*

*sur les conseils du serpent,*

*la tristesse se soit infiltrée dans la femme !*

**Hildegarde von Bingen**