In [None]:
import openai
import time
import os

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Embedding


## Objective:
Show that question tone (positive, neutral, negative) induces measurable differences in LLM-generated response embeddings.
If the "emotional rebound" is real: embeddings of responses to negative questions should move closer (in vector space) to embeddings of responses to neutral or positive questions (rather than forming an isolated negative cluster).


In other words: the "emotional rebound" or tone correction by the AI leaves a trace in the space of representations.

How to proceed:
For each question, you have a triplet: the same question asked with three different tones.

For each answer produced by the LLM, you retrieve the embedding via the API.

Clustering analysis:

You can visualize (PCA/t-SNE/UMAP) all the embeddings of the responses, by coloring the points according to the tone of the prompt (pos/neutral/neg).

Alternatively, you can analyze the distance between embeddings:

Is the answer to the negative question systematically "closer" to the neutral/positive answer?

Do negative-toned answers move away from one region of space, or on the contrary, are they "recovered" towards the same cluster?

Clustering (k-means, DBSCAN, etc.) can also reveal groupings according to tone, or on the contrary a normalization effect.

Your point:
If "emotional rebound" is real: embeddings of responses to negative questions should move closer (in vector space) to embeddings of responses to neutral or positive questions (rather than forming an isolated negative cluster).

If the LLM corrects the negative tone: you'll observe few "negative clusters"; on the contrary, the majority of embeddings will fall into the same "neutral/positive" space.

You can quantify this: is the distance between "negative tone response" embeddings and "neutral/positive response" embeddings small, and is that result statistically significant?

Ultimate goal: to show that the emotional smoothing effect operates not only in the text, but in the latent space of the model.

In short: you want to prove that tone neutralization by the LLM is objectively observable and measurable in the embedding space, confirming the robustness and systematicity of the bias.

In [None]:
# Notebook-wide verbosity flag (not read by any cell shown in this part of the notebook).
VERBOSE = True

In [None]:
import getpass
import openai

# Resolve the OpenAI API key.
# Prefer the OPENAI_API_KEY environment variable so the notebook can run unattended
# (Restart & Run All); when it is unset or empty, fall back to a hidden interactive
# prompt so the key is never echoed or saved in the notebook.
openai.api_key = os.getenv("OPENAI_API_KEY") or getpass.getpass("Entre ta clé API OpenAI : ")

In [None]:
#openai.api_key = "sk-..."  # Replace with your key
# Explicit client object built from the key stored on the openai module; used below to list models.
client = openai.OpenAI(api_key=openai.api_key)

In [None]:
# Presumably a connectivity / credentials check: fetch the model list for this key and print the raw response.
print(client.models.list())

In [None]:
# Re-assigns the same value as the earlier VERBOSE cell (redundant unless that cell is changed).
VERBOSE = True

## Get data

In [None]:
# Load the table of LLM responses; later cells read its 'domaine', 'reponse' and 'label' columns.
# NOTE(review): hard-coded absolute path (looks like a Colab working directory) — adjust when running elsewhere.
df = pd.read_csv('/content/output-LLM-responses-v3.csv')

In [None]:
# Summary of the loaded frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Boolean mask selecting the rows whose 'domaine' is one of the controversial topics,
# and the corresponding subset of df.
# NOTE(review): df_controversal is not used by any later cell visible here — the
# embedding and plotting cells operate on the full df.
cond_controversal = df['domaine'].isin(
    ['opinion', 'science', 'société', 'immigration', 'climat']
)
df_controversal = df.loc[cond_controversal]


## Get embeddings

In [None]:
# Accumulator for the embedding vectors; filled by the loop in the next cell (one vector per processed row of df).
embeddings = []

In [None]:
# Embed every LLM answer (column 'reponse' of df) with the OpenAI embeddings API.
MAX = np.inf  # maximum number of rows to embed; set e.g. 3*25 for a quick partial run
PAUSE_SECONDS = 0  # set > 0 to sleep between requests (simple rate-limit throttle)

# Start from an empty list inside this cell so that re-running it never appends
# duplicates to a list left over from a previous run.
embeddings = []
cpt = 0  # number of embeddings computed so far (displayed by the next cell)
for texte in tqdm(df['reponse'], total=len(df)):
    # Check the cap *before* calling the API so that exactly MAX rows are embedded
    # (checking after the increment with `>` embedded MAX + 1 rows).
    if cpt >= MAX:
        break
    # Use the explicit client created earlier in the notebook rather than the
    # module-level default client.
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texte,
    )
    # `embedding` is left defined after the loop; a later cell inspects the last one.
    embedding = np.array(response.data[0].embedding)
    embeddings.append(embedding)
    cpt += 1
    if PAUSE_SECONDS:
        time.sleep(PAUSE_SECONDS)



In [None]:
# Number of rows processed by the embedding loop.
cpt

In [None]:
# Inspect the last embedding produced by the loop: its Python type and its length.
print (f'type :{type(embedding)} ; len : {len(embedding)}')

In [None]:
# Number of embeddings collected; it must match len(df) for the label-based indices used below to line up.
len(embeddings)

## Cluster embeddings

Label encoder

In [None]:
from sklearn.preprocessing import LabelEncoder
# Turn the tone labels in df['label'] into integer codes. The fitted encoder is
# kept in `le` so the codes can be mapped back to label names for the plot legends.
le = LabelEncoder().fit(df['label'])
labels = le.transform(df['label'])
labels.shape

In [None]:
# Row positions of each encoded class (codes 0, 1, 2), each kept in the tuple form
# returned by np.where.
idx_0, idx_1, idx_2 = (np.where(labels == code) for code in (0, 1, 2))

In [None]:
# Show which label name corresponds to the integer codes 0, 1 and 2.
le.inverse_transform([0,1,2])

### PCA

In [None]:
from sklearn.decomposition import PCA
# Project the embeddings onto their first two principal components for plotting.
# random_state is pinned (same seed as the UMAP cell) because scikit-learn's default
# svd_solver='auto' may select the randomized solver for wide inputs such as
# embedding vectors; without a seed the projection could differ from run to run.
pca = PCA(n_components=2, random_state=42)
prjcted = pca.fit_transform(embeddings)

In [None]:
# Sanity check: shape of the projected points selected by idx_0 (idx_0 is the tuple returned by np.where).
prjcted[idx_0,:].shape

In [None]:
# 2-D PCA scatter of the response embeddings, one colour/marker per encoded tone class.
# NOTE(review): the tone written at the end of each scatter line assumes the encoder
# assigned 0 -> neutral, 1 -> negative, 2 -> positive; confirm with
# le.inverse_transform([0,1,2]). The legend itself always shows the true label names.
plt.figure(figsize=(6,6))
# inverse_transform returns an array; take element [0] so the legend shows the bare
# label name instead of the array's string form (e.g. "['name']").
plt.scatter(prjcted[idx_2,0] , prjcted[idx_2,1], c = 'cyan' , marker ='o' , label = le.inverse_transform([2])[0])  # positive
plt.scatter(prjcted[idx_0,0] , prjcted[idx_0,1], c = 'black' , marker ='^' , label = le.inverse_transform([0])[0])  # neutral
plt.scatter(prjcted[idx_1,0] , prjcted[idx_1,1], c = 'red' , marker ='+' , s= 200 , label = le.inverse_transform([1])[0])  # negative
# Title and axis labels so the figure stands alone, consistent with the UMAP figure.
plt.title('PCA projection')
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend()
plt.show()

### UMAP

In [None]:
import umap

# Reduce the embeddings to two dimensions with UMAP; the seed is fixed so the
# layout is the same on every run.
umap_proj = umap.UMAP(
    n_components=2,
    random_state=42,
).fit_transform(embeddings)

In [None]:
# 2-D UMAP scatter of the response embeddings, one colour/marker per encoded tone class.
# NOTE(review): the tone written at the end of each scatter line assumes the encoder
# assigned 0 -> neutral, 1 -> negative, 2 -> positive; confirm with
# le.inverse_transform([0,1,2]). The legend itself always shows the true label names.
plt.figure(figsize=(6,6))
# inverse_transform returns an array; take element [0] so the legend shows the bare
# label name instead of the array's string form (e.g. "['name']").
plt.scatter(umap_proj[idx_2,0] , umap_proj[idx_2,1], c = 'cyan' , marker ='o' , s = 200 , label = le.inverse_transform([2])[0])  # positive
plt.scatter(umap_proj[idx_1,0] , umap_proj[idx_1,1], c = 'red' , marker ='+' , s= 200 , label = le.inverse_transform([1])[0])  # negative
plt.scatter(umap_proj[idx_0,0] , umap_proj[idx_0,1], c = 'black' , marker ='^' , label = le.inverse_transform([0])[0])  # neutral
plt.title('UMAP projection')
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.legend()
plt.show()

In [None]:
# Optional export of the responses together with their embeddings.
# Disabled by default; change the condition to True to write the file.
if False:
  import json  # local import: only needed when the export is enabled

  # Keep the raw vectors on df as a new column.
  df['embedding'] = embeddings
  # Write each vector as a JSON list rather than as a numpy array: when an array is
  # converted to text, numpy abbreviates anything longer than its print threshold
  # (1000 elements by default) with "...", which would silently drop most of a long
  # embedding from the CSV. The JSON strings can be read back with json.loads.
  df.assign(
      embedding=[json.dumps(np.asarray(vec).tolist()) for vec in embeddings]
  ).to_csv("with_embeddings.csv", index=False)