# Getting small sessions

In [30]:
import os
import json
import pandas as pd

# Every row of the CSV is one speech; `id_session` says which session it belongs to.
meetings_df = pd.read_csv('./meetings.csv')

# IDs of the sessions that were classified as "small" (stored as a JSON list).
small_session_ids = []
with open('./sizes/small_sessions.json') as ids_file:
    small_session_ids = json.loads(ids_file.read())

# Keep only the speeches that belong to one of the small sessions.
is_small_session = meetings_df['id_session'].isin(small_session_ids)
small_sessions_df = meetings_df.loc[is_small_session]
small_sessions_df.head()

Unnamed: 0,id_session,speaker_name,party,speech
168,25345,O SR. PRESIDENTE RODRIGO PACHECO,Rodrigo Pacheco. Bloco Parlamentar PSD/Republi...,"Sob a proteção de Deus, iniciamos os nossos tr..."
169,25345,O SR. PRESIDENTE RODRIGO PACHECO,Rodrigo Pacheco. Bloco Parlamentar PSD/Republi...,Peço aos Srs. Senadores e às Sras. Senadoras q...
170,25345,O SR. OTTO ALENCAR,Bloco Parlamentar PSD/Republicanos/PSD - BA,Prometo guardar a Constituição Federal e as le...
171,25345,O SR. PRESIDENTE RODRIGO PACHECO,Rodrigo Pacheco. Bloco Parlamentar PSD/Republi...,Agradeço ao nobre Senador Otto Alencar e cumpr...
172,25345,O SR. ROGÉRIO CARVALHO,Bloco Parlamentar da Resistência Democrática/P...,"Pelo Rio de Janeiro, Senador Romário."


# Create prompts

In [31]:
def transform_df_to_speeches_list(text_df: pd.DataFrame):
    """Render the speeches of ``text_df`` as a single block of text.

    Each row becomes one line of the form
    ``<llmlingua, compress=False>SPEAKER</llmlingua>: SPEECH`` followed by a
    newline, in the row order of the frame.

    Args:
        text_df: frame with a ``speaker_name`` and a ``speech`` column.

    Returns:
        The concatenated lines as one string (empty for an empty frame).
    """
    tagged_lines = [
        f"<llmlingua, compress=False>{speaker}</llmlingua>: {speech}\n"
        for speaker, speech in zip(text_df['speaker_name'], text_df['speech'])
    ]
    return "".join(tagged_lines)

def get_speakers_list_from_dataframe(df):
    """Return the distinct speaker names of ``df`` in order of first appearance.

    Going through a ``set`` would make the order depend on string hashing,
    which changes between kernel sessions and therefore changes the prompt
    built from this list. Missing names (NaN/None) are dropped because they
    are not speakers and cannot be joined into a string.

    Args:
        df: frame with a ``speaker_name`` column.

    Returns:
        A list of unique, non-missing speaker names.
    """
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(df['speaker_name'].dropna()))

In [32]:
def summarization_prompt_for_stances(context: str, dataframe):
    """Build the stance-detection / summarization prompt for one set of speeches.

    The prompt starts with the speeches themselves, followed by the task
    description, the definition of the three stances (for, against, neutral),
    the expected JSON keys (``list_latent_topics``, ``stances``, ``summary``)
    and the list of speakers to classify.

    Args:
        context: short description of what the speeches make up; it is
            interpolated between triple backticks in the prompt.
        dataframe: frame holding the speeches; it is passed to
            ``transform_df_to_speeches_list`` and
            ``get_speakers_list_from_dataframe``.

    Returns:
        The complete prompt as a string.
    """
    speeches_list = transform_df_to_speeches_list(dataframe)
    speakers_list = get_speakers_list_from_dataframe(dataframe)
    # A missing speaker name may come through as NaN (a float); str.join
    # raises TypeError on non-strings, so only real names are listed.
    speakers_string = ', '.join(
        name for name in speakers_list if isinstance(name, str)
    )

    # The stance count in the first paragraph must match the three stances
    # that are actually defined below it.
    prompt = f"""
    {speeches_list}

    Consider that it is an expert model in Stance Detection. Stance detection is the task of predicting an author's point of view on a subject of interest. A speech can represent one of three types of stance: for, against or neutral.
For: When an author takes a stance "for" a subject, it means they support or advocate for it. Their speech or writing will likely include arguments, evidence, or opinions that highlight the positive aspects, benefits, or reasons to endorse the subject. For example, if the subject is a proposed policy change, someone taking a "for" stance might emphasize how it could improve people's lives or address important societal issues.
Against: This stance indicates opposition or disagreement with the subject at hand. Authors taking an "against" stance will present arguments, evidence, or opinions that highlight flaws, risks, negative consequences, or reasons to reject the subject. Using the previous example of a proposed policy change, someone taking an "against" stance might argue that it would be ineffective, unfair, or harmful to certain groups.
Neutral: A neutral stance means the author does not express explicit support or opposition towards the subject. They may present information, analysis, or perspectives in a balanced and objective manner without advocating for or against the subject. Neutral stances typically avoid strong opinions or judgments and instead focus on providing a comprehensive understanding of the topic without bias. If the person doesn't say anything about that topic, it means that they should not be listed.
Reply in json format with the following keys: list_latent_topics, stances and summary.
list_latent_topics: should contain the list of all topics discussed in the text, and a short description for each topic.
stances: for each latent_topics key should contain the list of classification of the related speaker's speechs.
summary: should contain the summary of the text.
    Consider that you will receive as input a text with a set of speeches that make up ```{context}```.

    Do the following actions for the text:
    - Determine the all topics being discussed in the text and a brief descriptions of these topics.
    - For each topic and for each speaker, except if the person doesn't say anything about that topic, classify the stance as being FOR, AGAINST, NEUTRAL. Being the following speakers: {speakers_string}.
    - Before your response, translate the summary and the topics to portuguese.
   """
    return prompt

# Send to GPT 3.5 Turbo

In [33]:
%pip install openai

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [34]:
from openai import OpenAI

# Take the key from the environment so that no credential is stored in (or has
# to be blanked out of) the notebook. Export OPENAI_API_KEY before starting the
# kernel.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY")
)

In [35]:
import time

def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if it has one."""
    stripped = text.strip()
    if stripped.startswith("```") and stripped.endswith("```"):
        # The opening fence line may carry a language tag (e.g. ```json), so
        # everything up to the first newline is dropped along with the
        # closing fence.
        first_newline = stripped.find("\n")
        if first_newline != -1:
            stripped = stripped[first_newline + 1:-3].strip()
    return stripped


def get_completion(prompt):
    """Send ``prompt`` to gpt-3.5-turbo and return the parsed JSON reply.

    Args:
        prompt: full text sent as a single user message.

    Returns:
        A dict with the keys ``prompt`` (the input), ``response`` (the reply
        parsed from JSON) and ``response_elapsed_time`` (seconds spent in the
        API call).

    Raises:
        ValueError: if the reply has no text content.
        json.JSONDecodeError: if the reply is not valid JSON, even after
            removing a surrounding Markdown code fence.
    """
    messages = [
        {"role": "user", "content": prompt},
    ]

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments during the request.
    start_time = time.perf_counter()
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
        n=1,
        stream=False,
    )
    elapsed_time = time.perf_counter() - start_time

    content = response.choices[0].message.content
    if not content:
        raise ValueError("The model returned an empty response")

    # The prompt asks for JSON, but the reply can still arrive wrapped in a
    # Markdown code fence; unwrap it before parsing.
    content_json = json.loads(_strip_code_fence(content))

    return {
        "prompt": prompt,
        'response': content_json,
        'response_elapsed_time': elapsed_time
    }

In [36]:
# Every artefact of this run (one JSON per session, the timing file and the
# error list) lives in the same folder. The "already processed" check and the
# write must point at the same path, otherwise finished sessions are sent to
# the API again on every re-run.
OUTPUT_DIR = './small_responses_v2'
# Pause after each successful request, in seconds.
SECONDS_BETWEEN_REQUESTS = 10

os.makedirs(OUTPUT_DIR, exist_ok=True)

id_sessions = small_sessions_df["id_session"].unique()

total = len(id_sessions)
time_all = 0
sessions_with_error = []

for index, id_session in enumerate(id_sessions):
    print(f'Session {id_session} started!')

    response_path = f'{OUTPUT_DIR}/{id_session}.json'

    # Resume support: a session whose response file already exists is skipped.
    if os.path.exists(response_path):
        print(f"Prompt {id_session} already processed")
        print("=-=-=-=-=-=-=")
        continue

    session_df = small_sessions_df[small_sessions_df["id_session"] == id_session]

    # A session in which no row has a speaker name cannot be classified per
    # speaker, so it is recorded as an error and skipped.
    if session_df["speaker_name"].isnull().values.all():
        print(f'Session {id_session} ignored!')
        # unique() yields NumPy integers, which json.dump cannot serialise;
        # convert to a plain int before storing.
        sessions_with_error.append(int(id_session))
        print('=-=-=-=-=')
        continue

    prompt = summarization_prompt_for_stances('parliamentary session', session_df)

    print(f"Processing prompt {id_session}")
    try:
        response_json = get_completion(prompt)
    except Exception as e:
        # Best effort: one failing session (API error, invalid JSON reply, ...)
        # must not abort the whole batch; it is recorded and reported instead.
        sessions_with_error.append(int(id_session))

        print(f"Error processing prompt {id_session}")
        print(e)
        print("=-=-=-=-=-=-=")
        continue

    elapsed_time = response_json['response_elapsed_time']
    time_all += elapsed_time

    with open(response_path, 'w') as file:
        json.dump(response_json, file)

    print(f'Session {id_session} compressed!')
    print(f'{index + 1} / {total} completed!')
    print(f"Processed prompt {id_session} in {int(elapsed_time)} seconds")
    print("=-=-=-=-=-=-=")
    time.sleep(SECONDS_BETWEEN_REQUESTS)

print(f"Processed all prompts in {int(time_all)} seconds")
# Save the total time spent in API calls during this run.
with open(f'{OUTPUT_DIR}/processing_time.json', 'w') as file:
    json.dump({
        'processing_time': time_all
    }, file)

# Save the sessions that were ignored or failed.
with open(f'{OUTPUT_DIR}/sessions_with_error.json', 'w') as file:
    json.dump({
        'sessions_with_error': sessions_with_error
    }, file)
    
    




Session 25345 started!
Processing prompt 25345


Session 25345 compressed!
1 / 41 completed!
Processed prompt 25345 in 6 seconds
=-=-=-=-=-=-=
Session 25347 started!
Processing prompt 25347
Session 25347 compressed!
2 / 41 completed!
Processed prompt 25347 in 18 seconds
=-=-=-=-=-=-=
Session 25334 started!
Processing prompt 25334
Session 25334 compressed!
3 / 41 completed!
Processed prompt 25334 in 13 seconds
=-=-=-=-=-=-=
Session 25395 started!
Processing prompt 25395
Session 25395 compressed!
4 / 41 completed!
Processed prompt 25395 in 9 seconds
=-=-=-=-=-=-=
Session 25449 started!
Processing prompt 25449
Session 25449 compressed!
5 / 41 completed!
Processed prompt 25449 in 11 seconds
=-=-=-=-=-=-=
Session 25490 started!
Processing prompt 25490
Session 25490 compressed!
6 / 41 completed!
Processed prompt 25490 in 16 seconds
=-=-=-=-=-=-=
Session 25502 started!
Processing prompt 25502
Session 25502 compressed!
7 / 41 completed!
Processed prompt 25502 in 8 seconds
=-=-=-=-=-=-=
Session 25515 started!
Processing prompt 25515
Session 2