In [1]:
from pathlib import Path
import pandas as pd
import pickle
from dotenv import load_dotenv
import os
from openai import OpenAI
from pydantic import BaseModel

load_dotenv()
assert "LSIR_OPENAI_PROJ_KEY" in os.environ, "Please set the LSIR_OPENAI_PROJ_KEY environment variable"

dataset_path = Path("/mnt/datasets/dop-position-mining/generated-reports/frontend-data")
event_name = "event2multianchor"

In [2]:
# list all files under the dataset path
files = list(dataset_path.glob("*"))
for file in files:
    print(file)

/mnt/datasets/dop-position-mining/generated-reports/frontend-data/event1.json
/mnt/datasets/dop-position-mining/generated-reports/frontend-data/event2.json
/mnt/datasets/dop-position-mining/generated-reports/frontend-data/event2anchor.json
/mnt/datasets/dop-position-mining/generated-reports/frontend-data/event2multianchor.json
/mnt/datasets/dop-position-mining/generated-reports/frontend-data/event3.json
/mnt/datasets/dop-position-mining/generated-reports/frontend-data/hierarchical_topics_event1.json
/mnt/datasets/dop-position-mining/generated-reports/frontend-data/hierarchical_topics_event2.json
/mnt/datasets/dop-position-mining/generated-reports/frontend-data/hierarchical_topics_event2anchor.json
/mnt/datasets/dop-position-mining/generated-reports/frontend-data/hierarchical_topics_event2multianchor.json
/mnt/datasets/dop-position-mining/generated-reports/frontend-data/hierarchical_topics_event3.json
/mnt/datasets/dop-position-mining/generated-reports/frontend-data/old
/mnt/datasets/do

In [3]:
df = pd.read_json(dataset_path / f"{event_name}.json")
hierarchical_topics = pd.read_json(dataset_path / f"hierarchical_topics_{event_name}.json")
hierarchical_topics.set_index("Parent_ID", inplace=True)
orig_topics = pd.read_json(dataset_path / f"orig_topics_{event_name}.json")

with open(dataset_path / f"topics_{event_name}.pkl", "rb") as f:
    topic_assignments = pickle.load(f)

In [4]:
assert len(topic_assignments) == len(df)
topic_assignments[:5]

[0, 39, 35, 18, 27]

In [5]:
df.head(2)

Unnamed: 0,id,title,author,summary,body,timestamp,political_issue,is_about_event,stakeholder,position,stance,sentiment,emotion,context,old_targets,support.type,support.content,country.name,country.code,targets
0,134245406231,Zelenskyy Hopes to Make Ukraine a Leading Weap...,Journalist of www.canadastandard.com,Ukrainian President Volodymyr Zelenskyy hosted...,by partnering with Western manufacturers so th...,2023-09-30 23:59:38,elections in russian-occupied territories of U...,True,Vladimir Putin,The annexation of Ukrainian regions was carrie...,Pro,Positive,Joy,Political,"[{'predicate': 'annexation', 'target': 'donets...",Not provided,Not provided,russian federation,RUS,"[{'predicate': 'annexation', 'target': 'Donets..."
1,134245406231,Zelenskyy Hopes to Make Ukraine a Leading Weap...,Journalist of www.canadastandard.com,Ukrainian President Volodymyr Zelenskyy hosted...,by partnering with Western manufacturers so th...,2023-09-30 23:59:38,elections in russian-occupied territories of U...,True,Ukraine,Denounces the referendum votes and recent ball...,Con,Negative,Anger,Political,"[{'predicate': 'denounce', 'target': 'russian ...",Not provided,Not provided,ukraine,UKR,"[{'predicate': 'denounce', 'target': 'Russia',..."


In [6]:
hierarchical_topics.head(2)

Unnamed: 0_level_0,Parent_Name,Topics,Child_Left_ID,Child_Left_Name,Child_Right_ID,Child_Right_Name,Distance,sentences_in_topic,sentence_count
Parent_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
198,sovereignty_putin_united_territory_territories,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",186,luhansk_lnr_elections_participation_lugansk,197,sovereignty_putin_united_territory_territories,1.811794,"[0, 16, 24, 52, 87, 96, 104, 107, 130, 142, 16...",2987
197,sovereignty_putin_united_territory_territories,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",196,putin_united_elections_about_election,195,sovereignty_sovereign_united_null_territory,1.611203,"[0, 16, 24, 52, 87, 96, 104, 107, 130, 142, 16...",2775


In [7]:
orig_topics.head()

Unnamed: 0,Topic,Count,Name,Representation,MMR,POS,keybert,Representative_Docs
0,0,128,0_russian_russia_part_federation,"[russian, russia, part, federation, that, is, ...","[for, and, with, of, by, moldova, putin, citiz...","[part, residents, supply, citizenship, new, ac...","[putin, 20, elections, since, supported, main,...",[The inhabitants of Donets Basin consider them...
1,1,69,1_territories_occupied_condemns_recognize,"[territories, occupied, condemns, recognize, g...","[territories, recognize, favor, significant, i...","[territories, government, favor, results, sign...","[elections, according, since, france, intimida...",[France calls on Russia to cease its war of ag...
2,2,56,2_sovereignty_illegitimate_violation_ukraine,"[sovereignty, illegitimate, violation, ukraine...","[sovereignty, illegitimate, ukraine, territori...","[sovereignty, illegitimate, violation, territo...","[sovereignty, sovereign, occupies, occupying, ...",[The elections in Russian-occupied territories...
3,3,88,3_electoral_voting_electronic_rigging,"[electoral, voting, electronic, rigging, proce...","[electoral, electronic, rigging, remote, kakho...","[electoral, voting, electronic, rigging, proce...","[elections, appealing, expressing, participati...",[The Kremlin probably wanted to avoid an embar...
4,4,50,4_recognizing_unwilling_bank_results,"[recognizing, unwilling, bank, results, cooper...","[recognizing, unwilling, occupiers, left, temp...","[unwilling, bank, results, pseudo, population,...","[inhabitants, legitimacy, elections, territori...",[The elections in the temporarily occupied ter...


In [8]:
openai_client = OpenAI(api_key=os.environ["LSIR_OPENAI_PROJ_KEY"])

In [9]:
from typing import List, Dict

# Define the structure for stakeholders
class Stakeholder(BaseModel):
    name: str
    explanation: str

# Define the structure for key points
class KeyPoint(BaseModel):
    point: str
    explanation: str

# Define the structure for the timeline events
class TimelineEvent(BaseModel):
    date: str
    person: str
    event: str

# Define the main structure
class Summary(BaseModel):
    title: str
    key_points: List[KeyPoint]
    important_stakeholders_per_country: Dict[str, List[Stakeholder]]
    arguments_for: List[str]
    arguments_against: List[str]
    topic: str
    time_analysis: str
    timeline: List[TimelineEvent]

In [10]:
openai_system_prompt_json = """You are assisting dialogue activities to support a peaceful conflict settlement. 
You will be given:
1. The topic name
2. Set of all positions considered to be under the topic.
3. Set of 3 positions considered to be representative for the topic.

For each position in (2) , you will be given the following information:
1. The stakeholder who holds the position.
2. The article title that the position was extracted from.
3. The political issue discussed in the article.
4. The timestamp of the article.
5. The country code of the stakeholder.
6. Set of RDF triplets where the subject is the stakeholder of the position. These triplets demonstrate who and what the stakeholder is addressing in the position.

Your task is to generate a summary of this topic IN ENGLISH. The format of the summary will be as follows:
1. Key points section, where you describe the key points of the topic.
2. Important stakeholders section, where you describe the key stakeholders involved the topic. Explain who they are, what their stance is and how they relate to each other.
3. Arguments section, where you talk about the key arguments and counterarguments.
4. Time analysis section, where you analyse how key arguments, relationships change over time(using the provided timestamps)

Output format will be a json object:
{
    'title': string, title to be used at the top of the summary,
    'key_points': [{'point': title for key point, 'explanation': explanation of key point}, ...]
    'important_stakeholders_per_country': {
        'country': [
            {'name': 'Stakeholder name', 'explanation': 'Who this stakeholder is, why they are important, what their stance is.'},
            {'name': 'Stakeholder name', 'explanation': 'Who this stakeholder is, why they are important, what their stance is.'},
        ],
        'country': [
            {'name': 'Stakeholder name', 'explanation': 'Who this stakeholder is, why they are important, what their stance is.'},
        ]
    },
    'arguments_for': {
        'title': Title to use for arguments for section,
        'items': List of strings containing the arguments for
    },
    'arguments_against': {
        'title': Title to use for arguments against section,
        'items': List of strings containing the arguments against
    },
    'time_analysis': string giving a brief summary of timeline.
    'timeline': [
        {'date': '2023-08-07', 'person': 'Andrej Jerman', 'event': 'Reports on the situation in Russian-occupied Ukraine.'},
        {'date': '2023-08-16', 'person': 'Peter Esben-Petersen', 'event': 'Describes Russia\'s elections as a charade.'},
        ...
    ]
}
"""

In [11]:
# openai_system_prompt = """You are assisting dialogue activities to support a peaceful conflict settlement. 
# You will be given:
# 1. The topic name
# 2. Set of all positions considered to be under the topic.
# 3. Set of 3 positions considered to be representative for the topic.

# For each position in (2) , you will be given the following information:
# 1. The stakeholder who holds the position.
# 2. The article title that the position was extracted from.
# 3. The political issue discussed in the article.
# 4. The timestamp of the article.
# 5. The country code of the stakeholder.
# 6. Set of RDF triplets where the subject is the stakeholder of the position. These triplets demonstrate who and what the stakeholder is addressing in the position.

# Your task is to generate a summary of this topic IN ENGLISH. The format of the summary will be as follows:
# 1. Key points section, where you describe the key points of the topic.
# 2. Important stakeholders section, where you describe the key stakeholders involved the topic. Explain who they are, what their stance is and how they relate to each other.
# 3. Arguments section, where you talk about the key arguments and counterarguments.
# 4. Time analysis section, where you analyse how key arguments, relationships change over time(using the provided timestamps), for this part provide a HTML timeline visualization.

# Note: **Generate the summary in an HTML format, with each section seperated, using nice visualizations and eye catching color schemes**
# """

In [12]:
topic_id = 2
representative_positions = orig_topics.iloc[topic_id+1]["Representative_Docs"]
topic_name = orig_topics.iloc[topic_id+1]["Representation"]
# find index locations in topic_assignments that have the value topic_id
all_position_indexes = [i for i, x in enumerate(topic_assignments) if x == topic_id]
all_positions_df = df.iloc[all_position_indexes]
min_timestamp = all_positions_df["timestamp"].min()
max_timestamp = all_positions_df["timestamp"].max()
all_positions = []
for i, row in all_positions_df.iterrows():
    all_positions.append({
        "stakeholder": row["stakeholder"],
        "article_title": row["title"],
        "political_issue": row["political_issue"],
        "timestamp": row["timestamp"],
        "country_code": row["country.code"],
        "rdf_triplets": row["targets"]
    })

user_prompt = f"""Topic: {topic_name}
Representative positions: {representative_positions}
All positions: {all_positions}
"""

In [13]:
min_timestamp, max_timestamp

(Timestamp('2023-08-07 13:14:27'), Timestamp('2023-09-30 12:00:42'))

In [14]:
print(openai_system_prompt_json+"\n###\n\n"+user_prompt)

You are assisting dialogue activities to support a peaceful conflict settlement. 
You will be given:
1. The topic name
2. Set of all positions considered to be under the topic.
3. Set of 3 positions considered to be representative for the topic.

For each position in (2) , you will be given the following information:
1. The stakeholder who holds the position.
2. The article title that the position was extracted from.
3. The political issue discussed in the article.
4. The timestamp of the article.
5. The country code of the stakeholder.
6. Set of RDF triplets where the subject is the stakeholder of the position. These triplets demonstrate who and what the stakeholder is addressing in the position.

Your task is to generate a summary of this topic IN ENGLISH. The format of the summary will be as follows:
1. Key points section, where you describe the key points of the topic.
2. Important stakeholders section, where you describe the key stakeholders involved the topic. Explain who they ar

In [15]:
import json
with open("summary_format.json", "r") as f:
    summary_format = json.load(f)

In [17]:
openai_response = openai_client.chat.completions.create(
    model="o1-preview",
    messages=[
        {"role": "user", "content": openai_system_prompt_json+"\n###\n\n"+user_prompt},
    ],
    # response_format=summary_format
)
print(openai_response.choices[0].message.content)


```json
{
    "title": "Analysis of the Legitimacy of Elections in Russian-Occupied Ukrainian Territories",
    "key_points": [
        {
            "point": "Russia Conducts Elections in Occupied Ukrainian Territories",
            "explanation": "Russia organized elections in the regions of Ukraine that it currently occupies, including Donetsk, Luhansk, Kherson, and Zaporizhzhia. These elections are seen by Russia as a step towards integrating these territories into the Russian Federation."
        },
        {
            "point": "Ukraine and International Community Condemn the Elections",
            "explanation": "The Ukrainian government and numerous international stakeholders, including the European Union, United States, United Kingdom, G7 nations, and the United Nations, have condemned the elections. They assert that the elections are illegitimate and violate international law and Ukraine's sovereignty."
        },
        {
            "point": "Elections Violate Internatio

In [17]:
# import json

# summary_data = json.loads(openai_response.choices[0].message.content)
# summary_data

{'title': 'Electoral Process and Legitimacy in Russian-Occupied Ukrainian Territories',
 'key_points': [{'point': 'Controversial Elections',
   'explanation': 'The elections held in Russian-occupied Ukrainian territories have been widely criticized and deemed illegitimate by various international actors. These elections are perceived as a Kremlin effort to establish control before the upcoming presidential elections in March.'},
  {'point': 'Shift to Electronic Voting',
   'explanation': 'There is a growing focus on converting electoral infrastructure for electronic voting as a strategy to increase control and efficiency, potentially minimizing public dissent and international scrutiny.'},
  {'point': 'International Condemnation',
   'explanation': 'Numerous stakeholders, including the European Union, G7, and the United Nations, have expressed strong opposition against these elections, citing violations of international laws and undermining democratic principles.'},
  {'point': 'Politi

In [22]:
openai_response_str = openai_response.choices[0].message.content
openai_response_str = openai_response_str.replace("```json", "").replace("```", "")
print(openai_response_str)


{
    "title": "Analysis of the Legitimacy of Elections in Russian-Occupied Ukrainian Territories",
    "key_points": [
        {
            "point": "Russia Conducts Elections in Occupied Ukrainian Territories",
            "explanation": "Russia organized elections in the regions of Ukraine that it currently occupies, including Donetsk, Luhansk, Kherson, and Zaporizhzhia. These elections are seen by Russia as a step towards integrating these territories into the Russian Federation."
        },
        {
            "point": "Ukraine and International Community Condemn the Elections",
            "explanation": "The Ukrainian government and numerous international stakeholders, including the European Union, United States, United Kingdom, G7 nations, and the United Nations, have condemned the elections. They assert that the elections are illegitimate and violate international law and Ukraine's sovereignty."
        },
        {
            "point": "Elections Violate International Law

In [24]:
import json

summary_data = json.loads(openai_response_str)
summary_data

{'title': 'Analysis of the Legitimacy of Elections in Russian-Occupied Ukrainian Territories',
 'key_points': [{'point': 'Russia Conducts Elections in Occupied Ukrainian Territories',
   'explanation': 'Russia organized elections in the regions of Ukraine that it currently occupies, including Donetsk, Luhansk, Kherson, and Zaporizhzhia. These elections are seen by Russia as a step towards integrating these territories into the Russian Federation.'},
  {'point': 'Ukraine and International Community Condemn the Elections',
   'explanation': "The Ukrainian government and numerous international stakeholders, including the European Union, United States, United Kingdom, G7 nations, and the United Nations, have condemned the elections. They assert that the elections are illegitimate and violate international law and Ukraine's sovereignty."},
  {'point': 'Elections Violate International Law',
   'explanation': 'Many stakeholders claim that the elections contravene international agreements such

In [19]:
from jinja2 import Template
html_template = open("summary2.html").read()
template = Template(html_template)

In [23]:
rendered_html = template.render(openai_response_str)

ValueError: dictionary update sequence element #0 has length 1; 2 is required

In [108]:
# # # convert this html_string to a PDF file
from weasyprint import HTML

HTML(string=openai_response_str).write_pdf('output_o1.pdf')

In [112]:
summary_data["timeline"][3]

{'date': '2023-09-30',
 'person': 'Volodymyr Zelenskyy',
 'event': 'Announces opposition to the electoral events in occupied territories and seeks international support.'}

In [113]:
summary_data['arguments_against']['title']

'Arguments Against the Electoral Events'

In [114]:
rendered_html = template.render(summary_data)
HTML(string=rendered_html).write_pdf('output_test.pdf')