In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Enumerate every file found under the Kaggle input directory, one path per line.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
!pip install langchain langchain-groq faiss-cpu pandas python-dotenv sentence-transformers -q


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 24.8.3 requires cubinlinker, which is not installed.
cudf 24.8.3 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 24.8.3 requires ptxcompiler, which is not installed.
cuml 24.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 24.8.3 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 24.8.3 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.6.0 which is incompatible.
distributed 2024.7.1 requires dask==2024.7.1, but you have dask 2024.9.1 which is incompatible.
google-cloud-bigquery 2.34.4 requires packaging<22.0dev,>=14.3, but you have packaging 24.2 which is incompatible.
jupyterlab 4.2.5 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.
jupyterlab-lsp 5.1.0 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 w

In [2]:
pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.7-py3-none-any.whl.metadata (2.9 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.6.1-py3-none-any.whl.metadata (3.5 kB)
Downloading langchain_community-0.3.7-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m38.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading httpx_sse-0.4.0-py3-none-any.whl (7.8 kB)
Downloading pydantic_settings-2.6.1-py3-none-any.whl (28 kB)
Installing collected packages: httpx-sse, pydantic-settings, langchain_community
Successfully installed httpx-sse-0.4.0 langchain_community-0.3.7 pydantic-settings-2.6.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
from typing import List, Dict
import pandas as pd
import os
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.schema import Document

# Set your API key.
# The key is never written into the notebook: an existing GROQ_API_KEY
# environment variable is reused, otherwise it is prompted for without echo.
# NOTE: the prompt needs an interactive session; for unattended runs export
# GROQ_API_KEY before this cell executes.
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass

    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")


In [17]:
import pandas as pd
class RAGPipeline:
    """Retrieval-augmented question answering over the UN datasets.

    The pipeline converts three CSV files (resolution texts, socio-economic
    indicators and Security Council voting records) into LangChain
    ``Document`` objects, indexes them in a FAISS vector store and answers
    questions with a Groq-hosted chat model through a ``RetrievalQA`` chain.
    """

    def __init__(self,
                 model_name: str = "llama-3.2-3b-preview",
                 embedding_model_name: str = "sentence-transformers/all-mpnet-base-v2"):
        """Create the text splitter, the embedding model and the LLM client.

        Args:
            model_name: Groq chat model identifier; "mixtral-8x7b-32768" or
                other Groq models can be used as well.
            embedding_model_name: Hugging Face model used to embed documents.
        """
        # Initialize text splitter (sizes are measured in characters via len)
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100,
            length_function=len
        )

        # Initialize embeddings
        self.embeddings = HuggingFaceEmbeddings(
            model_name=embedding_model_name
        )

        # Initialize Groq LLM
        self.llm = ChatGroq(
            model_name=model_name,
            temperature=0,
            streaming=True,
            max_tokens=4096
        )

    @staticmethod
    def _resolve_path(path: str, data_dir: str) -> str:
        """Return ``path`` located inside ``data_dir``.

        ``os.path.join`` discards ``data_dir`` when ``path`` is absolute, so
        callers may pass either a bare file name or a full path.
        """
        return os.path.join(data_dir, path)

    def decode_voting(self, row: pd.Series) -> str:
        """Decode voting data into a human-readable format.

        Args:
            row: One record of the voting table. Every column other than
                'COUNCIL', 'DATE' and 'RESOLUTION' is treated as a vote.

        Returns:
            A comma-separated string of "<column>: <decoded vote>" entries.
            Missing values (None or NaN) are reported as 'Not part of SC';
            unrecognised codes are passed through unchanged.
        """
        vote_mapping = {'Y': 'Yes', 'N': 'No', 'X': 'Abstain', 'A': 'Absent'}
        decoded_votes = []

        for col, value in row.items():
            if col in ('COUNCIL', 'DATE', 'RESOLUTION'):
                continue
            # pandas represents empty CSV cells as NaN, which never equals a
            # None dictionary key, so missing votes are detected explicitly.
            if pd.isna(value):
                decoded_value = 'Not part of SC'
            else:
                decoded_value = vote_mapping.get(value, value)
            decoded_votes.append(f"{col}: {decoded_value}")

        return ", ".join(decoded_votes)

    def load_and_process_data(self,
                            resolutions_path: str,
                            socio_econ_path: str,
                            voting_path: str,
                            data_dir: str = "/kaggle/input/ungraphss") -> List["Document"]:
        """Load the three CSV files and convert them into documents.

        Args:
            resolutions_path: CSV with 'FullText' and 'ResolutionNumber' columns.
            socio_econ_path: CSV with a 'country' column plus indicator columns.
            voting_path: CSV with 'RESOLUTION' and 'DATE' columns plus vote columns.
            data_dir: Directory that relative paths are resolved against;
                absolute paths are used as given.

        Returns:
            Chunked resolution documents, followed by one document per
            socio-economic row and one per voting row.
        """
        # Load datasets from the paths supplied by the caller
        resolutions_df = pd.read_csv(self._resolve_path(resolutions_path, data_dir))
        socio_econ_df = pd.read_csv(self._resolve_path(socio_econ_path, data_dir))
        voting_df = pd.read_csv(self._resolve_path(voting_path, data_dir))

        # Process resolutions
        resolution_docs = []
        for _, row in resolutions_df.iterrows():
            text = row["FullText"]
            # Rows without text (NaN after read_csv) cannot become a Document.
            if not isinstance(text, str):
                continue
            resolution_docs.append(Document(
                page_content=text,
                metadata={"type": "Resolution", "ResolutionNumber": row["ResolutionNumber"]}
            ))
        # Split long documents; each text is chunked independently, so a single
        # batched call yields the same chunks as splitting row by row.
        documents = list(self.text_splitter.split_documents(resolution_docs))

        # Process socio-economic data
        indicator_columns = socio_econ_df.columns[1:]
        for _, row in socio_econ_df.iterrows():
            content = f"Socio-economic data for {row['country']} includes: " + \
                     ", ".join([f"{col}: {row[col]}" for col in indicator_columns])
            documents.append(Document(
                page_content=content,
                metadata={"type": "Socio-Economic Data", "Country": row["country"]}
            ))

        # Process voting data
        for _, row in voting_df.iterrows():
            decoded_votes = self.decode_voting(row)
            content = f"Voting data for Resolution {row['RESOLUTION']} on {row['DATE']} includes: {decoded_votes}"
            documents.append(Document(
                page_content=content,
                metadata={"type": "Voting Data", "ResolutionNumber": row['RESOLUTION']}
            ))

        return documents

    def create_vectorstore(self, documents: List["Document"]):
        """Embed ``documents`` with the configured model and index them in FAISS."""
        return FAISS.from_documents(documents, self.embeddings)

    def setup_rag_chain(self, vectorstore, k: int = 7, verbose: bool = True):
        """Build a "stuff" RetrievalQA chain on top of ``vectorstore``.

        Args:
            vectorstore: Store whose retriever supplies the context documents.
            k: Number of documents retrieved per question.
            verbose: Forwarded to the chain; when True the formatted prompt is
                printed on every call.

        Returns:
            The configured RetrievalQA chain.
        """
        # Define prompt template
        prompt_template = """You are an expert analyst of UN resolutions and voting patterns. Based on the provided context, please give a detailed analysis of the question.
    
        Context: {context}
    
        Question: {question}
    
        Provide a thorough analysis. """

        PROMPT = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"]
        )

        # Create chain
        chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(
                search_kwargs={"k": k}
            ),
            chain_type_kwargs={
                "prompt": PROMPT,
                "verbose": verbose
            }
        )

        return chain

    def query(self, chain, question: str) -> str:
        """Run ``question`` through ``chain`` and return the "result" field."""
        return chain.invoke({"query": question})["result"]

# Usage example with error handling
def main():
    """Build the RAG pipeline end to end and answer one example question.

    Any failure is reported on stdout together with the full traceback
    instead of being raised, so the cell always finishes.
    """
    import traceback  # only needed on the failure path

    try:
        # Initialize pipeline
        print("Initializing RAG pipeline...")
        rag = RAGPipeline()

        # Load and process documents
        print("Loading and processing documents...")
        documents = rag.load_and_process_data(
            "Full_UN_Resolutions_Formatted.csv",
            "Filtered_SocioEconomic_Countries.csv",
            "Encoded_UNSC_Voting_Data.csv"
        )
        print(f"Processed {len(documents)} documents")

        # Create vectorstore
        print("Creating vector store...")
        vectorstore = rag.create_vectorstore(documents)

        # Setup RAG chain
        print("Setting up RAG chain...")
        chain = rag.setup_rag_chain(vectorstore)

        # Example query
        question = "Go through some resolutions that mention Somalia and summarize what the UN thinks about it?"
        print(f"\nProcessing question: {question}")
        answer = rag.query(chain, question)
        print(f"\nAnswer: {answer}")

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        # The message alone rarely identifies the failing step; keep the
        # traceback visible so the error can actually be diagnosed.
        traceback.print_exc()

if __name__ == "__main__":
    main()

Initializing RAG pipeline...




Loading and processing documents...
Processed 24164 documents
Creating vector store...
Setting up RAG chain...

Processing question: Go through some resolutions that mention Somalia and summarize what the UN thinks about it?


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are an expert analyst of UN resolutions and voting patterns. Based on the provided context, please give a detailed analysis of the question.
    
        Context: Committee”) to Somalia and States in the region in June 2002 and looking forward
to his report in this regard,
Acting under Chapter VII of the Charter of the United Nations,
1.
Requests the Secretary-General to establish, within one month from the
date of adoption of this resolution, in preparation for a Panel of Experts, a team of
experts consisting of two members for a period of 30 days, to provide the
Committee with an action plan detailing the resources and experti

In [18]:
# Build the pipeline at notebook scope so that `rag` and `chain` stay
# available to the query cells that follow.
print("Initializing RAG pipeline...")
rag = RAGPipeline()

# Turn the three CSV files into documents.
print("Loading and processing documents...")
documents = rag.load_and_process_data(
    resolutions_path="Full_UN_Resolutions_Formatted.csv",
    socio_econ_path="Filtered_SocioEconomic_Countries.csv",
    voting_path="Encoded_UNSC_Voting_Data.csv",
)
print(f"Processed {len(documents)} documents")

# Index the documents.
print("Creating vector store...")
vectorstore = rag.create_vectorstore(documents)

# Wire the retriever and the LLM together.
print("Setting up RAG chain...")
chain = rag.setup_rag_chain(vectorstore)

# First question against the freshly built chain.
question = (
    "Analyze Indias stance on sanctions."
)
print(f"\nProcessing question: {question}")
answer = rag.query(
    chain,
    question,
)
print(f"\nAnswer: {answer}")


Initializing RAG pipeline...




Loading and processing documents...
Processed 24164 documents
Creating vector store...
Setting up RAG chain...

Processing question: Analyze Indias stance on sanctions.


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are an expert analyst of UN resolutions and voting patterns. Based on the provided context, please give a detailed analysis of the question.
    
        Context: United Nations 
 
S/RES/1732 (2006)
 
 
 
Security Council 
 
Distr.: General 
21 December 2006 
 
 
 
06-67780 (E)     
*0667780* 
 
 Resolution 1732 (2006) 
 
 
 
 Adopted by the Security Council at its 5605th meeting, on 
21 December 2006 
 
 
 
The Security Council, 
 
Welcomes the report of the Informal Working Group on General Issues of 
Sanctions, established pursuant to paragraph 3 of the note by the President of the 
Security Council dated 17 April 2000 (S/2000/319).

2. 
Recalls the Informal Working Group on Gener

In [19]:
# Follow-up question; reuses the `rag` pipeline and `chain` built earlier.
question = (
    "Assess the diplomatic considerations that guide Russia's decisions regarding sanctions on states with which it has historical or economic ties?"
)
print(f"\nProcessing question: {question}")

answer = rag.query(
    chain,
    question,
)
print(f"\nAnswer: {answer}")



Processing question: Assess the diplomatic considerations that guide Russia's decisions regarding sanctions on states with which it has historical or economic ties?


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are an expert analyst of UN resolutions and voting patterns. Based on the provided context, please give a detailed analysis of the question.
    
        Context: for effective implementation of targeted financial sanctions related to proliferation,  
 
Expressing its gravest concern that the DPRK’s ongoing nuclear and ballistic 
missile-related activities have further generated increased tension in the region and 
beyond, and determining that there continues to exist a clear threat to international 
peace and security,  
 
Acting under Chapter VII of the Charter of the United Nations, and taking 
measures under its Article 41,  
S/RES/2094 (2013)

resolution 1572 (2004) and further rec

In [23]:
# Follow-up question; reuses the `rag` pipeline and `chain` built earlier.
question = (
    "Analyze Germany's economic policies and their impact on the European Union's economic stability and growth."
)
print(f"\nProcessing question: {question}")

answer = rag.query(
    chain,
    question,
)
print(f"\nAnswer: {answer}")



Processing question: Analyze Germany's economic policies and their impact on the European Union's economic stability and growth.


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are an expert analyst of UN resolutions and voting patterns. Based on the provided context, please give a detailed analysis of the question.
    
        Context: Socio-economic data for GERMANY includes: country: GERMANY, Region: WesternEurope, Surface area (km2): 357376, Population in thousands (2017): 82114, Population density (per km2, 2017): 235.6, Sex ratio (m per 100 f, 2017): 97.0, GDP: Gross domestic product (million current US$): 3363600, GDP growth rate (annual %, const. 2005 prices): 1.7, GDP per capita (current US$): 41686.2, Economy: Agriculture (% of GVA): 0.6, Economy: Industry (% of GVA): 30.5, Economy: Services and other activity (% of GVA): 68.9, Employment: Agriculture (% of employed): 1.4, Employment:

In [21]:
# Follow-up question; reuses the `rag` pipeline and `chain` built earlier.
question = (
    "Examine the USA's approach to military interventions and its impact on global security dynamics ?"
)
print(f"\nProcessing question: {question}")

answer = rag.query(
    chain,
    question,
)
print(f"\nAnswer: {answer}")


Processing question: Examine the USA's approach to military interventions and its impact on global security dynamics ?


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are an expert analyst of UN resolutions and voting patterns. Based on the provided context, please give a detailed analysis of the question.
    
        Context: international relations, from the threat or the use of force in any manner inconsistent
with the Purposes of the United Nations, and of peaceful settlement of international
disputes,
Reaffirming the need to adopt a broad strategy of conflict prevention, which
addresses the root causes of armed conflict and political and social crises in a
comprehensive manner, including by promoting sustainable development, poverty

stability and friendly relations among all States, and underlining the overriding
political, humanitarian and moral imperatives as well as the economic advant

In [22]:
# Follow-up question; reuses the `rag` pipeline and `chain` built earlier.
question = (
    "Analyze Russia's military strategy and its implications for regional security in Eastern Europe and beyond."
)
print(f"\nProcessing question: {question}")

answer = rag.query(
    chain,
    question,
)
print(f"\nAnswer: {answer}")



Processing question: Analyze Russia's military strategy and its implications for regional security in Eastern Europe and beyond.


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are an expert analyst of UN resolutions and voting patterns. Based on the provided context, please give a detailed analysis of the question.
    
        Context: 1. 
Recalls the arrangements entered into under the agreements of 12 August 
2008 and of 8 September 2008;  
 
2. 
Calls for the provisions that were set out in paragraph 2 (a) of the 
Agreement on a Ceasefire and Separation of Forces signed in Moscow on 14 May 
1994 (S/1994/583) to be respected, pending consultations and agreement on a 
revised security regime, taking note of the recommendations on the security regime 
contained in the report of the Secretary-General of 4 February 2009;

enforcement and education;
10.
Condemns all violations of the Moscow Agree