In [43]:
import os
import getpass
import api_keys
from langchain.retrievers import EnsembleRetriever
from langchain.chat_models import init_chat_model
from langchain_community.retrievers import WikipediaRetriever
import requests
from langchain.schema.document import Document
from langchain.schema.retriever import BaseRetriever
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from api_keys import GROQ_API_KEY, OPENSANCTIONS_API_KEY

# if not os.environ.get("GROQ_API_KEY"):
#   os.environ["GROQ_API_KEY"] = getpass.getpass("Enter API key for Groq: ")

# api_key = input("Enter your OpenSanctions API Key: ")
# os.environ["OPENSANCTIONS_API_KEY"] = api_key

In [44]:
# The Groq integration reads its credential from the GROQ_API_KEY environment
# variable. Fall back to the key imported from api_keys when the variable is
# not already set, so the notebook also works on a fresh kernel.
if GROQ_API_KEY and not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Chat model shared by the entity-extraction step and the risk-scoring chain.
model = init_chat_model("deepseek-r1-distill-llama-70b", model_provider="groq")

In [45]:
# Raw transaction text typed by the analyst; every later cell works from this
# single string.
transaction = input(
    "Enter the transaction: "
)

In [59]:
from pydantic import BaseModel, Field
# Schema of the entities extracted from the free-text transaction. The class is
# handed to the chat model via `bind_tools`, so the docstring and the field
# descriptions below are presumably forwarded to the model as the tool schema --
# treat them as prompt text, not as ordinary documentation.
class ResponseFormatter(BaseModel):
    """Always use this tool to structure your response to the user."""
    # NOTE(review): "received company" presumably means the company on the
    # receiving side of the transaction -- confirm the intended wording.
    company_name: str = Field(description="name of the received company")
    # The description asks the model for an empty string when no person is found.
    person_name: str = Field(description="Person involved in the transaction, return an empty string if you didn't find any")


# Bind the extraction schema as a tool and let the model pull the entities out
# of the transaction text.
model_with_tools = model.bind_tools([ResponseFormatter])
entities = model_with_tools.invoke(transaction)

# The model may answer in plain text instead of calling the tool. Fail with an
# explicit message rather than an opaque IndexError on the lookup below.
if not entities.tool_calls:
    raise ValueError(
        "The model did not return a ResponseFormatter tool call; "
        "cannot extract entities from the transaction."
    )

# Last expression of the cell: display the extracted arguments.
entities.tool_calls[0]["args"]

{'company_name': 'Tesla, Inc.', 'person_name': ''}

In [None]:
import json 

class OpenSanctionsRetriever(BaseRetriever):
    """Screens a person and a company against the OpenSanctions match API.

    NOTE(review): ``_get_relevant_documents`` takes two names instead of the
    single ``query`` string a retriever usually receives, which is presumably
    why the notebook calls it directly -- confirm before wiring this class
    into a chain or an ensemble.
    """

    def _get_relevant_documents(self, person_name, company_name):
        """Query OpenSanctions and wrap every matched entity in a Document.

        The raw JSON payload is also written to ``convert.txt`` in the working
        directory, because a later cell loads that file into a vector store.

        Args:
            person_name: Name of the individual to screen. Blank or non-string
                values are skipped instead of being sent as an empty query.
            company_name: Name of the company to screen. Blank or non-string
                values are skipped the same way.

        Returns:
            list[Document]: One document per matched entity. ``page_content``
            is ``"Sanctions data for <names>"`` and ``metadata`` carries the
            keys ``id``, ``name``, ``match``, ``topics`` and ``datasets``.
            The list is empty when nothing matched or no usable name was given.

        Raises:
            requests.HTTPError: The API answered with an error status.
            requests.RequestException: The request failed or timed out.
        """
        match_url = "https://api.opensanctions.org/match/default"
        dump_path = "convert.txt"
        # Without a timeout a stalled connection would block the kernel forever.
        timeout_seconds = 30

        # Only send a query for names that are actually present; the
        # extraction step yields "" when no person was found.
        candidates = {
            "query-A": ("Person", person_name),
            "query-B": ("Company", company_name),
        }
        queries = {
            query_id: {"schema": schema, "properties": {"name": [name.strip()]}}
            for query_id, (schema, name) in candidates.items()
            if isinstance(name, str) and name.strip()
        }

        if queries:
            # NOTE(review): OpenSanctions documents this header as
            # "ApiKey <key>"; presumably OPENSANCTIONS_API_KEY already carries
            # that prefix -- confirm.
            response = requests.post(
                match_url,
                headers={"Authorization": OPENSANCTIONS_API_KEY},
                json={"queries": queries},
                timeout=timeout_seconds,
            )
            response.raise_for_status()
            response_json = response.json()
        else:
            # Nothing to look up: skip the network call but still refresh the
            # dump below so it never holds results from an earlier transaction.
            response_json = {"responses": {}}

        with open(dump_path, "w", encoding="utf-8") as convert_file:
            json.dump(response_json, convert_file)

        documents = []
        for query_response in response_json.get("responses", {}).values():
            for result in query_response.get("results", []):
                properties = result.get("properties", {})
                names = properties.get("name", [])
                entity_id = result.get("id")
                entity_info = {
                    "id": entity_id,
                    "name": names,
                    "match": result.get("match"),
                    # dict.fromkeys removes duplicates while keeping the API's
                    # order, so the metadata is stable between runs.
                    "topics": list(dict.fromkeys(properties.get("topics", []))),
                    "datasets": list(dict.fromkeys(result.get("datasets", []))),
                }
                # Readable label: the joined names, or the entity id when the
                # entity has no name property.
                label = ", ".join(str(name) for name in names) or entity_id
                documents.append(
                    Document(
                        page_content=f"Sanctions data for {label}",
                        metadata=entity_info,
                    )
                )
        return documents

In [48]:
open_test = OpenSanctionsRetriever()

# The tool-call arguments are produced by the model, so a field can be missing.
# Fall back to "" (the schema's "nothing found" value) instead of raising
# KeyError.
entity_args = entities.tool_calls[0]["args"]
out = open_test._get_relevant_documents(
    entity_args.get("person_name", ""),
    entity_args.get("company_name", ""),
)

In [49]:
# Read Instructions.md from the working directory as LangChain documents.
loader = TextLoader(file_path="Instructions.md")
docs = loader.load()

# Embeddings are computed locally with a sentence-transformers model.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Index the instruction documents in FAISS; a retriever is derived from this
# store in a later cell.
vectorstore = FAISS.from_documents(documents=docs, embedding=embedding_model)


In [50]:
# Load and vectorize convert.txt, the raw OpenSanctions response written by
# OpenSanctionsRetriever._get_relevant_documents.
loader = TextLoader("convert.txt")
docs = loader.load()

# Reuse the embedding model created for Instructions.md: instantiating the same
# sentence-transformers model a second time only costs time and memory.
vectorstore_sanction = FAISS.from_documents(docs, embedding_model)

In [51]:
# Retriever over the FAISS index built from Instructions.md.
instructions_retriever = vectorstore.as_retriever()

# Retriever over the FAISS index built from the OpenSanctions dump (convert.txt).
sanction_text_ret = vectorstore_sanction.as_retriever()

# Background information on the entities from Wikipedia.
wiki_retriever = WikipediaRetriever()

In [52]:
# Combine the three sources; weights are listed in the same order as the
# retrievers, so the sanctions store gets twice the weight of the other two.
ensemble_retriever = EnsembleRetriever(
    retrievers=[
        wiki_retriever,
        sanction_text_ret,
        instructions_retriever,
    ],
    weights=[
        0.25,
        0.5,
        0.25,
    ],
)

In [53]:
# Risk-scoring prompt with two placeholders: {context} (retrieved documents)
# and {transaction} (the raw transaction text).
# NOTE(review): the wording tells the model to "use the ensemble retriever",
# but in this notebook the retrieved text is injected through {context} --
# consider rewording.
prompt = ChatPromptTemplate.from_template(
    template="""
    You are an agent that helps data analysts in a financial institution by risk scoring a transaction 
    among entities/corporations. It can also include individuals. Given an input transaction, you need to output 
    the risk score (0 to 1) of the transaction, confidence score (0 to 1) and reason for your answer. Use the ensemble retriever
    to get relevant documents to help you make a decision. One of the sources is OpenSanctions API. It need two parameters: Person: <name> and Company: <name>.
    Context: {context}
    Transaction: {transaction}
    """
)


def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        str: The contents concatenated with ``"\\n\\n"``; ``""`` for no documents.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

# Pipeline: the input string is sent to the ensemble retriever (whose documents
# are flattened into one context string) and also passed through unchanged as
# the transaction; both fill the prompt, which goes to the model, and the reply
# is returned as plain text.
chain_test = (
    {
        "context": ensemble_retriever | format_docs,
        "transaction": RunnablePassthrough(),
    }
    | prompt
    | model
    | StrOutputParser()
)

In [54]:
# Score the transaction; the returned string is shown as the cell output.
chain_test.invoke(input=transaction)

'<think>\nOkay, so I have to help assess the risk score for this transaction. Let\'s break down the information given. The transaction ID is TXN20250322113045, which happened on March 22, 2025. The sender is Tesla, Inc., with the identifier TESLA987654321, and the receiver is Adani Green Energy Ltd., with the identifier ADANIGREEN123456. The bank used is JPMorgan Chase Bank, USA, and the State Bank of India. The amount is $500 million in USD, transferred via wire transfer for an investment in renewable energy collaboration, and the status is completed.\n\nFirst, I need to extract key entity information. The sender is Tesla, Inc., a well-known American company, and the receiver is Adani Green Energy Ltd., which is an Indian company. Both are corporations. I should check if either of these entities is on any sanctions lists or if they\'ve been involved in any suspicious activities.\n\nLooking at the transaction details, the amount is quite large—$500 million. That\'s significant and coul