In [None]:
# Notebook-only setup: Jupyter / Google Colab already run an asyncio event loop,
# and nest_asyncio.apply() patches asyncio so that loop can be re-entered by
# code started from a cell. Remove this cell when moving the code to a plain
# .py script.
import nest_asyncio

nest_asyncio.apply()

In [None]:
!pip install llama_index
!pip install llama_index.llms.huggingface
!pip install llama_index.embeddings.huggingface
!pip install llama_index.core



In [None]:
import os
from getpass import getpass

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

# Never commit an API key to the notebook. Reuse OPENAI_API_KEY when the
# environment already provides it; otherwise ask for it interactively
# (getpass does not echo the value, so it does not end up in the cell output).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Chat model and embedding model used by the rest of the notebook.
# Change the model identifiers here to switch to other OpenAI models.
llm = OpenAI(model="gpt-3.5-turbo")
embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# Register both on llama_index's global Settings object so that later cells
# which do not pass a model explicitly use these.
Settings.llm = llm
Settings.embed_model = embed_model


In [None]:
import json
from copy import deepcopy
from llama_index.core.schema import TextNode

# Location of the JSON dataset; a relative path, so it is resolved against the
# notebook's current working directory when the file is opened.
json_file_path = "sample_data/sample_causal.json"

# Function to load JSON data from a file
def load_json_data(file_path):
    """Read the file at ``file_path`` and return its parsed JSON content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value (whatever ``json.load`` produces for the
        document: list, dict, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so non-ASCII text loads the same way on every machine.
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

# Load the dataset once at cell level. create_nodes_from_json later iterates
# over this value and reads the "sentence", "cause" and "effect" keys of each entry.
json_data = load_json_data(json_file_path)

In [None]:
def create_nodes_from_json(data):
    """Build one ``TextNode`` per record in ``data``.

    Every record must provide the keys ``"sentence"``, ``"cause"`` and
    ``"effect"``. The node text combines all three on separate labelled
    lines; cause and effect are additionally stored as node metadata.

    Args:
        data: Iterable of mapping-like records.

    Returns:
        A list of ``TextNode`` objects, in the same order as ``data``.

    Raises:
        KeyError: If a record lacks one of the three required keys.
    """

    def _record_to_node(record):
        # Read the fields once; the same cause/effect values go into both
        # the text body and the metadata.
        sentence = record["sentence"]
        cause = record["cause"]
        effect = record["effect"]
        return TextNode(
            text=f"Sentence: {sentence}\nCause: {cause}\nEffect: {effect}",
            metadata={"cause": cause, "effect": effect},
        )

    return [_record_to_node(record) for record in data]

# Convert every loaded record into a node; `docs` is what the vector index is built from.
docs = create_nodes_from_json(json_data)

# Sanity check: print the text of the first node.
# Note: docs[0] raises IndexError if the dataset produced no nodes.
print(docs[0].get_content())

Sentence: A rise in unemployment leads to an increase in crime rates.
Cause: rise in unemployment
Effect: increase in crime rates


In [None]:
from llama_index.core import VectorStoreIndex
# Build a vector store index over the nodes created from the JSON records.
# NOTE(review): presumably each node is embedded here with Settings.embed_model
# (set in the configuration cell), which would mean this line calls the OpenAI API — confirm.
index = VectorStoreIndex(docs)

In [None]:
!pip install llama_index.postprocessor.flag_embedding_reranker
!pip install git+https://github.com/FlagOpen/FlagEmbedding.git

Collecting git+https://github.com/FlagOpen/FlagEmbedding.git
  Cloning https://github.com/FlagOpen/FlagEmbedding.git to /tmp/pip-req-build-cg_kvovb
  Running command git clone --filter=blob:none --quiet https://github.com/FlagOpen/FlagEmbedding.git /tmp/pip-req-build-cg_kvovb
  Resolved https://github.com/FlagOpen/FlagEmbedding.git to commit 43d4154fcc6b049a475a0f4fb3fc0051851c79f8
  Preparing metadata (setup.py) ... done


In [None]:
from llama_index.postprocessor.flag_embedding_reranker import (
    FlagEmbeddingReranker,
)

# Reranker that the query engine later receives as a node postprocessor.
# NOTE(review): top_n is presumably the number of nodes kept after reranking,
# and the model name presumably refers to a Hugging Face Hub checkpoint that is
# downloaded on first use — confirm against the reranker's documentation.
reranker = FlagEmbeddingReranker(
    top_n=5,
    model="BAAI/bge-reranker-large",
)

In [None]:
from pydantic import BaseModel, Field
from typing import List


# Schema of the structured answer requested from the LLM. All five fields are
# required (each Field uses `...` as its default).
# NOTE(review): the class docstring and the Field descriptions are presumably
# included in the schema/prompt shown to the model, so treat them as
# behavior-relevant text rather than ordinary documentation — confirm.
class Output(BaseModel):
    """Output containing the response, causes, effects, and confidence."""

    # Free-text answer to the question that was asked.
    response: str = Field(..., description="The answer to the user's query.")

    # Causes drawn from the dataset that relate to the query.
    causes: List[str] = Field(
        ...,
        description="The causes found in the dataset related to the user's query."
    )

    # Effects drawn from the dataset that relate to the query.
    effects: List[str] = Field(
        ...,
        description="The effects found in the dataset related to the user's query."
    )

    # Score the model assigns to its own answer. The 0-1 range is stated only
    # in the description; no ge/le constraint enforces it during validation.
    confidence: float = Field(
        ...,
        description="Confidence value between 0-1 of the correctness of the result."
    )

    # The model's justification for the confidence value above.
    confidence_explanation: str = Field(
        ..., description="Explanation for the confidence score."
    )

# Wrap the LLM with Output as its target class; the query engine uses this
# wrapper so that answers come back in the Output structure.
sllm = llm.as_structured_llm(output_cls=Output)


In [None]:
# Query engine over the index: retrieval is configured with similarity_top_k,
# retrieved nodes go through the reranker, and the structured LLM writes the answer.
# NOTE(review): similarity_top_k (5) equals the reranker's top_n (5). If top_n is
# a keep-count, the reranker can only reorder the candidates, never drop any —
# consider retrieving more than 5 so reranking actually filters. Confirm.
query_engine = index.as_query_engine(
    similarity_top_k=5,
    node_postprocessors=[reranker],
    llm=sllm,
    response_mode="tree_summarize",  # you can also select other modes like `compact`, `refine`
)

In [None]:
# Run a sample question through the engine. Printing the response shows the
# structured answer as JSON with the Output fields (see the recorded output below).
response = query_engine.query("What is the reason for increased poverty?")
print(response)

{"response":"High inflation reduces purchasing power and increases poverty.","causes":["high inflation"],"effects":["reduced purchasing power and increased poverty"],"confidence":0.9,"confidence_explanation":"The answer is based on the provided information from multiple sources."}
