In [None]:
# !pip install langchain
# !pip install python-dotenv
# !pip install openai
# !pip install pypdf
# !pip install tiktoken
# !pip install chromadb


#### IMPORTS

In [2]:
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import CSVLoader
import os
import openai
from dotenv import load_dotenv
from tqdm import tqdm
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
import uvicorn
import nest_asyncio
from fastapi.middleware.cors import CORSMiddleware
from fastapi import FastAPI
from pydantic import BaseModel
from SPARQLWrapper import SPARQLWrapper,JSON

##### DIRECTORIES & ARRAYS

In [4]:
#directories
pdf_directory="./pdf"
csv_directory="./csv"
general_directory = "./fifa_wc_2018"
chroma_directory = "./chroma"


#arrays
csv_data_list = []
csv_files = [os.path.join(csv_directory, file) for file in os.listdir(csv_directory) if file.endswith(".csv")]
all_data=[]
record=[]

##### API_KEY

In [5]:
load_dotenv()
OpenAI.api_key = os.getenv("OPENAI_API_KEY")

## LOADERS 

##### METHOD LOAD PDF 

In [6]:
def LoadPDF(path):
    loader = DirectoryLoader(path, glob="**/*.pdf", show_progress=True, loader_cls=PyPDFLoader)
    return loader.load()

##### METHOD LOAD CSV FROM A DIRECTORY

In [7]:
def LoadCSV(path):
    loader = CSVLoader(file_path=path, encoding="utf-8", csv_args={'delimiter': ','})
    return loader.load()

##### METHOD LOAD CSVS FROM SUBDIRECTORIES AND SUB-SUBDIRECTORIES 

In [8]:
def load_csv_directory(directory_path):
    
    # Go through all general directory
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith(".csv"):
                csv_file_path = os.path.join(root, file)
                # print(f"Loading {csv_file_path}...")
                
                csv_data = LoadCSV(csv_file_path)
                # csv_data_list.append(csv_data)
    
    return csv_data

##### LOADING

In [9]:
loaded_pdfs=LoadPDF(pdf_directory)

#tqdm show progress bar
for csv_file in tqdm(csv_files):
    csv_data1 = LoadCSV(csv_file)
    csv_data_list.append(csv_data1)

csv_data2=load_csv_directory(general_directory)
csv_data_list.append(csv_data2)

100%|██████████| 14/14 [02:07<00:00,  9.09s/it]
100%|██████████| 42/42 [00:01<00:00, 23.87it/s] 


In [8]:
print(csv_data1)
print(csv_data2)
print(loaded_pdfs)

[Document(page_content='Year: 2022\nHost: Qatar\nTeams: 32\nChampion: Argentina\nRunner-Up: France\nTopScorrer: Kylian Mbappé - 8\nAttendance: 3404252\nAttendanceAvg: 53191\nMatches: 64', metadata={'source': 'csv\\world_cup.csv', 'row': 0}), Document(page_content='Year: 2018\nHost: Russia\nTeams: 32\nChampion: France\nRunner-Up: Croatia\nTopScorrer: Harry Kane - 6\nAttendance: 3031768\nAttendanceAvg: 47371\nMatches: 64', metadata={'source': 'csv\\world_cup.csv', 'row': 1}), Document(page_content='Year: 2014\nHost: Brazil\nTeams: 32\nChampion: Germany\nRunner-Up: Argentina\nTopScorrer: James Rodríguez - 6\nAttendance: 3429873\nAttendanceAvg: 53592\nMatches: 64', metadata={'source': 'csv\\world_cup.csv', 'row': 2}), Document(page_content='Year: 2010\nHost: South Africa\nTeams: 32\nChampion: Spain\nRunner-Up: Netherlands\nTopScorrer: Wesley Sneijder, Thomas Müller... - 5\nAttendance: 3178856\nAttendanceAvg: 49670\nMatches: 64', metadata={'source': 'csv\\world_cup.csv', 'row': 3}), Documen

## SPLITTING

In [10]:
def Split(pdfs):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    splitted = text_splitter.split_documents(pdfs)
    return splitted

In [11]:
splitted=Split(loaded_pdfs)
print(splitted)



## VECTOR STORAGE

In [15]:
all_data.extend(splitted)
all_data.extend(csv_data1)
all_data.extend(csv_data2)
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=all_data,embedding=embeddings,persist_directory=chroma_directory)
vector=Chroma(persist_directory=chroma_directory, embedding_function=embeddings)

AuthenticationError: Incorrect API key provided: sk-PGT0a***************************************4mvR. You can find your API key at https://platform.openai.com/account/api-keys.

## CONVERSATIONAL AGENT

In [12]:

agent = ConversationalRetrievalChain.from_llm(
    llm = OpenAI(),
    retriever=vector.as_retriever()
)


NameError: name 'vector' is not defined

In [51]:
def Chat(msg,record):
    answer = agent({"question": msg, "chat_history": record})
    record.append((msg,answer["answer"]))

    return answer["answer"]

In [52]:
Chat("Quien gano la copa mundial de futbol del 2010?",record)

' España ganó la Copa Mundial de Fútbol de 2010.'

In [66]:
Chat("Quien gano la copa mundial de futbol del 2010?",record)

' La selección de España ganó la Copa Mundial de Fútbol del 2010.'

## ANZOGRAPH

##### Connection

In [5]:
def ANZO_Connection():
    url = "http://localhost:80/sparql"
    username = "admin"
    password = "Passw0rd1"

    try:
        sparql = SPARQLWrapper(url)

        sparql.setCredentials(username, password)

        sparql.setQuery("SELECT * WHERE { ?s ?p ?o } LIMIT 1")
        sparql.setReturnFormat(JSON)
        
        results = sparql.query().convert()
        
        if results and "results" in results:
            return True
        else:
            return False
    except Exception as e:
        print(f"An error occurred: {e}")

##### query send to anzograph

In [6]:
def SendQuery(connection,tri):
    if connection:
        url = "http://localhost:80/sparql"
        username = "admin"
        password = "Passw0rd1"

        sparql = SPARQLWrapper(url)
        sparql.setCredentials(username, password)
        sparql.method = 'POST'
        que = f"""
                INSERT DATA
                {{
                GRAPH <FIFA_WC>
                {{
                    _:sujeto <sujeto> "{tri[0]}" ;
                            <verbo> "{tri[1]}" ;
                            <predicado> "{tri[2]}" .
                }}
                }}
            """
        sparql.setQuery(que)
        sparql.setReturnFormat(JSON)
        results =  sparql.query().convert()
        if results in results:
            return "saved successfully"
        else:
            return "error saving"

##### get words

In [None]:
import datetime
current_date = datetime.datetime.now().date()
target_date = datetime.date(2024, 6, 12)

if current_date > target_date:
    llm_model = "gpt-3.5-turbo"
else:
    llm_model = "gpt-3.5-turbo-0301"


def get_completion(prompt, model=llm_model):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
    )
    return response.choices[0].message["content"]

In [None]:
def triplet_maker(sentence):
    pr ="""
    tengo la oracion: 
    "{sentence}"
    devuelve una lista de tres palabras separadas por una coma que sean las mas principales de esa oracion
    """
    res = get_completion(pr)

    triplet = res.split(',')
    return triplet



## FAST API

In [74]:

app = FastAPI()
origins = ["null"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class prompt(BaseModel):
    user_prompt : str


@app.post("/prompt")
async def Post_prompt(prompt : prompt):
    return {"response" : Chat(prompt.user_prompt,record),
            "anzograph" : SendQuery(ANZO_Connection(),triplet_maker(prompt.user_prompt))}

nest_asyncio.apply()
uvicorn.run(app,port=8855)



INFO:     Started server process [24888]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8855 (Press CTRL+C to quit)


INFO:     127.0.0.1:60883 - "OPTIONS /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:60883 - "POST /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:60884 - "OPTIONS /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:60884 - "POST /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:60939 - "OPTIONS /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:60939 - "POST /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:60940 - "OPTIONS /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:60940 - "POST /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:61436 - "OPTIONS /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:61436 - "POST /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:61437 - "OPTIONS /prompt HTTP/1.1" 200 OK
INFO:     127.0.0.1:61437 - "POST /prompt HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [24888]
