In [1]:
import os
import sys
import warnings

from langchain.vectorstores.pgvector import PGVector, DistanceStrategy, Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader, TextLoader
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.chat_models import ChatOpenAI

from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from pgvector.sqlalchemy import Vector

In [3]:
#!jupyter nbextension enable --py widgetsnbextension

In [1]:
from platform import python_version
print(python_version())

3.9.18


In [3]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [28]:
# !pip install -r requirements.txt

In [33]:
# !pip install psycopg2-binary pgvector

In [13]:
# !pip install openai
# !pip install -U sentence-transformers
# !pip install transformers
# !pip install transformers[torch]
# !pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
# !conda install pytorch torchvision torchaudio cpuonly -c pytorch# 
#!conda install pytorch torchvision torchaudio cpuonly -c pytorch#


In [14]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [15]:
from langchain.embeddings import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

In [None]:
embeddings

In [18]:
documents = []

# TXT file
loader = TextLoader('./files/tale.txt')
documents.extend(loader.load_and_split(CharacterTextSplitter(chunk_size=200, chunk_overlap=0, separator = "\n\n")))

# PDF file
# loader = PyPDFLoader('./files/Understanding_Troubleshooting_HDD_SSD_Dell_US.pdf')
# documents.extend(loader.load_and_split(CharacterTextSplitter(chunk_size=256,
# chunk_overlap=20, separator = "\n\n")))

# Piece of text
# documents.append(Document(page_content="This might be a valuable text."))

In [17]:
#documents

In [34]:
# database: postgres
# schema: public

COLLECTION_NAME = "sample_collection"
# CONNECTION_STRING = "postgresql+psycopg2://postgres:fredsena@localhost:5432/postgres"

store = PGVector.from_documents(
    embedding=embeddings,
    documents=documents,
    distance_strategy = DistanceStrategy.COSINE, # The distance strategy to use. (default: COSINE)
    collection_name=COLLECTION_NAME, # The name of the collection to use. (default: langchain)
    #connection_string=os.getenv("PGVECTOR_CONNECTION_STRING"),
    connection_string= "postgresql://postgres:fredsena@localhost:5432/vectordb"
)

In [None]:
from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
embeddings = model.encode(sentences)
print(embeddings)

In [None]:
# os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2",)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

In [5]:
model_id = "sentence-transformers/all-MiniLM-L6-v2"
hf_token = "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"

import requests

api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
headers = {"Authorization": f"Bearer {hf_token}"}


In [6]:
def query(texts):
    response = requests.post(api_url, headers=headers, json={"inputs": texts, "options":{"wait_for_model":True}})
    return response.json()


In [7]:
texts = ["How do I get a replacement Medicare card?",
        "What is the monthly premium for Medicare Part B?",
        "How do I terminate my Medicare Part B (medical insurance)?",
        "How do I sign up for Medicare?",
        "Can I sign up for Medicare Part B if I am working and have health insurance through an employer?",
        "How do I sign up for Medicare Part B if I already have Part A?",
        "What are Medicare late enrollment penalties?",
        "What is Medicare and who can get it?",
        "How can I get help with my Medicare Part A and Part B premiums?",
        "What are the different parts of Medicare?",
        "Will my Medicare premiums be higher because of my higher income?",
        "What is TRICARE ?",
        "Should I sign up for Medicare Part B if I have Veterans' Benefits?"]

output = query(texts)


In [None]:
output

In [18]:
import os
import requests

# https://dictionarygame-api.azurewebsites.net/admin/9440341601894424/games
# https://dictionarygame-api.azurewebsites.net/gamestatus/2023051105424097922400/allgameresults

def get_players_scores(game_code: str):
    """Get players scores data from the online game DictionaryGame.net, and return the list of winners"""
    api_endpoint = "https://dictionarygame-api.azurewebsites.net/admin/9440341601894424/games"
    #header_dic = {"Authorization": f'Bearer {os.environ.get("PROXYCURL_API_KEY")}'}

    # response = requests.get(
    #     api_endpoint, params={"url": linkedin_profile_url}, headers=header_dic
    # )

    response = requests.get(api_endpoint)

    if response.status_code == 404:
        return ""

    data = response.json()
    # data = {
    #     k: v
    #     for k, v in data.items()
    #     if v not in ([], "", "", None)
    #     and k in ["gameScore"]
    # }
    
    # if data.get("groups"):
    #     for group_dict in data.get("gameScore"):
    #         group_dict.pop("profile_pic_url")

    return data

In [19]:
result = get_players_scores('123456')
print(result)

[{'createdat': '2023-02-11T09:33:38.910Z', 'gamecode': '202302110933389102800'}, {'createdat': '2023-02-11T08:50:50.268Z', 'gamecode': '2023021108505026898600'}, {'createdat': '2023-02-10T05:22:46.585Z', 'gamecode': '2023021005224658535700'}, {'createdat': '2023-02-09T07:30:25.359Z', 'gamecode': '2023020907302535944300'}, {'createdat': '2023-02-09T07:29:48.089Z', 'gamecode': '202302090729480895100'}, {'createdat': '2023-02-09T06:41:08.288Z', 'gamecode': '2023020906410828831100'}, {'createdat': '2023-02-09T06:33:11.618Z', 'gamecode': '2023020906331161814500'}, {'createdat': '2023-02-09T06:25:02.956Z', 'gamecode': '2023020906250295680500'}, {'createdat': '2023-02-09T06:11:04.109Z', 'gamecode': '2023020906110410991400'}, {'createdat': '2023-02-08T21:18:00.118Z', 'gamecode': '2023020821180011847200'}, {'createdat': '2023-02-08T20:56:22.279Z', 'gamecode': '2023020820562227961600'}, {'createdat': '2023-02-08T05:54:40.018Z', 'gamecode': '2023020805544001850800'}]


In [50]:
from pydantic import BaseModel, Field

class AdminGameListInput(BaseModel):
    """Input for admin game code."""

    gamecode: str = Field(..., gamecode="Admin code to retrive list of game codes")


In [51]:
import os
import requests
import json
from langchain.agents import Tool
from langchain.tools import BaseTool
from typing import Optional, Type

class AdminGameListTool(BaseTool):
    name = "Get all games created by the Game administrator"
    description = "Useful for when you need to get a list of all game codes created by the Game administrator from the online game DictionaryGame.net webgame"

    def _run(self, admin_code: str):
        
        def get_game_list(admin_code: str):            
            api_endpoint = f"https://dictionarygame-api.azurewebsites.net/admin/{admin_code}/games"            
            response = requests.get(api_endpoint)
            
            if response.status_code == 404:
                return ""
                
            # data = response.json()
            return response.json()        

        return get_game_list(admin_code)
        
    def _arun(self, admin_code: str):
        raise NotImplementedError("This tool does not support async")

    args_schema: Optional[Type[BaseModel]] = AdminGameListInput

admin_gamelist_getter = AdminGameListTool()

In [11]:
from pydantic import BaseModel
from typing import Dict, List

# class GameScoreModel(BaseModel):
#     gameScore: Dict[str, str]

class GameScoreItem(BaseModel):
    playername: str
    score: str
    
class Model(BaseModel):
    gameScore: List[GameScoreItem]


In [52]:
from pydantic import BaseModel, Field

class PlayersScoresInput(BaseModel):
    """Input for game code check."""

    gamecode: str = Field(..., gamecode="The game code to get the players scores of the game")


In [53]:
import os
import json
import requests
from langchain.agents import Tool
from langchain.tools import BaseTool
from pydantic import ValidationError

class PlayersScoresTool(BaseTool):
    name = "Get Players Scores"
    description = "Useful for when you need to get a list of Players Scores from the online game DictionaryGame.net webgame"

    def _run(self, game_code: str):
        
        def get_players_scores(game_code: str):            
            api_endpoint = f"https://dictionarygame-api.azurewebsites.net/gamestatus/{game_code}/allgameresults"        
            
            response = requests.get(api_endpoint)
            
            if response.status_code == 404:
                return ""
                
            data = response.json()
            data = {
                k: v
                for k, v in data.items()
                if v not in ([], "", "", None)
                and k in ["gameScore"]
            }        
            # return data

            # print(response.json())
            
            try:
                # Attempt to parse the response JSON into the Pydantic model
                # game_score_data = GameScoreModel(**response.json())
                # game_score_data = Model(**data)
                # return game_score_data.gameScore
                return json.dumps(data)
            except ValidationError as e:
                # Handle validation errors here (e.g., log or raise an exception)
                print(f"Validation error: {e}")
                return {}        

        return get_players_scores(game_code)
        
    def _arun(self, game_code: str):
        raise NotImplementedError("This tool does not support async")

    args_schema: Optional[Type[BaseModel]] = PlayersScoresInput
    

players_scores_getter = PlayersScoresTool()

In [20]:
result = players_scores_getter._run('2023021005224658535700')
print(result)

{"gameScore": [{"playername": "Keyla", "score": "90"}, {"playername": "Fred", "score": "50"}, {"playername": "Lucas", "score": "50"}]}


In [27]:
fixed_prompt = '''You are an Assistant designed to be able to assist with a specific range of tasks, from answering simple questions to providing explanations and discussions on a specific and limited range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
Assistant should use the available tools for questions about these topics. If the tool does not returns data, just say that information was not found.
Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a specific and limited range of questions. 
Overall, Assistant is a powerful system that can help with a wide range of tasks and provide valuable insights and information on a specific range of topics. Whether you need help with a specific question or just want to have a information about a particular topic, Assistant is here to assist.'''


In [54]:
# Set up the turbo LLM
turbo_llm = ChatOpenAI(
    temperature=0,
    model_name='gpt-3.5-turbo'
)

In [55]:
from langchain.agents import AgentType, initialize_agent
from langchain.prompts.chat import SystemMessagePromptTemplate
from langchain.chains.conversation.memory import ConversationBufferWindowMemory

# conversational agent memory
memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=3,
    return_messages=True
)

tools = [admin_gamelist_getter, players_scores_getter]

# tools = [players_scores_getter]

conversational_agent = initialize_agent(
    agent='chat-conversational-react-description',
    # agent='openai-multi-functions',
    tools=tools,
    llm=turbo_llm,
    verbose=True,
    max_iterations=3,
    memory=memory,
    # agent_type=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION
)

In [56]:
conversational_agent.agent.llm_chain.prompt.messages[0].prompt.template = fixed_prompt

In [40]:
conversational_agent("who won the game number: 2023021005224658535700? return only the name of the player")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Get Players Scores",
    "action_input": "2023021005224658535700"
}[0m
Observation: [33;1m[1;3m{"gameScore": [{"playername": "Keyla", "score": "90"}, {"playername": "Fred", "score": "50"}, {"playername": "Lucas", "score": "50"}]}[0m
Thought:[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "Keyla"
}[0m

[1m> Finished chain.[0m


{'input': 'who won the game number: 2023021005224658535700? return only the name of the player',
 'chat_history': [HumanMessage(content='based on the admin code: 9440341601894424 return the data', additional_kwargs={}, example=False),
  AIMessage(content='Here are the games created by the Game administrator with the admin code 9440341601894424:\n\n- Game Code: 202302110933389102800, Created At: 2023-02-11T09:33:38.910Z\n- Game Code: 2023021108505026898600, Created At: 2023-02-11T08:50:50.268Z\n- Game Code: 2023021005224658535700, Created At: 2023-02-10T05:22:46.585Z\n- Game Code: 2023020907302535944300, Created At: 2023-02-09T07:30:25.359Z\n- Game Code: 202302090729480895100, Created At: 2023-02-09T07:29:48.089Z\n- Game Code: 2023020906410828831100, Created At: 2023-02-09T06:41:08.288Z\n- Game Code: 2023020906331161814500, Created At: 2023-02-09T06:33:11.618Z\n- Game Code: 2023020906250295680500, Created At: 2023-02-09T06:25:02.956Z\n- Game Code: 2023020906110410991400, Created At: 202

In [58]:
conversational_agent("based on the admin code: 9440341601894424 return the data, then search based on gamecode data who won each game")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Get Players Scores",
    "action_input": "202302110933389102800,2023021108505026898600,2023021005224658535700,2023020907302535944300,202302090729480895100,2023020906410828831100,2023020906331161814500,2023020906250295680500,2023020906110410991400,2023020821180011847200,2023020820562227961600,2023020805544001850800"
}[0m

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
conversational_agent.run("based on the admin code: 9440341601894424 create a list informing \
only the name of the player who won for each game and its respective game date. Return a list in json format")

In [None]:
conversational_agent("based on the admin code: 9440341601894424 return a list of all games")

In [None]:
conversational_agent.run("combining the tools available based on the admin code: 9440341601894424 return the data and take each gamecode tell me who won each game")

In [None]:
conversational_agent.run("who won the game number: 2023020906331161814500? return only the name of the player")