In [None]:
!pip config set global.trusted-host "pypi.org files.pythonhosted.org pypi.python.org"
!pip install python-dotenv
!pip install openai
!pip install --upgrade langchain
!pip install pyyaml
!pip install unstructured
!pip install markdown

In [None]:
# Account for deprecation of the LLM model: the model name depends on whether
# today's date is past a fixed cutoff.
import datetime

# Cutoff date; strictly after it the model is "gpt-3.5-turbo".
target_date = datetime.date(2024, 6, 12)

# Today's date in local time.
current_date = datetime.date.today()

# On or before the cutoff the dated "-0301" model name is used instead.
llm_model = "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"

In [None]:
import os

import openai
from dotenv import find_dotenv, load_dotenv

# Load the variables from the .env file that find_dotenv() locates; the return
# value is bound to `_` because it is not used.
_ = load_dotenv(find_dotenv())

# Hand the key to the openai module. Raises KeyError if OPENAI_API_KEY is not
# present in the environment.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
from langchain.chat_models import ChatOpenAI

# Chat model used by the prompt chains in this notebook; `llm_model` comes from
# the date-based model-selection cell.
llm = ChatOpenAI(
    model=llm_model,
    temperature=0.8,
)

In [None]:
import yaml

# Read the catalogue files with an explicit UTF-8 encoding so parsing does not
# depend on the platform's default text encoding.

# Collection of service names; each entry is later used to locate
# './service_catalogue/<service>.md' and is passed to the prompt.
with open('./service_catalogue/services.yaml', 'r', encoding='utf-8') as file:
    valid_services = yaml.safe_load(file)

# Service descriptions passed to the prompt
# (presumably a mapping of service name -> description; confirm against the file).
with open('./service_catalogue/service_descriptions.yaml', 'r', encoding='utf-8') as file:
    descriptions = yaml.safe_load(file)

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Prompt actually used by `llm_chain`. It expects three inputs:
# `valid_services`, `text` and `descriptions`.
template_string = """ You are a helpful assistant that helps developers get their applications
running in the cloud.

For the following text, determine which service from the list of valid_services the developer is asking about.

valid_services: {valid_services}

text: {text}

The following json dictionary has each service as a key, and includes a high level
description as a value. {descriptions}

the output should be a list of valid services being referenced

"""

prompt_template = PromptTemplate.from_template(template_string)

# Chain that identifies which catalogue services a developer request refers to.
llm_chain = LLMChain(
    llm=llm,
    prompt=prompt_template,
    verbose=True,
)

# Alternative wording of the prompt that is NOT wired into any chain.
# It is kept under its own name so that `template_string` keeps matching the
# prompt in `llm_chain`, and its placeholder is `{valid_services}` so it takes
# the same inputs as the active prompt if it is ever swapped in.
alternative_template_string = """
You are a helpful assistant that will help developers get their application \
running in the cloud.

For the following text, determine which service from the list of valid_services the developer is asking about.

valid_services: {valid_services}

text: {text}

The following json dictionary has each service as a key, and includes a high level \
description as a value. {descriptions} \

format the output as a list of services.

If you think that the user is not asking a question about running an application in the cloud, return nonsense as the service name


"""

In [None]:
# Show which placeholders the prompt template expects to be filled in.
print(prompt_template.input_variables)

In [None]:
# Sample developer request; passed to the chain as its `text` input.
messages = "I want to route to my app using a dns address"

In [None]:
# Ask the service-identification chain which catalogue services the sample
# request refers to; the keyword names match the prompt's placeholders.
output = llm_chain.predict(text=messages, valid_services=valid_services, descriptions=descriptions)

In [None]:
# Display the chain's answer.
print(output)

In [None]:
# Second chain: turns the free-text answer held in `output` into a JSON-style
# list of service names, guided by a single worked example in the prompt.
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Prompt with one example ("the service is compute" -> ["compute"]) followed by
# the text to convert; its only placeholder is `text`.
format_fixer_template_string = """ Take the text as input

Convert the text that describes a list of services into a list of service in json

text: the service is compute

["compute"]

text: {text}


"""

format_fixer_prompt_template = PromptTemplate.from_template(format_fixer_template_string) 

format_correcting_chain = LLMChain(
    llm = llm,
    prompt= format_fixer_prompt_template,
    verbose=True,
)

# NOTE(review): `output` is both the input and the result here, so re-running
# this cell feeds the already-reformatted text back into the chain.
output = format_correcting_chain.predict(
    text=output,
)

In [None]:
# Display `output` again; at this point it holds the format-correcting chain's result.
print(output)

In [None]:
from langchain.text_splitter import MarkdownHeaderTextSplitter

# Split every service's markdown page on its first three heading levels and
# collect the resulting sections in `split_sections`.
headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
]
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on
)

split_sections = []
for service in valid_services:
    document_name = f'{service}.md'
    # Explicit UTF-8 so reading does not depend on the platform's default
    # encoding; the file is closed before the text is split.
    with open(f'./service_catalogue/{document_name}', 'r', encoding='utf-8') as file:
        markdown_text = file.read()

    for section in markdown_splitter.split_text(markdown_text):
        # Record the originating file on each section.
        section.metadata['Document'] = document_name
        print(section.metadata)
        split_sections.append(section)

print(len(split_sections))
# Only inspect the first element when there is one, so an empty catalogue does
# not end the cell with an IndexError.
if split_sections:
    print(type(split_sections[0]))





In [None]:
# now let's build an embedding based on openai embeddings
# The sections collected in `split_sections` are embedded and stored in a
# Chroma vector store.
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
# Persistence is currently disabled (both persist_directory lines are commented out).
# persist_directory = 'docs/chroma/'
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(
    #persist_directory=persist_directory,
    documents=split_sections,
    embedding=embedding,
)
# Print how many entries the store holds. This reads the underscore-prefixed
# (non-public) `_collection` attribute.
# NOTE(review): presumably this should equal len(split_sections) on a fresh kernel; confirm.
print(vectordb._collection.count())

In [None]:
# Sample question, also used later as the query for the QA chain.
question = "How do I ensure there are no ip conflicts?"
# Similarity search against the vector store, requesting k=5 results.
docs = vectordb.similarity_search(question,k=5)

In [None]:
# Display the documents returned by the similarity search.
print(docs)

In [None]:
# Retrieval QA chain: answers a question with the vector store as retriever
# and a custom prompt.
from langchain.chains import RetrievalQA
# Separate chat model instance with temperature 0.0 for the QA chain.
llm_zero = ChatOpenAI(temperature=0.0, model=llm_model)
# Prompt with two placeholders: `context` and `question`.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
qa_chain = RetrievalQA.from_chain_type(
    llm_zero,
    retriever=vectordb.as_retriever(),
    # Ask the chain to return the source documents along with the answer.
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)


In [None]:
# Run the retrieval QA chain with the sample question as its "query" input.
result = qa_chain({"query": question})

In [None]:
# Print the 'result' entry of the chain output.
print(result['result'])

In [None]:
# TODO: figure out whether the user wants to deploy a resource and,
# if so, what the resource is;
# then deploy it for them using Python codegen