In [None]:
!pip install pypandoc
!pip install -qU \
    datasets==2.14.4 \
    langchain==0.0.274 \
    pinecone-client==2.2.2 \
    openai==0.27.9
!pip install pyrate-limiter

Collecting pyrate-limiter
  Downloading pyrate_limiter-3.1.0-py3-none-any.whl (23 kB)
Installing collected packages: pyrate-limiter
Successfully installed pyrate-limiter-3.1.0


In [None]:
import pypandoc
import re
from pyrate_limiter import Duration, Limiter, Rate
import time
import openai
import os

In [None]:
# Read credentials from the environment rather than pasting them into the notebook,
# so they are never saved with the file. Set OPENAI_API_KEY (and, if needed,
# OPENAI_ORGANIZATION) before starting the kernel; both fall back to "" as before.
openai.organization = os.getenv("OPENAI_ORGANIZATION", "")
openai.api_key = os.getenv("OPENAI_API_KEY", "")

IMPORT JOURNAL

In [None]:
# Convert the Word journal to a plain-text file on disk.
docxFilename = '2015.docx'
output = pypandoc.convert_file(
    docxFilename,
    to='plain',
    outputfile="2015.txt",
)
# The converted text goes to the output file, so the returned string must be empty.
assert output == ""

PREPARE THE DATA

In [None]:
# Load the converted journal text. The context manager closes the handle, and the
# encoding is explicit because pandoc writes UTF-8 regardless of the platform default.
with open("2015.txt", "r", encoding="utf-8") as journal:
  journal_txt = journal.read()

In [None]:
# Remove newlines and table/rule characters one marker at a time (the order matters:
# removing '\n' first can create new '--' runs), collapse double spaces once,
# then split the text into space-separated tokens.
splits = ['\n', '|', '--', '+', '=']
for marker in splits:
  journal_txt = "".join(journal_txt.split(marker))
journal_txt = " ".join(journal_txt.split("  "))
journal_list = journal_txt.split(" ")

In [None]:
def raw_text_to_entries(journal_list, year=2015):
  """Group a flat list of journal tokens into dated entries.

  A new entry starts at a header of the form
  ``<location> [blank tokens] <weekday>, <N>^(st|nd|rd|th) <Month>``:
  a month name whose preceding token ends in an ordinal suffix and whose
  leading part (everything before the last five characters) is a number.

  Args:
    journal_list: list of string tokens (the journal text split on spaces).
    year: year written into every entry's date; defaults to 2015.

  Returns:
    A list of dicts with keys ``'date'`` (``MM-DD-YYYY``), ``'location'`` and
    ``'entry'`` (the entry's tokens joined by single spaces). Text that
    appears before the first header is discarded; input without any header
    (including an empty list) yields ``[]``.
  """
  months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
  ordinal_suffixes = ('(st)', '(nd)', '(rd)', '(th)')
  month_index = {name: number for number, name in enumerate(months, start=1)}

  date_new = "0-0-0"
  location = ""
  entries = []
  entry = []

  for i, token in enumerate(journal_list):
    # Only look back when a previous token exists; index -1 would wrap around.
    day_token = journal_list[i-1] if i >= 1 else ""
    is_header = (
        token in month_index
        and day_token[-4:] in ordinal_suffixes
        and day_token[:-5].isdecimal()
    )

    if not is_header:
      # Ordinary text, including month names used in prose (e.g. "May").
      entry.append(token)
      continue

    # Walk back past the date (i-1) and weekday (i-2), skipping blank tokens,
    # to find the location; stop at the start of the list instead of wrapping.
    l = 3
    while i - l >= 0 and not journal_list[i-l]:
      l += 1

    # The last `l` collected tokens belong to this header (location, blanks,
    # weekday, date), not to the entry that is being closed.
    entries.append({
        'date' : date_new,
        'location' : location,
        'entry' : " ".join(entry[:-l])
    })
    entry = []

    location = journal_list[i-l] if i - l >= 0 else ""
    date_new = f"{month_index[token]:02}-{int(day_token[:-5]):02}-{year}"

  # Close the entry that was still open when the tokens ran out.
  entries.append({
      'date' : date_new,
      'location' : location,
      'entry' : " ".join(entry)
  })
  # The first element is whatever preceded the first header; drop it.
  return entries[1:]

In [None]:
# Build the entries from the tokenized journal; without this assignment `entries`
# is undefined on a fresh kernel. The cell then shows how many entries were parsed.
entries = raw_text_to_entries(journal_list)
len(entries)

GENERATE PROMPTS

In [None]:
def complete_text(query_text, common_args):
    """Request a completion for ``query_text`` and return its stripped text.

    Args:
        query_text: prompt string sent as the ``prompt`` argument.
        common_args: dict of extra keyword arguments forwarded to
            ``openai.Completion.create`` (e.g. model, max_tokens).

    Returns:
        The text of the first choice with surrounding whitespace removed,
        or ``None`` if the request or response handling raised an exception.
    """
    try:
        response = openai.Completion.create(
            **common_args,
            prompt=query_text,
        )
        return response['choices'][0]['text'].strip()
    except Exception as exc:
        # Best effort: report the failure and let the caller decide what to do.
        # print() does not apply %-formatting, so format the message explicitly.
        print(f"Error completing text: {exc}")
        return None


In [None]:
# Arguments shared by every completion request.
common_args = {
    "model": "text-davinci-003",
    "max_tokens": 2500,
}
results = []

for entry in entries:
  query_text = f"""
  I am the narrator of the given text. You are a compassionate psychologist who wants to get to know me by asking
  insightful, thought-provoking, meaningful questions about my day.
  Generate atleast 3 interesting thought-provoking question-answer pairs from the given text.
  Use the following pattern:
  'Question': How did you feel when you thought something might have happened to your father?
  'Answer': When I saw my father's leg immersed into the drain, I felt a wave of fear wash over me.
  I feared the worst and I thought he might be injured or worse, and I couldn't help but feel a sense of helplessness.
  Thankfully, nothing bad happened and I was relieved.

  Date: {entry['date']}
  Location: {entry['location']}
  Journal entry: {entry['entry']}
  """
  # Send the assembled prompt, not the raw entry dict, so the instructions and
  # the example pattern actually reach the model.
  answer = complete_text(query_text, common_args)
  # Pause between requests regardless of the outcome to stay under the rate limit.
  time.sleep(15)
  if answer is None:
    # complete_text already reported the error; skip this entry instead of
    # crashing on write(None).
    continue
  # Append with a trailing newline so consecutive answers do not run together.
  with open("data_01.txt", "a", encoding="utf-8") as text_file:
    text_file.write(answer + "\n")
  results.append(answer)


In [None]:
# Display the completions collected by the generation loop.
results

["Prompt: What motivated you to make this New Year's day perfect?\n  Completion: I was motivated to make this New Year's Day perfect because it was a special day, and I wanted to make the most of it. I wanted to start off the new year in the right way and create lasting memories with my family and friends.",
 "Prompt: How did you react when you saw your father's leg immersed into the drain?\n  Answer: When I saw my father's leg immersed into the drain, I felt shock and a wave of fear wash over me. I quickly ran to him to check if he was okay and my mind raced to think of the worst possible outcome. Thankfully nothing bad happened and I was immensely relieved.",
 'Prompt: What did you feel when you got the auto to the cinema?\nCompletion: When I saw the auto arrive, I felt a sense of relief wash over me. I was so sure that it would arrive and my faith was rewarded. I was excited for the movie and happy to be out with my friends on an adventure.']

CONVERT PROMPT TEXT FILE TO JSONL FILE

In [None]:
## TBD: convert the generated text in data_01.txt into the JSONL training file.
## The upload step reads "conversations.jsonl", which no visible cell creates yet.

PREPARE FOR FINE-TUNING JOB

In [None]:
# Upload the training data for fine-tuning. The context manager closes the file
# once the upload call returns.
# NOTE(review): no visible cell writes conversations.jsonl; it must exist beforehand.
with open("conversations.jsonl", "r") as training_file:
    res = openai.File.create(
        file=training_file,
        purpose='fine-tune'
    )
res

In [None]:
# Keep the ID of the uploaded file; the fine-tuning job is created from it.
file_id = res["id"]
file_id

In [None]:
# Start a gpt-3.5-turbo fine-tuning job on the uploaded file.
# Note: this rebinds `res` from the upload response to the job response.
res = openai.FineTuningJob.create(training_file=file_id, model="gpt-3.5-turbo")
res

In [None]:
# Keep the job ID for the status, event and polling calls.
job_id = res["id"]
job_id

In [None]:
# Fetch and display the current state of the fine-tuning job.
openai.FineTuningJob.retrieve(job_id)

In [None]:
# Display the events recorded for the fine-tuning job.
openai.FineTuningJob.list_events(id=job_id)

In [None]:
from time import sleep

# Poll until the job is over. Besides `finished_at`, also stop on a terminal
# `status`, so a failed or cancelled job cannot keep this loop spinning forever
# in case `finished_at` is never populated for it.
terminal_statuses = {"succeeded", "failed", "cancelled"}

while True:
    res = openai.FineTuningJob.retrieve(job_id)
    if res["finished_at"] is not None or res.get("status") in terminal_statuses:
        break
    print(".", end="")
    sleep(100)

In [None]:
# Display the last job response fetched by the polling loop.
res

In [None]:
# Read the fine-tuned model's name from the job response.
ft_model = res["fine_tuned_model"]
ft_model

In [None]:
# NOTE(review): this unconditionally replaces the model name read from the job
# with a hard-coded ID. Presumably a leftover shortcut for skipping the
# fine-tuning cells; confirm this model is accessible with your API key.
ft_model = 'ft:gpt-3.5-turbo-0613:pinecone::7s8gnk9R'

FEED JOURNAL DATA TO PINECONE

In [None]:
import requests

# Download the helper module that the agent setup imports (`from chains import ...`).
# NOTE(review): this fetches code from a moving branch and later imports it;
# consider pinning the URL to a commit.
res = requests.get(
    'https://raw.githubusercontent.com/pinecone-io/examples/master/learn/generation/openai/fine-tuning/gpt-3.5-agent-training/chains.py',
    timeout=30,
)
# Fail loudly on an HTTP error instead of saving an error page as chains.py.
res.raise_for_status()
with open("chains.py", 'w', encoding="utf-8") as fp:
    fp.write(res.text)

INITIALIZE LANGCHAIN AGENT FOR CHAT SESSION

In [None]:
from langchain.agents import Tool
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferWindowMemory
# `chains` is the local chains.py file downloaded by an earlier cell.
from chains import VectorDBChain

# Chat model backed by the fine-tuned model name held in `ft_model`.
llm = ChatOpenAI(
    temperature=0.5,
    model_name=ft_model
)

# Conversation memory configured with a window size of k=5, exposed to the
# agent under the "chat_history" key.
memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    k=5,
    return_messages=True,
    output_key="output"
)
# Pinecone settings come from PINECONE_ENV / PINECONE_API_KEY (see app.pinecone.io);
# the "YOUR_ENV" / "YOUR_KEY" fallbacks are placeholders, not working values.
# NOTE(review): the index name below looks inherited from a Llama 2 / arXiv
# tutorial rather than the journal data; confirm which index is intended.
vdb = VectorDBChain(
    index_name="llama-2-arxiv-papers",
    environment=os.getenv("PINECONE_ENV") or "YOUR_ENV",
    pinecone_api_key=os.getenv("PINECONE_API_KEY") or "YOUR_KEY"
)

# Wrap the vector-DB query as a tool the agent can call.
# NOTE(review): the description talks about LLM research, not a journal; confirm.
vdb_tool = Tool(
    name=vdb.name,
    func=vdb.query,
    description="This tool allows you to get research information about LLMs."
)

In [None]:
from langchain.agents import AgentType, initialize_agent

# Build a conversational ReAct-style agent from the chat model, the vector-DB
# tool and the conversation memory defined in the previous cell.
agent = initialize_agent(
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    tools=[vdb_tool],
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method="generate",
    memory=memory,
    return_intermediate_steps=True
)

In [None]:
# Run a single query through the agent and display the returned result.
# NOTE(review): the question is about Llama 2, not the journal; confirm it is the intended demo.
agent("tell me about Llama 2?")