In [25]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
import os
from dotenv import load_dotenv

# Load settings from a local .env file (if present) before reading the environment.
load_dotenv()

# Qdrant endpoint taken from the environment; None when the variable is unset.
QDRANT_HOST_URL = os.environ.get('QDRANT_HOST_URL')

qdrant_client = QdrantClient(url=QDRANT_HOST_URL)

# Create the collection only when it is missing, so this cell can be re-run safely.
if not qdrant_client.collection_exists(collection_name="documents-collection"):
    qdrant_client.create_collection(
        collection_name="documents-collection",
        # 1536-dimensional vectors compared by cosine distance.
        vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
    )

In [26]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter

# Source document to index.
pdf = "assets/StockMarketAnalysis.pdf"

# Extract the text of every page and concatenate it with no separator.
with open(pdf, 'rb') as file:
    pdf_reader = PdfReader(file)
    page_texts = [page.extract_text() for page in pdf_reader.pages]
text = "".join(page_texts)

# Split on newlines, aiming for chunks of 1000 characters with a 200-character
# overlap. A single line longer than chunk_size is kept whole, which is why
# langchain may warn about an oversized chunk.
text_splitter = CharacterTextSplitter(
    separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
)
chunks = text_splitter.split_text(text)

Created a chunk of size 1371, which is longer than the specified 1000


In [None]:
# Convert the chunks into points (the central entity in Qdrant)
# and upload them to the vector store.

import uuid
from openai import OpenAI
from qdrant_client.http.models import PointStruct
import os
from dotenv import load_dotenv

load_dotenv()

open_ai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

print(len(chunks), "chunks to embed")

# Embed each chunk and wrap it in a Qdrant point: the embedding is the vector,
# the original chunk text is stored as the payload.
points = []
for idx, chunk in enumerate(chunks):
  print("Embedding chunk", idx+1, "/", len(chunks))
  response = open_ai_client.embeddings.create(
    input=chunk,
    model="text-embedding-ada-002"
  )
  embeddings = response.data[0].embedding
  # Derive the ID from the chunk text instead of a random uuid4. Upsert
  # overwrites a point whose ID already exists, so re-running this cell
  # refreshes the stored points rather than adding duplicates.
  # The namespace is arbitrary but must stay the same between runs.
  point_id = str(uuid.uuid5(uuid.NAMESPACE_OID, chunk))
  points.append(PointStruct(
    id=point_id,
    payload={"text": chunk},
    vector=embeddings
    )
  )

# Skip the request entirely when the document produced no chunks.
if points:
  qdrant_client.upsert(
    collection_name="documents-collection",
    wait=True,
    points=points
  )

In [30]:
query = "What is a candlistick chart?"

# Embed the question itself. The previous code passed `chunk`, a leftover loop
# variable from the indexing cell, so the search ignored the question.
# The model is the same one used to embed the stored chunks.
response = open_ai_client.embeddings.create(
  input=query,
  model="text-embedding-ada-002"
)
embeddings = response.data[0].embedding

# Retrieve the three stored chunks closest to the question embedding.
search_result = qdrant_client.query_points(
    collection_name="documents-collection",
    query=embeddings,
    limit=3
).points

# Prompt = retrieved chunk texts (concatenated as-is) followed by the question.
prompt = "".join(result.payload['text'] for result in search_result)
concatenated_string = " ".join([prompt, query])

completion = open_ai_client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": concatenated_string}
    ]
)

print(completion.choices[0].message.content)

A candlestick chart is a type of financial chart that is used to represent the price movements of a security, derivative, or currency. It is named for the way it looks, as each bar resembles a candle with a wick (the thin vertical line) at both ends and a body (the wider rectangle) in between. Candlestick charts provide more information than traditional line charts, as they display the open, high, low, and close prices for each time period. Traders use candlestick charts to analyze price patterns and make informed trading decisions.
