In [51]:
# Imports
import streamlit as st
from langchain.vectorstores import Qdrant
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from qdrant_client import QdrantClient, models
import os
import qdrant_client
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [52]:
# Connect to Qdrant using the host and API key stored in Streamlit secrets
qdrant_host = st.secrets["QDRANT_HOST"]
qdrant_api_key = st.secrets["QDRANT_API_KEY"]
client = qdrant_client.QdrantClient(qdrant_host, api_key=qdrant_api_key)

In [None]:
# Delete the collection named in Streamlit secrets.
# NOTE(review): this cell has no execution count, and a later cell calls
# client.recreate_collection on the same name -- presumably that already
# replaces an existing collection; confirm whether this cell is still needed.
collection_to_delete = st.secrets["QDRANT_COLLECTION_NAME"]
client.delete_collection(collection_name=collection_to_delete)

In [54]:
# Create the Qdrant collection and wrap it in a LangChain vector store

# Look the collection name up once; both the collection and the store use it
target_collection = st.secrets["QDRANT_COLLECTION_NAME"]

# Vector parameters: 1536 is the vector size for OpenAI; similarity is cosine
vectors_config = models.VectorParams(size=1536, distance=models.Distance.COSINE)

# (Re)create the collection with those vector parameters
client.recreate_collection(
    collection_name=target_collection,
    vectors_config=vectors_config,
)

# Embedding model used by the vector store
embeddings = OpenAIEmbeddings()

# LangChain vector store backed by the collection created above
vector_store = Qdrant(
    client=client,
    collection_name=target_collection,
    embeddings=embeddings,
)

In [55]:
# load webpages to vector store
#
# Character counts of boilerplate text, used by get_vectorstore_from_url to
# trim each scraped page by position (not by matching the text itself).
#
# header_text_length: number of characters dropped from the START of the page
# text. The literal is presumably the site's navigation bar as it appears in
# the scraped text -- TODO confirm against a live page.
header_text_length = len('NF ComicsHOMEcomicsMerchLoreLORE AIMapMORE Characters Contact Us Marketplace Gallery nfh iq join our community Buy on openseabuy now')
# footer_text_length: basis for the number of characters dropped from the END
# of the page text; the function removes (footer_text_length - 100) characters,
# not the full length. The literal is presumably one sample of the site's
# "related posts" + footer region -- TODO confirm it is representative.
footer_text_length = len("<< Back to allrelated postsThe Heroes LeagueThe Hero's League is a prestigious and diverse group of superpowered individuals dedicated to protecting Genesis City and maintaining peace within The Frontier.AlphaDeep within The Frontier, an ancient and pervasive force akin to Ch’i courses through the very essence of every living being.DumaThe head of LD Industries, Alexander Yowleus Duma, or Lord Duma as he calls himself, is one of Genesis City's most prominent businessmen. Volt: Lore AIWant to know more about the lore of the Frontier? Volt is here to help you dive in and get your creative juices flowing.Try nownew frontiercomics Join Our Email ListThank you! Your submission has been received!Oops! Something went wrong while submitting the form.supportNFHEROES.iobecome a vipIP RightsT&CPrivacy© 2024 NFHeroes. All Rights Reserved. Powered by Gen City Labs")

def get_vectorstore_from_url(url):
    """Load one web page, trim it, chunk it, and add the chunks to ``vector_store``.

    Despite the name, this does not return a vector store: it adds the text
    chunks to the module-level ``vector_store`` and returns those chunks.

    Args:
        url: Address of the page to load with ``WebBaseLoader``.

    Returns:
        The list of text chunks that were passed to ``vector_store.add_texts``.
    """
    # Only the first loaded document is used
    page_text = WebBaseLoader(url).load()[0].page_content

    # Trim by position: drop the first header_text_length characters and the
    # last (footer_text_length - 100) characters of the page text.
    trailing_cut = footer_text_length - 100
    body_text = page_text[header_text_length:-trailing_cut]

    # 1000-character chunks with 200 characters of overlap between neighbours
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    pieces = splitter.split_text(body_text)

    vector_store.add_texts(pieces)
    return pieces

In [56]:
# Build the full list of lore-post URLs from a common base and per-page slugs
url_base = 'https://www.newfrontiercomics.com/post/'
url_tails = [
    'the-abandon',
    'the-heroes-league',
    'alpha',
    'duma',
    'night-ape',
    'bella-bash',
    'griff',
    'rat-rider',
    'uptown',
    'downtown',
    'the-docks',
    'industrial-park',
    'new-hope-junior-academy',
]
url_list = [f"{url_base}{slug}" for slug in url_tails]

In [57]:
# Index every page in url_list into the vector store, one URL at a time
for page_url in url_list:
    get_vectorstore_from_url(page_url)
    