# Using LangChain

In [1]:
import pgvector
import os
import keys
import getpass
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain.document_loaders import DataFrameLoader
from langchain.docstore.document import Document
import pandas as pd
import psycopg2

In [None]:
# OpenAI API key, read from the imported `keys` module; it is passed to the
# OpenAI embeddings client and the OpenAI LLM in later cells.
api_key = keys.OPENAI_KEY

In [None]:
# Load the current-season report data for all zones.
all_data = pd.read_csv('output_data/incomplete_All_Zones_Current_Season_reports_data.csv')

# Column names used by the rest of the notebook. `columns` is kept as a
# module-level name because the notebook has always defined it here.
columns = [
    'date', 'zone', 'overall_risk', 'above_treeline_risk', 'near_treeline_risk',
    'below_treeline_risk', 'bottom_line_text', 'problem_type_text',
    'forecast_discussion_text',
]
all_data.columns = columns

# Build the single text field that is later split and embedded.
# - Missing sections are replaced with '' first: with plain `+`, a NaN in any one
#   column would turn the whole combined value into NaN.
# - Sections are joined with a space so the last word of one section is not fused
#   to the first word of the next.
text_sections = (
    all_data[['bottom_line_text', 'problem_type_text', 'forecast_discussion_text']]
    .fillna('')
    .astype(str)
)
all_data['combined_text'] = text_sections['bottom_line_text'].str.cat(
    [text_sections['problem_type_text'], text_sections['forecast_discussion_text']],
    sep=' ',
)

### Loading the text data from a pandas dataframe:

In [None]:
# Turn the DataFrame into documents, using `combined_text` as the content column.
loader = DataFrameLoader(all_data, page_content_column='combined_text')
documents = loader.load()

# Split the documents into chunks (chunk_size=100, chunk_overlap=10).
text_splitter = CharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=10,
)
docs = text_splitter.split_documents(documents)

# OpenAI embeddings client, authenticated with the key loaded earlier.
embeddings = OpenAIEmbeddings(
    openai_api_key=api_key,
)

### Using Qdrant (Vector DB)

In [None]:
# Embed the chunks and store them in the "nwac_qdrant" collection of a local
# Qdrant store located at the `qdrant_data` path.
qdrant = Qdrant.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name="nwac_qdrant",
    path="qdrant_data",
)

In [None]:
from langchain import OpenAI, VectorDBQA

# LLM client used by the question-answering chain.
llm = OpenAI(openai_api_key=api_key)

# QA chain over the `qdrant` vector store, using the "stuff" chain type.
# return_source_documents=False: source documents are not requested in the result.
qa = VectorDBQA.from_chain_type(
    vectorstore=qdrant,
    llm=llm,
    chain_type="stuff",
    return_source_documents=False,
)

In [None]:
# Run one natural-language question through the QA chain built above and print
# the returned answer.
query = "What conditions cause the highest avalanche rating on Steven's Pass?"
results = qa.run(query)
print(results)

 High Danger. Overnight strong wind and heavy snow will make it easy to trigger large avalanches near and above treeline. Avoid being on or under slopes over 35 degrees where the wind has drifted recent snow into deeper slabs. If the new snow slides easily in snowpack tests or recent avalanches at any elevation, stick to lower-angle slopes.


In [2]:
# Upload the chunks to a Qdrant Cloud cluster.
#
# Requires `docs` and `embeddings` from the document-loading cell: running this
# cell on a fresh kernel without that cell fails with
# "NameError: name 'docs' is not defined".
#
# The cluster endpoint can be overridden with the QDRANT_URL environment variable;
# when it is unset, the default cluster URL below is used.
q_url = os.environ.get(
    'QDRANT_URL',
    'https://6c3c7456-5df9-4a62-aa7a-961212c53a01.us-east-1-0.aws.cloud.qdrant.io',
)
qdrant_key = keys.QDRANT_KEY

# Note: this rebinds `qdrant`, which previously referred to the local store.
qdrant = Qdrant.from_documents(
    docs, embeddings,
    url=q_url, api_key=qdrant_key,
    collection_name="nwac_current_season_reports",
)

NameError: name 'docs' is not defined