In [258]:
import pandas as pd
import openai
import praw
import os
import re

pd.set_option('max_colwidth', 100)
pd.set_option('display.max_columns', None)

In [259]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.indexes.vectorstore import VectorstoreIndexCreator
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import OpenAI

In [260]:
REDDIT_CLIENT_ID = os.environ.get('REDDIT_CLIENT_ID')
REDDIT_CLIENT_SECRET = os.environ.get('REDDIT_CLIENT_SECRET')
REDDIT_USER_AGENT = os.environ.get('REDDIT_USER_AGENT')

reddit = praw.Reddit(client_id=REDDIT_CLIENT_ID, client_secret=REDDIT_CLIENT_SECRET, user_agent=REDDIT_USER_AGENT)

## Query

In [261]:
query = '''
Where can I find a good gluten-free pizza in Stockholm?
'''

## Topic Retrieval

In [262]:
def generate_topics(query, model="gpt-3.5-turbo"):

    messages = [
        {"role": "user", "content": f"Take this query '{query}' and return a list of 10 simple to understand topics (4 words or less) to input in Search so it returns good results."},
    ]

    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0
    )

    response_message = response["choices"][0]["message"]["content"]

    topics = re.sub(r'^\d+\.\s*', '', response_message, flags=re.MULTILINE).split("\n")

    return topics

In [263]:
topics = generate_topics(query)
topics = [topic.strip() for topic in topics]
topics = [topic[1:-1] if (topic.startswith('"') and topic.endswith('"')) or (topic.startswith("'") and topic.endswith("'")) else topic for topic in topics]

topics = [re.sub(r'[^a-zA-Z0-9\s]', ' ', topic) for topic in topics]

topics

['Gluten free pizza Stockholm',
 'Best gluten free pizza',
 'Pizza without gluten',
 'Stockholm pizza options',
 'Gluten free restaurants Stockholm',
 'Pizza delivery gluten free',
 'Stockholm pizza places',
 'Gluten free pizza toppings',
 'Pizza crust gluten free',
 'Vegan gluten free pizza']

## Relevant Subreddit Retrieval

In [264]:
posts = []

for topic in topics:
    for post in reddit.subreddit("all").search(
    topic, limit=10):

        post.comments.replace_more(limit=3)

        # TODO: A check can be added here once we have enough comments for a post. This should drastically reduce the number of requests to the API and time spent.
        for comment in post.comments.list():
            posts.append([comment.id, post.id, post.title, post.author, comment.body])

posts = pd.DataFrame(posts,columns=['source', 'post_id', 'title', 'author', 'text'])

In [265]:
# Drop empty texts or ["deleted"] texts
posts = posts[posts['text'].str.len() > 0]
posts = posts[posts['text'] != "[deleted]"]

# Drop duplicate ids
posts = posts.drop_duplicates(subset=['source'])

## Answering Query with Langchain

In [266]:
posts = posts.to_dict('records')

In [267]:
# Convert posts["text"] to a list of strings
texts = [post["title"] + " " + post["text"] for post in posts]

In [268]:
db = Chroma.from_texts(texts, OpenAIEmbeddings(), metadatas=[{"source": post["source"], "post_id": post["post_id"], "author": post["author"].name} for post in posts])



In [269]:
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 10})

In [270]:
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), retriever, return_source_documents=True)

In [291]:
chat_history = []
result = qa({"question": query, "chat_history": chat_history})

In [292]:
result["answer"]

' Meno Male. Super nice Napoletana style Italian pizza, they have a few restaurants throughout Stockholm.'

In [293]:
result["source_documents"]

[Document(page_content='Gluten free pizza in Stockholm? Any pizzeria', metadata={'source': 'irirsw2', 'post_id': 'xyti59', 'author': 'Head-Commission-8222'}),
 Document(page_content='Gluten free pizza in Stockholm? Crispy pizza', metadata={'source': 'iritand', 'post_id': 'xyti59', 'author': 'Head-Commission-8222'}),
 Document(page_content='Gluten free pizza in Stockholm? Thanks!', metadata={'source': 'irimidh', 'post_id': 'xyti59', 'author': 'Head-Commission-8222'}),
 Document(page_content='Var kan jag köpa glutenfri kebab pizza?? (Stockholm) Quick search:\n\n[https://www.findmeglutenfree.com/se/stockholm/pizza](https://www.findmeglutenfree.com/se/stockholm/pizza)', metadata={'source': 'jef789l', 'post_id': '127mh90', 'author': 'TheRoyalStork'}),
 Document(page_content='Var kan jag köpa glutenfri kebab pizza?? (Stockholm) Tack!', metadata={'source': 'jeg12d4', 'post_id': '127mh90', 'author': 'TheRoyalStork'}),
 Document(page_content='Gluten free pizza in Stockholm? Most places have glu

In [297]:
new_query = "Are most pizzas in Stockholm gluten-free?"

new_result = qa({"question": query + new_query, "chat_history": result["chat_history"]})

In [298]:
new_result["answer"]

' Most places have gluten free pizza. Only Giro and Meno Male have good gluten free pizza. Pretty much all pizzerias serve GF pizza, but most use store-bought pizza bases. Stockholm is like the capital of gluten intolerance so I think you will be fine in most (central) places. Meno Male is a great option for Napoletana style Italian pizza, they have a few restaurants throughout Stockholm.'

In [299]:
new_result["source_documents"]

[Document(page_content='Gluten free pizza in Stockholm? Any pizzeria', metadata={'source': 'irirsw2', 'post_id': 'xyti59', 'author': 'Head-Commission-8222'}),
 Document(page_content='Gluten free pizza in Stockholm? Thanks!', metadata={'source': 'irimidh', 'post_id': 'xyti59', 'author': 'Head-Commission-8222'}),
 Document(page_content='Gluten free pizza in Stockholm? Crispy pizza', metadata={'source': 'iritand', 'post_id': 'xyti59', 'author': 'Head-Commission-8222'}),
 Document(page_content='Gluten free pizza in Stockholm? Most places have gluten free pizza. Only Giro and Meno Male have good gluten free pizza.', metadata={'source': 'irm0583', 'post_id': 'xyti59', 'author': 'Head-Commission-8222'}),
 Document(page_content='Var kan jag köpa glutenfri kebab pizza?? (Stockholm) Quick search:\n\n[https://www.findmeglutenfree.com/se/stockholm/pizza](https://www.findmeglutenfree.com/se/stockholm/pizza)', metadata={'source': 'jef789l', 'post_id': '127mh90', 'author': 'TheRoyalStork'}),
 Documen