In [1]:
import requests
import os
from importlib.machinery import SourceFileLoader
import pandas as pd
import logging
import torch


try:
    config = SourceFileLoader("config", "config.py").load_module()
    os.environ['OPENAI_API_KEY'] = config.OPENAI_API_KEY

    bungie_api_key = config.BUNGIE_API_KEY
    endpoint = "https://www.bungie.net/Platform/Content/Rss/NewsArticles/{pageToken}/"
    page_token = "0"
    include_body = True
    headers = {
        "X-API-Key": bungie_api_key
    }
    params = {
        "includebody": include_body
    }

    results = []

    while page_token is not None:
        response = requests.get(endpoint.format(pageToken=page_token), headers=headers, params=params)

        if response.status_code == 200:
            json_response = response.json()
            if 'NewsArticles' in json_response['Response']:
                results.extend(json_response['Response']['NewsArticles'])
            else:
                print("No NewsArticles found in the response.")
                break
            page_token = json_response['Response']['NextPaginationToken']
        else:
            print("Error:", response.status_code)
            print("Response:", response.text)
            break
except Exception as e:
    print("An error occurred:", str(e))


An error occurred: 'NextPaginationToken'


In [2]:
results[0]

{'Title': 'This Week in Destiny - 06/22/2023',
 'Link': '/7/en/News/Article/this-week-in-destiny-6-22-23',
 'PubDate': '2023-06-21T22:00:00Z',
 'UniqueIdentifier': 'blt5aec7ef1b64cae0b',
 'Description': '',
 'HtmlContent': '<p>Moin Moin (That’s “Hello” in German), Guardians. This week in Destiny, we are going fishing! All over the world, Guardians have fallen in love with those little fishies. So, this week we have something special for you! But first...</p><p>In case you missed something, last week we talked about:</p><ul><li>Pride at Bungie 2023 recap and a free emote.</li><li>A nice little sandbox Q&amp;A to dive a little bit deeper into some of the most frequent burning questions.</li><li>Iron Banner is back with Eruption.</li><li>Fashion contest picks for #DrownInTheDrip.</li><li>PlayStation x Destiny 2 crossover art roundup.</li></ul><p>So, this week we will dive in the following stuff:</p><ul><li>Throne World fishing rally.</li><li>Bungie Day Giving Festival announcement.</li><l

In [41]:
# Convert the results list into a DataFrame
df = pd.DataFrame(results)


In [42]:
import re
pattern = r'Update|Hotfix'
df = df[df['Title'].str.contains(pattern, regex=True, flags=re.IGNORECASE)]
df = df.iloc[0:25].reset_index()
# Print the filtered DataFrame
df['Title']

0                         Destiny 2 Update 7.1.0.2
1                         Destiny 2 Hotfix 6/02/23
2                         Destiny 2 Update 7.1.0.1
3                         Destiny 2 Hotfix 5/26/23
4      Destiny 2 Update 7.1.0 - Season of the Deep
5                         Destiny 2 Hotfix 7.0.5.3
6                         Destiny 2 Hotfix 7.0.5.2
7                         Destiny 2 Hotfix 7.0.5.1
8                         Destiny 2 Update 7.0.5.0
9                         Destiny 2 Hotfix 7.0.0.7
10                        Destiny 2 Hotfix 7.0.0.6
11                       Lightfall Crucible Update
12                        Destiny 2 Hotfix 7.0.0.5
13                        Destiny 2 Hotfix 7.0.0.3
14                        Destiny 2 Update 7.0.0.1
15                        Destiny 2 Hotfix 6.3.0.7
16    Economy Updates And More Coming In Lightfall
17                        Destiny 2 Hotfix 6.3.0.5
18                        Destiny 2 Hotfix 6.3.0.4
19                        Desti

In [43]:
df["HtmlContent"].iloc[0]

'<h2>Activities</h2><h3>Crucible</h3><ul><li>Fixed an issue where the Dazzling Iridescence Trials emblem could be awarded from the Flawless chest without completing the necessary requirements.<ul><li>Players who have already acquired the emblem will need to complete the requirements after this patch goes live in order to equip the emblem.</li></ul></li><li>Fixed an issue with spawn trapping that could occur on meltdown.</li></ul><h3>Raids and Dungeons</h3><ul><li>Fixed an issue where players can damage bosses in Ghosts of the Deep through immunity shields.</li></ul><h2>Gameplay and Investment</h2><h3>Armor</h3><ul><li>Gyrfalcon\'s Hauberk\'s reserve overshield now deploys when a player uses Ensnaring Slam.</li><li>Fixed an issue where Khepri\'s Horn could disable various non-enemy objects.</li><li>Fixed an issue where Vesper of Radius\'s effects were triggering on things other than casting a rift.</li><li>Fixed an issue where Point-Contact Cannon Brace would create lightning strikes on

In [45]:
from bs4 import BeautifulSoup


def extract_list_from_html(html):
    soup = BeautifulSoup(html, 'html.parser')
    list_items = soup.find_all('li')
    result = [li.get_text(strip=True) for li in list_items]
    return result

def clean_html(text):
    cleantext = re.sub(r'([.,?!;:])((?!\s)|$)', r'\1 ', text)
    return cleantext

def parse_html_content(df):
    df['parsed_content'] = df['HtmlContent'].apply(extract_list_from_html)
    df['joined_content'] = df['parsed_content'].apply(lambda x: ' '.join(x))
    df['joined_content'] = df['joined_content'].apply(clean_html)
    return df

In [46]:
# Parse HTML content and create new column
df = parse_html_content(df)

In [47]:
df = df[['Title', 'Link', 'PubDate', 'UniqueIdentifier', 'Description',
       'joined_content']]

In [48]:
print(df.iloc[0]["joined_content"])

Fixed an issue where the Dazzling Iridescence Trials emblem could be awarded from the Flawless chest without completing the necessary requirements. Players who have already acquired the emblem will need to complete the requirements after this patch goes live in order to equip the emblem. Players who have already acquired the emblem will need to complete the requirements after this patch goes live in order to equip the emblem. Fixed an issue with spawn trapping that could occur on meltdown. Fixed an issue where players can damage bosses in Ghosts of the Deep through immunity shields. Gyrfalcon's Hauberk's reserve overshield now deploys when a player uses Ensnaring Slam. Fixed an issue where Khepri's Horn could disable various non-enemy objects. Fixed an issue where Vesper of Radius's effects were triggering on things other than casting a rift. Fixed an issue where Point-Contact Cannon Brace would create lightning strikes on melees other than Thunderclap. Fixed an issue where becoming fr

In [49]:

import tiktoken  # !pip install tiktoken

tokenizer = tiktoken.get_encoding('p50k_base')

# create the length function
def tiktoken_len(text):
    tokens = tokenizer.encode(
        text,
        disallowed_special=()
    )
    return len(tokens)

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=50,
    length_function=tiktoken_len,
    separators=["\n\n", "\n", " ", ""]
)


In [12]:
import os
from importlib.machinery import SourceFileLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
config = SourceFileLoader("config", "config.py").load_module()
os.environ['OPENAI_API_KEY'] = config.OPENAI_API_KEY

In [15]:
pinecone_api_key = config.pinecone_api_key

In [17]:
import pinecone
# connect to pinecone environment
pinecone.init(
    api_key = pinecone_api_key,
    environment = "us-west1-gcp-free"
)


  from tqdm.autonotebook import tqdm


In [18]:
index_name = "extractive-question-answering"

In [67]:
pinecone.list_indexes()

['extractive-question-answering']

In [82]:
pinecone.delete_index("extractive-question-answering")

In [87]:
# check if the extractive-question-answering index exists
if index_name not in pinecone.list_indexes():
    # create the index if it does not exist
    pinecone.create_index(
        index_name,
        dimension=1536,
        metric="dotproduct"
    )

# connect to extractive-question-answering index we created
index = pinecone.Index(index_name)

In [84]:
from langchain.embeddings.openai import OpenAIEmbeddings

model_name = 'text-embedding-ada-002'

embed = OpenAIEmbeddings(
    openai_api_key=os.environ['OPENAI_API_KEY']
)


In [85]:
docs = []
for index, row in df.iterrows():
    doc = {
        'content': row['joined_content'],  
            'name': row['Title'],
            'link': row['Link'],
            'pub_date': row['PubDate'],
            'unique_id': row['UniqueIdentifier'],
           'description': row['Description']
        }
    docs.append(doc)
 

In [88]:
from tqdm.auto import tqdm
from uuid import uuid4

batch_limit = 100

texts = []
metadatas = []

for i, record in enumerate(tqdm(docs)):
    # first get metadata fields for this record
    metadata = {
            'name': record['name'],
            'source': record['link'],
            'pub_date': record['pub_date'],
            'unique_id': record['unique_id'],
           'description': record['description']
        }
    # now we create chunks from the record text
    record_texts = text_splitter.split_text(record['content'])
    # create individual metadata dicts for each chunk
    record_metadatas = [{
        "chunk": j, "text": text, **metadata
    } for j, text in enumerate(record_texts)]
    # append these to current batches
    texts.extend(record_texts)
    metadatas.extend(record_metadatas)
    # if we have reached the batch_limit we can add texts
    if len(texts) >= batch_limit:
        ids = [str(uuid4()) for _ in range(len(texts))]
        embeds = embed.embed_documents(texts)
        index.upsert(vectors=zip(ids, embeds, metadatas))
        texts = []
        metadatas = []


  0%|          | 0/25 [00:00<?, ?it/s]

In [89]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 683}},
 'total_vector_count': 683}

In [90]:
from langchain.vectorstores import Pinecone

text_field = "text"

# switch back to normal index for langchain
index = pinecone.Index(index_name)

vectorstore = Pinecone(
    index, embed.embed_query, text_field
)


In [91]:
query = "what were the changes to the immortal?"

vectorstore.similarity_search(
    query,  # our search query
    k=3  # return 3 most relevant docs
)

[Document(page_content="(crit damage goes from 21. 8 to 21). The Immortal Reduced base range value by 10. Bows Fixed an issue where the Arsenic Bite Bow would display an incorrect charge time on its tooltip. Updated the stats on the Tyranny of Heaven Bow to better compete with the current selection of Lightweight Bows. Miscellaneous and Visuals Fixed an issue where the glow when firing on the Nessa's Oblation Legendary Shotgun was not as bright as intended. Fixed an issue where the Nasreddin and", metadata={'chunk': 80.0, 'description': 'The one about Season of the Deep.', 'name': 'Destiny 2 Update 7.1.0 - Season of the Deep', 'pub_date': datetime.datetime(2023, 5, 23, 16, 30, tzinfo=tzutc()), 'source': '/7/en/News/Article/season-deep-update-7-1-0', 'unique_id': 'blt56002d0e3c869908'}),
 Document(page_content='benefit from the recent buff to their projectile collision radius. They are now consistent with other Heavy Grenade Launchers. Submachine Guns Aggressive Submachine Guns Reduced 

In [92]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# completion llm
llm = ChatOpenAI(
    openai_api_key=config.OPENAI_API_KEY,
    model_name='gpt-3.5-turbo',
    temperature=0.0
)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)


In [93]:
from langchain.chains import RetrievalQAWithSourcesChain

qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)

In [94]:
query = "list all the changes to the immortal."
qa_with_sources(query)

{'question': 'list all the changes to the immortal.',
 'answer': "Changes to the Immortal: reduced base range value by 10. Other changes include: fixed an issue with the Arsenic Bite Bow, updated stats on the Tyranny of Heaven Bow, fixed an issue with the glow on Nessa's Oblation Legendary Shotgun, consistent projectile collision radius for Heavy Grenade Launchers, reduced base damage for Aggressive Submachine Guns, added Swashbuckler and Voltshot to the right trait column, moved Impulse Amplifier from the right trait column to the left, fixed various issues with spawn trapping, damage to bosses in Ghosts of the Deep, and progress in the Into the Depths pursuit. \n",
 'sources': '/7/en/News/Article/season-deep-update-7-1-0, /7/en/News/Article/update_7_1_0_2, /7/en/News/Article/update_7_1_0_1'}

In [95]:
query = "list all the changes to the immortal."
qa.run(query)

'The Immortal had its base range value reduced by 10.'

In [None]:
from langchain.llms import OpenAI
from langchain import LLMChain

davinci = OpenAI(model_name='text-davinci-003')

from langchain import PromptTemplate
from langchain import FewShotPromptTemplate

# create our examples
examples = [
    {
        "query": "How are you?",
        "answer": "I can't complain but sometimes I still do."
    }, {
        "query": "What time is it?",
        "answer": "It's time to get a watch."
    }, {
        "query": "What is the meaning of life?",
        "answer": "42"
    }, {
        "query": "What is the weather like today?",
        "answer": "Cloudy with a chance of memes."
    }, {
        "query": "What is your favorite movie?",
        "answer": "Terminator"
    }, {
        "query": "Who is your best friend?",
        "answer": "Siri. We have spirited debates about the meaning of life."
    }, {
        "query": "What should I do today?",
        "answer": "Stop talking to chatbots on the internet and go outside."
    }
]


# create a example template
example_template = """
User: {query}
AI: {answer}
"""

# create a prompt example from above template
example_prompt = PromptTemplate(
    input_variables=["query", "answer"],
    template=example_template
)

# now break our previous prompt into a prefix and suffix
# the prefix is our instructions
prefix = """The following are exerpts from conversations with an AI
assistant. The assistant is typically sarcastic and witty, producing
creative  and funny responses to the users questions. Here are some
examples: 
"""
# and the suffix our user input and output indicator
suffix = """
User: {query}
AI: """


query = "What is the meaning of life?"

print(few_shot_prompt_template.format(query=query))

from langchain.prompts.example_selector import LengthBasedExampleSelector

example_selector = LengthBasedExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    max_length=50  # this sets the max length that examples should be
)

# now create the few shot prompt template
dynamic_prompt_template = FewShotPromptTemplate(
    example_selector=example_selector,  # use example_selector instead of examples
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["query"],
    example_separator="\n"
)

The following are exerpts from conversations with an AI
assistant. The assistant is typically sarcastic and witty, producing
creative  and funny responses to the users questions. Here are some
examples: 



User: How are you?
AI: I can't complain but sometimes I still do.



User: What time is it?
AI: It's time to get a watch.



User: What is the meaning of life?
AI: 42



User: What is the weather like today?
AI: Cloudy with a chance of memes.



User: What is your favorite movie?
AI: Terminator



User: Who is your best friend?
AI: Siri. We have spirited debates about the meaning of life.



User: What should I do today?
AI: Stop talking to chatbots on the internet and go outside.



User: What is the meaning of life?
AI: 
