In [1]:
import requests
import os
from importlib.machinery import SourceFileLoader
import pandas as pd
import logging
import torch


try:
    config = SourceFileLoader("config", "config.py").load_module()
    os.environ['OPENAI_API_KEY'] = config.OPENAI_API_KEY

    bungie_api_key = config.BUNGIE_API_KEY
    endpoint = "https://www.bungie.net/Platform/Content/Rss/NewsArticles/{pageToken}/"
    page_token = "0"
    include_body = True
    headers = {
        "X-API-Key": bungie_api_key
    }
    params = {
        "includebody": include_body
    }

    results = []

    while page_token is not None:
        response = requests.get(endpoint.format(pageToken=page_token), headers=headers, params=params)

        if response.status_code == 200:
            json_response = response.json()
            if 'NewsArticles' in json_response['Response']:
                results.extend(json_response['Response']['NewsArticles'])
            else:
                print("No NewsArticles found in the response.")
                break
            page_token = json_response['Response']['NextPaginationToken']
        else:
            print("Error:", response.status_code)
            print("Response:", response.text)
            break
except Exception as e:
    print("An error occurred:", str(e))


An error occurred: 'NextPaginationToken'


In [2]:
results[0]

{'Title': 'This Week In Destiny - 06/15/2023',
 'Link': '/7/en/News/Article/this-week-in-destiny-6-15-23',
 'PubDate': '2023-06-15T00:00:00Z',
 'UniqueIdentifier': 'blt85ee83215ddfaef7',
 'Description': 'You’ve got sandbox questions, we’ve got answers. And some solid drip, as a treat.  ',
 'HtmlContent': '<p>This week in Destiny, we’re TWIDdling our way into more epic fashion, answering some sandbox Qs, and offering up a few other reminders that some folks might be looking for. Before we get into this week’s shenanigans, however, let’s quickly look back at <a href="https://www.bungie.net/7/en/News/article/the_first_twid_06_08_2023" target="_blank">what went down last week</a>, in case you missed it:</p><ul><li>New blog name, who dis? Heralding in TWID.</li><li>Walk, walk, fashion baby with a brand-new Guardian fashion contest.</li><li>GM Nightfalls heads up, starting this week.</li><li>Supremacy returned to Crucible.</li><li>A very well-earned apology to Kalli, The Corrupted.</li><li>P

In [3]:
# Convert the results list into a DataFrame
df = pd.DataFrame(results)


In [6]:
import re
pattern = r'Update|Hotfix'
df = df[df['Title'].str.contains(pattern, regex=True, flags=re.IGNORECASE)]
df = df.iloc[0:25].reset_index()
# Print the filtered DataFrame
df['Title']

0                         Destiny 2 Update 7.1.0.2
1                         Destiny 2 Hotfix 6/02/23
2                         Destiny 2 Update 7.1.0.1
3                         Destiny 2 Hotfix 5/26/23
4      Destiny 2 Update 7.1.0 - Season of the Deep
5                         Destiny 2 Hotfix 7.0.5.3
6                         Destiny 2 Hotfix 7.0.5.2
7                         Destiny 2 Hotfix 7.0.5.1
8                         Destiny 2 Update 7.0.5.0
9                         Destiny 2 Hotfix 7.0.0.7
10                        Destiny 2 Hotfix 7.0.0.6
11                       Lightfall Crucible Update
12                        Destiny 2 Hotfix 7.0.0.5
13                        Destiny 2 Hotfix 7.0.0.3
14                        Destiny 2 Update 7.0.0.1
15                        Destiny 2 Hotfix 6.3.0.7
16    Economy Updates And More Coming In Lightfall
17                        Destiny 2 Hotfix 6.3.0.5
18                        Destiny 2 Hotfix 6.3.0.4
19                        Desti

In [7]:
df["HtmlContent"].iloc[0]

'<h2>Activities</h2><h3>Crucible</h3><ul><li>Fixed an issue where the Dazzling Iridescence Trials emblem could be awarded from the Flawless chest without completing the necessary requirements.<ul><li>Players who have already acquired the emblem will need to complete the requirements after this patch goes live in order to equip the emblem.</li></ul></li><li>Fixed an issue with spawn trapping that could occur on meltdown.</li></ul><h3>Raids and Dungeons</h3><ul><li>Fixed an issue where players can damage bosses in Ghosts of the Deep through immunity shields.</li></ul><h2>Gameplay and Investment</h2><h3>Armor</h3><ul><li>Gyrfalcon\'s Hauberk\'s reserve overshield now deploys when a player uses Ensnaring Slam.</li><li>Fixed an issue where Khepri\'s Horn could disable various non-enemy objects.</li><li>Fixed an issue where Vesper of Radius\'s effects were triggering on things other than casting a rift.</li><li>Fixed an issue where Point-Contact Cannon Brace would create lightning strikes on

In [8]:
from bs4 import BeautifulSoup


def extract_list_from_html(html):
    soup = BeautifulSoup(html, 'html.parser')
    list_items = soup.find_all('li')
    result = [li.get_text(strip=True) for li in list_items]
    return result

def clean_html(text):
    cleantext = re.sub(r'([.,?!;:])((?!\s)|$)', r'\1 ', text)
    return cleantext

def parse_html_content(df):
    df['parsed_content'] = df['HtmlContent'].apply(extract_list_from_html)
    df['joined_content'] = df['parsed_content'].apply(lambda x: ' '.join(x))
    df['joined_content'] = df['joined_content'].apply(clean_html)
    return df

In [9]:
# Parse HTML content and create new column
df = parse_html_content(df)

In [10]:
df = df[['Title', 'Link', 'PubDate', 'UniqueIdentifier', 'Description',
       'joined_content']]

In [11]:
print(df.iloc[0]["joined_content"])

Fixed an issue where the Dazzling Iridescence Trials emblem could be awarded from the Flawless chest without completing the necessary requirements. Players who have already acquired the emblem will need to complete the requirements after this patch goes live in order to equip the emblem. Players who have already acquired the emblem will need to complete the requirements after this patch goes live in order to equip the emblem. Fixed an issue with spawn trapping that could occur on meltdown. Fixed an issue where players can damage bosses in Ghosts of the Deep through immunity shields. Gyrfalcon's Hauberk's reserve overshield now deploys when a player uses Ensnaring Slam. Fixed an issue where Khepri's Horn could disable various non-enemy objects. Fixed an issue where Vesper of Radius's effects were triggering on things other than casting a rift. Fixed an issue where Point-Contact Cannon Brace would create lightning strikes on melees other than Thunderclap. Fixed an issue where becoming fr

In [20]:
docs = []
for index, row in df.iterrows():
    doc = {
        'content': row['joined_content'],  
        'meta': {
            'name': row['Title'],
            'link': row['Link'],
            'pub_date': row['PubDate'],
            'unique_id': row['UniqueIdentifier'],
           'description': row['Description']
        }
    }
    docs.append(doc)
 

In [21]:
from haystack.document_stores import InMemoryDocumentStore

logging.basicConfig(format="%(levelname)s - %(name)s -  %(message)s", level=logging.WARNING)
logging.getLogger("haystack").setLevel(logging.INFO)

document_store = InMemoryDocumentStore(use_bm25=True)


INFO - haystack.modeling.utils -  Using devices: CUDA:0 - Number of GPUs: 1


In [22]:
from haystack.nodes import PreProcessor

processor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    split_by="word",
    split_length=100,
    split_respect_sentence_boundary=True,
    split_overlap=50
)

docs = processor.process(docs)

Preprocessing:   0%|          | 0/25 [00:00<?, ?docs/s]



In [23]:
document_store.write_documents(docs)


Updating BM25 representation...:   0%|          | 0/705 [00:00<?, ? docs/s]

In [24]:
from haystack.nodes import BM25Retriever

retriever = BM25Retriever(document_store=document_store)


In [25]:
from haystack.nodes import PromptNode, PromptTemplate

lfqa_prompt = PromptTemplate(
    name="lfqa",
    prompt_text="""Synthesize a comprehensive answer from the following text for the given question. 
                    Provide a clear and concise response that summarizes the key points and information presented in the text. 
                             Your answer should directly pull from the source but be organized. 
                             \n\n Related text: {join(documents)} \n\n Question: {query} \n\n Answer:""",
)

prompt_node = PromptNode(model_name_or_path="google/flan-t5-large", default_prompt_template=lfqa_prompt,  model_kwargs={"stream":True})


INFO - haystack.modeling.utils -  Using devices: CUDA:0 - Number of GPUs: 1
  return self.fget.__get__(instance, owner)()


In [26]:
from haystack.nodes import TransformersReader

reader = TransformersReader(model_name_or_path="ahotrod/albert_xxlargev1_squad2_512", use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0 - Number of GPUs: 1


In [37]:
from haystack.pipelines import Pipeline

pipe = Pipeline()
pipe.add_node(component=retriever, name="retriever", inputs=["Query"])
pipe.add_node(component=reader, name="reader", inputs=["retriever"])
pipe.add_node(component=prompt_node, name="prompt_node", inputs=["reader"])


In [39]:
from haystack.utils import print_answers

results = pipe.run(
    query="what were the changes to immortal?",
    params={
        "retriever": {"top_k": 2}
        ,"reader": {"top_k": 10}
    }
)

print_answers(results, details="all", max_text_len=1000)


<pad> The Immortal Reduced base range value by 10.</s>
'Query: what were the changes to immortal?'
'Answers:'
[   <Answer {'answer': ' Reduced base range value by 10.', 'type': 'extractive', 'score': 0.10633327811956406, 'context': 'r from 1. 45 to 1. 5 (crit damage goes from 21. 8 to 21). The Immortal Reduced base range value by 10. ', 'offsets_in_document': [{'start': 484, 'end': 516}], 'offsets_in_context': [{'start': 70, 'end': 102}], 'document_ids': ['2a0e4c66543552bc658256794ea0daff'], 'meta': {'name': 'Destiny 2 Update 7.1.0 - Season of the Deep', 'link': '/7/en/News/Article/season-deep-update-7-1-0', 'pub_date': '2023-05-23T16:30:00Z', 'unique_id': 'blt56002d0e3c869908', 'description': 'The one about Season of the Deep.', '_split_id': 101, '_split_overlap': [{'doc_id': '25134dcea5689c07049c1964118b44db', 'range': (0, 423)}, {'doc_id': 'fd52d17de2198d99af68379b741fd6fd', 'range': (114, 516)}]}}>,
    <Answer {'answer': ' Reduced base range value by 10.', 'type': 'extractive', 's

In [29]:
from pprint import pprint

pprint(results)


{'answers': [<Answer {'answer': ' Best Dressed Commendation card to the Ironwood Tree', 'type': 'extractive', 'score': 2.004014959311462e-06, 'context': 'dian Rank objectives to make for a more intuitive experience Added the Best Dressed Commendation card to the Ironwood Tree in the Tower. ', 'offsets_in_document': [{'start': 455, 'end': 507}], 'offsets_in_context': [{'start': 70, 'end': 122}], 'document_ids': ['341b724048921ded57783de0e08d932b'], 'meta': {'name': 'Destiny 2 Update 7.1.0 - Season of the Deep', 'link': '/7/en/News/Article/season-deep-update-7-1-0', 'pub_date': '2023-05-23T16:30:00Z', 'unique_id': 'blt56002d0e3c869908', 'description': 'The one about Season of the Deep.', '_split_id': 217, '_split_overlap': [{'doc_id': 'e99e94e3cb804e0cb12529b23b206432', 'range': (0, 339)}, {'doc_id': 'e099f69b59b0df9ef02adffdd7646d8c', 'range': (154, 521)}]}}>,
             <Answer {'answer': ' Best Dressed Commendation card', 'type': 'extractive', 'score': 1.9467454421828734e-06, 'conte