In [96]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import yfinance as yf
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer

In [93]:
def get_news_title(soup):
    """Return the article headline: the text of the first ``<h1>`` found in ``soup``.

    Parameters
    ----------
    soup
        Parsed document exposing ``find(tag_name)`` (a ``BeautifulSoup``
        object in this notebook).

    Returns
    -------
    str
        The ``<h1>`` text with leading and trailing whitespace removed.

    Raises
    ------
    ValueError
        If the document contains no ``<h1>`` element.
    """
    heading = soup.find('h1')
    if heading is None:
        # Without this guard the failure surfaces as the opaque
        # "'NoneType' object has no attribute 'text'".
        raise ValueError("no <h1> element found; cannot determine the news title")
    # The title is later used as a file name, so stray newlines or padding
    # from the page markup must not leak into it.
    return heading.text.strip()

# Article whose headline is scraped and archived as JSON below.
link = "https://www.cnbc.com/2023/12/12/us-crude-oil-falls-4percent-as-traders-worry-about-inflation-impacting-demand.html"

# Directory that receives one JSON file per scraped article.
NEWS_LIBRARY_DIR = Path("./news_library_gnews")

# Give up on an unresponsive server instead of hanging the kernel.
REQUEST_TIMEOUT_SECONDS = 30

# Path separators, characters Windows rejects in file names, and (via the
# ord() check below) ASCII control characters are replaced before the title
# is used as a file name.
_FORBIDDEN_FILENAME_CHARS = '<>:"/\\|?*'


def _safe_filename(name, fallback="untitled"):
    """Return ``name`` with file-system-unsafe characters replaced by ``_``.

    Only separators, reserved and control characters are touched, so ordinary
    headlines keep exactly the file name they had before. ``fallback`` is
    returned when nothing usable remains.
    """
    cleaned = "".join(
        "_" if ch in _FORBIDDEN_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in name
    ).strip()
    return cleaned or fallback


# NOTE: despite its name, `request` holds the HTTP *response*; the name is
# kept unchanged in case other cells refer to it.
request = requests.get(link, timeout=REQUEST_TIMEOUT_SECONDS)

print(request.status_code)

# Fail fast on 4xx/5xx rather than scraping the headline of an error page.
request.raise_for_status()

soup = BeautifulSoup(request.content, 'html.parser')


result_dict = {
    "title": get_news_title(soup),
    "link": link,
}


# The scraped title is untrusted text: sanitise it before building a path, and
# make sure the target directory exists so the write cannot fail on a fresh checkout.
NEWS_LIBRARY_DIR.mkdir(parents=True, exist_ok=True)
output_path = NEWS_LIBRARY_DIR / f"{_safe_filename(result_dict['title'])}.json"

with open(output_path, "w", encoding="utf-8") as outfile:
    json.dump(result_dict, outfile, indent=4)


200


In [111]:
# Two reference texts about the oil market. Together they define the TF-IDF
# vocabulary and the inverse-document-frequency weights used below.
# First: a description of the OPEC Monthly Oil Market Report.
opec_report_blurb = 'The OPEC Monthly Oil Market Report (MOMR) covers major issues affecting the world oil market and provides an outlook for crude oil market developments for the coming year. The report provides a detailed analysis of key developments impacting oil market trends in world oil demand, supply as well as the oil market balance. In an effort to continuously enhance the service provided to our readers, we are pleased to announce that the analysis provided in the OPEC Monthly Oil Market Report is now more accessible than ever. As of March 2017, the electronic version of the report includes a bookmark feature, allowing readers to access individual sections directly from the table of contents, to make navigating the report as easy as one click. Additionally, as of February 2019, key tables providing the latest information on the world oil demand and supply balance, including monthly revisions; OECD oil stocks and oil on water; non-OPEC supply and OPEC natural gas liquids; and world rig count as contained in the MOMR appendix tables will also be made available on the OPEC Website and can be downloaded in Excel, for ease of use.'
# Second: a Xinhua dispatch (Vienna, Dec. 13) on OPEC's 2024 demand forecast.
xinhua_dispatch = 'VIENNA, Dec. 13 (Xinhua) -- The Organization of the Petroleum Exporting Countries (OPEC) on Wednesday maintained its previous forecast for a healthy global oil demand growth in 2024, blaming speculators for the recent drop in crude prices. In its monthly oil market report for December, OPEC sees a global demand growth of 2.25 million barrels per day (bpd) for next year, following a demand growth of 2.46 million bpd this year, both unchanged from last months predictions. Noting that it remains cautiously optimistic about oil market fundamentals in 2024, OPEC said in the report that next years oil demand is expected to be supported by "resilient global GDP growth, amid continued improvements in economic activity in China." Oil prices have slid since October, with international benchmark Brent Crude dropping to the low 70 U.S. dollars a barrel from this years peak of over 90 dollars a barrel in September. The drop has continued since OPEC and its allies, a group known as OPEC+, on Nov. 30 announced a new round of production cuts totaling 2.2 million bpd for the first quarter of 2024. In Wednesdays report, OPEC blamed speculators and their heavy selloffs for pushing down oil prices, saying the speculators cut "their bullish positions sharply while increasing short positions." "The market dynamic was fueled by exaggerated concerns about oil demand growth, which negatively impacted market sentiment," the organization added.'

corpus = [opec_report_blurb, xinhua_dispatch]

# Fit the vectorizer on the corpus (not on the title): X holds one TF-IDF row
# per corpus document.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)

learned_terms = vectorizer.get_feature_names_out()
print(learned_terms)

# Project the scraped headline onto the vocabulary learned above.
# `result_dict` comes from the scraping cell, which must have been run first.
headline = result_dict['title']
title_vector = vectorizer.transform([headline])
print(title_vector)


['13' '2017' '2019' '2024' '25' '30' '46' '70' '90' 'about' 'access'
 'accessible' 'activity' 'added' 'additionally' 'affecting' 'allies'
 'allowing' 'also' 'amid' 'an' 'analysis' 'and' 'announce' 'announced'
 'appendix' 'are' 'as' 'available' 'balance' 'barrel' 'barrels' 'be'
 'benchmark' 'blamed' 'blaming' 'bookmark' 'both' 'bpd' 'brent' 'bullish'
 'by' 'can' 'cautiously' 'china' 'click' 'coming' 'concerns' 'contained'
 'contents' 'continued' 'continuously' 'count' 'countries' 'covers'
 'crude' 'cut' 'cuts' 'day' 'dec' 'december' 'demand' 'detailed'
 'developments' 'directly' 'dollars' 'down' 'downloaded' 'drop' 'dropping'
 'dynamic' 'ease' 'easy' 'economic' 'effort' 'electronic' 'enhance' 'ever'
 'exaggerated' 'excel' 'expected' 'exporting' 'feature' 'february' 'first'
 'following' 'for' 'forecast' 'from' 'fueled' 'fundamentals' 'gas' 'gdp'
 'global' 'group' 'growth' 'has' 'have' 'healthy' 'heavy' 'impacted'
 'impacting' 'improvements' 'in' 'includes' 'including' 'increasing'
 'indi