In [40]:
import pandas as pd
import os
# --- News data -----------------------------------------------------------
# Load the articles, discard those without body text, and reduce the
# publication timestamp to a calendar day so it can be joined against daily
# prices. `utc=True` gives one consistent timezone before the tz info is
# stripped; `normalize()` zeroes the time-of-day and keeps a datetime64
# column (no detour through Python `date` objects and back).
news_df = (
    pd.read_csv('Financial_news.csv')
    .dropna(subset=['content'])
    .assign(
        published_at=lambda frame: pd.to_datetime(frame['published_at'], utc=True)
        .dt.tz_localize(None)
        .dt.normalize()
    )
    .rename(columns={'published_at': 'date'})
)

# --- Stock price data ----------------------------------------------------
print("Loading stock price data...")
stock_df = pd.read_csv('historical_stock_prices.csv')
stock_df['date'] = pd.to_datetime(stock_df['date'])

# Fill calendar gaps per company: every day between a company's first and
# last price row gets a row, carrying the previous available values forward.
print("Filling missing dates in stock price data...")
all_companies = stock_df['company'].unique()
complete_stock_data = []

for company in all_companies:
    # `reindex(..., method='ffill')` requires a monotonic index without
    # duplicate labels, so sort by date and keep a single row per date first.
    # NOTE(review): `keep='last'` is an arbitrary tie-break for repeated
    # dates — confirm which row should win if the file contains duplicates.
    company_data = (
        stock_df[stock_df['company'] == company]
        .drop_duplicates(subset='date', keep='last')
        .sort_values('date')
        .set_index('date')
    )

    # One entry per calendar day from the first to the last price date.
    full_date_range = pd.date_range(
        start=company_data.index.min(),
        end=company_data.index.max(),
    )

    # Align to the full calendar, forward-filling the added days, then move
    # the date index back into a leading 'date' column.
    company_data = (
        company_data
        .reindex(full_date_range, method='ffill')
        .rename_axis('date')
        .reset_index()
    )

    # Every row of this slice belongs to the same company.
    company_data['company'] = company

    complete_stock_data.append(company_data)

# Concatenate the data for all companies
stock_df = pd.concat(complete_stock_data, ignore_index=True)

print(f"Filled stock price data has {len(stock_df)} entries.")

# --- Merge ---------------------------------------------------------------
# Inner join on ('company', 'date'): only articles with a price row for the
# same company and day are kept.
# NOTE(review): articles dated before a company's first or after its last
# price date have no matching row and are dropped here — confirm that this
# is intended.
print("Merging news and stock data...")
merged_df = pd.merge(news_df, stock_df, on=['company', 'date'], how='inner')

merged_df

Loading stock price data...
Filling missing dates in stock price data...
Filled stock price data has 780 entries.
Merging news and stock data...


Unnamed: 0,company,source,author,title,description,url,date,content,close_price
0,Pfizer,VentureBeat,Michael Nuñez,"Amazon doubles down on Anthropic, positioning ...",Amazon strengthens its position in the AI race...,https://venturebeat.com/ai/amazon-doubles-down...,2024-11-22,Join our daily and weekly newsletters for the ...,25.650000
1,Pfizer,Observer,"Alexandra Tremayne-Pengelly, Alexandra Tremayn...","Amazon Doubles Down On Anthropic Investment, M...",Amazon has invested a total of $8 billion in A...,https://observer.com/2024/11/amazon-invest-bil...,2024-11-22,Amazon (AMZN) is doubling down on its partners...,25.650000
2,Pfizer,Theregister.com,Thomas Claburn,Amazon bets another $4B on Anthropic,"You just gonna stand there, Google, let AWS ta...",https://www.theregister.com/2024/11/22/anthrop...,2024-11-22,"Amid concerns about the return of AI winter, w...",25.650000
3,Pfizer,Freerepublic.com,Frontpagemagazine,Crime of the Century?-Naomi Wolf delivers the ...,"If my father had been alive, I wouldn’t have d...",https://freerepublic.com/focus/f-news/4280185/...,2024-11-22,Skip to comments. Posted on 11/22/2024 7:57:56...,25.650000
4,Pfizer,Techtarget.com,Reda Chouffani,The pros and cons of Drupal,Drupal is a highly customizable and scalable C...,https://www.techtarget.com/searchcontentmanage...,2024-11-22,Drupal's flexibility makes it a viable option ...,25.650000
...,...,...,...,...,...,...,...,...,...
163,Moderna,Biztoc.com,wsj.com,"The Score: Spirit, Tesla, Moderna and More Sto...",Here are some of the major companies whose sto...,https://biztoc.com/x/5a80702f23f764e5,2024-11-15,Vidal is inactive for Monday night's game agai...,36.849998
164,Moderna,ABC News,STAN CHOE AP business writer,Stock market today: Wall Street tumbles as the...,U.S. stocks fell to their worst loss since Ele...,https://abcnews.go.com/Business/wireStory/stoc...,2024-11-15,U.S. stocks fell to their worst loss since Ele...,36.849998
165,Moderna,Wnd.com,"Luis Cornelio, Headlines USA",Big Pharma stocks crumble after Trump taps RFK...,'Together we will clean up corruption...',https://www.wnd.com/2024/11/big-pharma-stocks-...,2024-11-15,"By Ireland Owens, Daily Caller News Foundation...",36.849998
166,Moderna,Newsbreak.com,Ellie Quinlan Houghtaling,Stock Market Tanks as Trump Unveils Nightmare ...,Donald Trump’s controversial picks for his upc...,https://www.newsbreak.com/share/3676224985864-...,2024-11-15,"Mountain View By Ellie Quinlan Houghtaling, D...",36.849998


In [39]:
# "JPMorgan Chase", "Goldman Sachs", "Pfizer", "Moderna",
#         "Apple", "Microsoft", "Tesla", "Nvidia"
import pandas as pd
import os
# Re-read the raw news file and display only the rows whose `company` is
# 'Microsoft'. NOTE: this rebinds `news_df` to the unprocessed file contents.
news_df = pd.read_csv('Financial_news.csv')
news_df.loc[news_df['company'].eq('Microsoft')]

Unnamed: 0,company,source,author,title,description,url,published_at,content
500,Microsoft,PC Gamer,ted.litchfield@futurenet.com (Ted Litchfield),Valve first came up with the Steam Hardware Su...,The first public Steam Hardware Survey came in...,https://www.pcgamer.com/games/fps/valve-first-...,2024-11-24T23:46:11Z,The first public Steam Hardware Survey came in...
501,Microsoft,Slashdot.org,EditorDavid,Microsoft's Controversial 'Recall' Feature is ...,"Microsoft's controversial ""Recall"" feature (in...",https://tech.slashdot.org/story/24/11/24/23252...,2024-11-24T23:28:00Z,
502,Microsoft,Motley Fool Australia,Zach Bristow,BHP shares have fallen out of the global top 2...,Global dividends continue to climb.\nThe post ...,https://www.fool.com.au/2024/11/25/bhp-shares-...,2024-11-24T23:25:52Z,Global dividends continue to climb. Image sour...
503,Microsoft,Herecomesthemoon.net,Mond,The Two Factions of C++,The dream of a single dialect-free C++ has pro...,https://herecomesthemoon.net/2024/11/two-facti...,2024-11-24T23:21:36Z,\n The dream of a single dialect-free C+...
504,Microsoft,Motley Fool Australia,Tristan Harrison,"Up 40% in 2024, why I'd still buy the Global X...",This fund has already delivered great returns....,https://www.fool.com.au/2024/11/25/up-40-in-20...,2024-11-24T23:06:09Z,This fund has already delivered great returns....
...,...,...,...,...,...,...,...,...
595,Microsoft,The Times of India,ET Online,IPL 2025 Auction: Who will join the legendary ...,Chennai Super Kings are strategizing for the u...,https://economictimes.indiatimes.com/news/spor...,2024-11-24T11:51:42Z,Artificial Intelligence(AI) Basics of Generati...
596,Microsoft,Eurogamer.net,Vikki Blake,Microsoft Edge Game Assist lets you check out ...,"Microsoft is introducing a ""seamless, full-fea...",https://www.eurogamer.net/microsoft-edge-game-...,2024-11-24T11:48:58Z,"""It's game-aware and will suggest tips and gui..."
597,Microsoft,SiliconANGLE News,Dave Vellante,Dave Vellante’s Breaking Analysis: The complet...,Breaking Analysis is a weekly editorial progra...,https://siliconangle.com/2024/11/24/dave-vella...,2024-11-24T11:33:12Z,\nUPDATED 06:33 EST / NOVEMBER 24 2024\n\n \nB...
598,Microsoft,Daily Sabah,Amina Ali,"Microsoft sees Türkiye as 'key' market, commit...","Tech giant Microsoft sees Türkiye as a ""key ma...",https://www.dailysabah.com/business/tech/micro...,2024-11-24T11:30:43Z,"Tech giant Microsoft sees Türkiye as a ""key ma..."
