# Task 3: Correlation between News and Stock Movement


## 1. Import Necessary Libraries


In [1]:
import pandas as pd
import numpy as np
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

## 2. Load Data


# Load the news data


In [2]:
# Read the analyst-ratings news CSV into a DataFrame.
news_csv_path = '../data/raw_analyst_ratings.csv'
news_df = pd.read_csv(news_csv_path)


# Load stock data


In [3]:
# Ticker symbol -> location of that ticker's historical price CSV.
stock_paths = {
    'NVDA': '../data/NVDA_historical_data.csv',
    'AAPL': '../data/AAPL_historical_data.csv',
    'AMZN': '../data/AMZN_historical_data.csv',
    'GOOG': '../data/GOOG_historical_data.csv',
    'META': '../data/META_historical_data.csv',
    'MSFT': '../data/MSFT_historical_data.csv',
    'TSLA': '../data/TSLA_historical_data.csv',
}

# Ticker symbol -> DataFrame, loaded in the same order as the table above.
stock_dfs = {
    ticker: pd.read_csv(csv_path)
    for ticker, csv_path in stock_paths.items()
}

## 3. Data Preparation


# Convert date columns to datetime


In [9]:
# Run the news `date` column and every ticker's `Date` column through
# pd.to_datetime, replacing each column in place.
news_df['date'] = pd.to_datetime(news_df['date'])
for stock, price_frame in stock_dfs.items():
    price_frame['Date'] = pd.to_datetime(price_frame['Date'])

# Normalize Dates: Strip the time of day from the news dates so they can be aligned with the stock dates


In [10]:
# Reset every news timestamp to midnight so only the calendar date remains.
news_dates = news_df['date']
news_df['date'] = news_dates.dt.normalize()

## 4. Sentiment Analysis on News Headlines


In [11]:
def get_sentiment(text):
    """Return the TextBlob polarity score of a headline.

    Args:
        text: Headline text. Any value that is not a ``str`` (for example
            ``None`` or the float NaN pandas uses for an empty CSV cell) is
            treated as a missing headline.

    Returns:
        ``TextBlob(text).sentiment.polarity`` for string input, or NaN for
        non-string input.
    """
    # TextBlob rejects non-string input with a TypeError, which would abort
    # an entire column-wide apply; report a missing score as NaN instead.
    if not isinstance(text, str):
        return float('nan')
    return TextBlob(text).sentiment.polarity

# Score each headline individually and keep the result alongside the news rows.
headlines = news_df['headline']
news_df['sentiment'] = headlines.map(get_sentiment)

## 5. Calculate Stock Movements


In [12]:
# Add a `Daily_Return` column to every ticker's frame: the fractional change
# of `Close` relative to the preceding row (the first row has no predecessor).
for df in stock_dfs.values():
    closing_prices = df['Close']
    df['Daily_Return'] = closing_prices.pct_change()

## 6. Aggregate Daily Sentiments


In [13]:
# Average the headline sentiment scores that share the same `date` value.
sentiment_by_date = news_df.groupby('date')['sentiment']
daily_sentiment = sentiment_by_date.mean()