In [1]:
import pandas as pd
import numpy as np

In [3]:
import yfinance as yf

def download_stock_data(symbol, start_date, end_date):
    """
    Fetch historical price data for one ticker via yfinance.

    Parameters:
    symbol (str): Ticker to look up (e.g., AAPL for Apple Inc.).
    start_date (str): First date of the requested range, YYYY-MM-DD.
    end_date (str): Last date of the requested range, YYYY-MM-DD.

    Returns:
    pandas.DataFrame: Whatever ``yf.download`` returns for the given
    symbol and date range.
    """
    # Thin pass-through: the dates are forwarded as the start/end keywords.
    return yf.download(symbol, start=start_date, end=end_date)




In [6]:
# Example usage: fetch the price history of one ticker over a fixed date range.
# Specify the stock symbol, start date, and end date
symbol = "AAPL"  # Example: Apple Inc.
start_date = "2020-01-01"
end_date = "2024-04-20"

# Download stock data
stock_data = download_stock_data(symbol, start_date, end_date)

# Display the downloaded data (the bare last expression is rendered as the cell output)
stock_data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,74.059998,75.150002,73.797501,75.087502,73.059425,135480400
2020-01-03,74.287498,75.144997,74.125000,74.357498,72.349136,146322800
2020-01-06,73.447502,74.989998,73.187500,74.949997,72.925644,118387200
2020-01-07,74.959999,75.224998,74.370003,74.597504,72.582664,108872000
2020-01-08,74.290001,76.110001,74.290001,75.797501,73.750237,132079200
...,...,...,...,...,...,...
2024-04-15,175.360001,176.630005,172.500000,172.690002,172.690002,73531800
2024-04-16,171.750000,173.759995,168.270004,169.380005,169.380005,73711200
2024-04-17,169.610001,170.649994,168.000000,168.000000,168.000000,50901200
2024-04-18,168.029999,168.639999,166.550003,167.039993,167.039993,43122900


In [51]:
import datetime
import os

import requests
import newspaper

def get_indepth_news(url):
    """
    Download the page at ``url`` and return its extracted article text.

    Parameters:
    url (str): Address of the article to fetch.

    Returns:
    str: The ``text`` attribute of the parsed ``newspaper.Article``.
    """
    page = newspaper.Article(url)
    # download() must run before parse(); parse() is what fills in .text.
    page.download()
    page.parse()
    return page.text

def extract_news(stock_symbol, days_ago=5, api_key=None):
    """
    Fetch recent news articles about a stock symbol from NewsAPI.

    Queries the ``/v2/everything`` endpoint for articles published since
    ``days_ago`` days before today, then tries to download the full text of
    every returned article with ``get_indepth_news``.

    Parameters:
    stock_symbol (str): Search term sent as the ``q`` query parameter.
    days_ago (int): How many days back from today the search window starts.
    api_key (str, optional): NewsAPI key. When omitted, the key is read from
        the ``NEWSAPI_KEY`` environment variable so that no credential has
        to be stored in the notebook.

    Returns:
    pandas.DataFrame: One row per article with the columns ``title``,
    ``description``, ``url``, ``long_description`` and ``content``.
    ``long_description`` is the downloaded article text, or the article
    title when the download fails.

    Raises:
    ValueError: If no API key is passed and ``NEWSAPI_KEY`` is not set.
    RuntimeError: If the API response does not report status ``ok``.
    """
    if api_key is None:
        api_key = os.environ.get("NEWSAPI_KEY")
    if not api_key:
        raise ValueError(
            "No NewsAPI key available: pass api_key=... or set the "
            "NEWSAPI_KEY environment variable."
        )

    # Start of the search window, formatted as YYYY-MM-DD.
    from_date = (datetime.date.today() - datetime.timedelta(days=days_ago)).isoformat()

    # Passing the query through `params` lets requests URL-encode the values,
    # so symbols containing '&', '#' or spaces cannot corrupt the query string.
    # The timeout keeps an unresponsive server from hanging the cell forever.
    response = requests.get(
        "https://newsapi.org/v2/everything",
        params={"q": stock_symbol, "from": from_date, "apiKey": api_key},
        timeout=30,
    )
    payload = response.json()
    if payload.get("status") != "ok":
        # Surface the API's own diagnostics instead of a KeyError on "articles".
        raise RuntimeError(
            f"NewsAPI request failed ({payload.get('code')}): {payload.get('message')}"
        )

    columns = ["title", "description", "url", "long_description", "content"]
    records = []
    for item in payload.get("articles", []):
        try:
            full_text = get_indepth_news(url=item["url"])
        except newspaper.ArticleException as e:
            # Best effort: sites that block the download fall back to the title.
            print(f"Error fetching article: {e}")
            full_text = item["title"]
        records.append(
            {
                "title": item["title"],
                "description": item["description"],
                "url": item["url"],
                "long_description": full_text,
                "content": item["content"],
            }
        )

    # Explicit columns keep the schema stable even when no article is returned.
    return pd.DataFrame(records, columns=columns)
    

In [52]:
# Fetch recent AAPL news using extract_news' default look-back (days_ago=5).
# Articles whose full text cannot be downloaded are reported in the cell output.
cc = extract_news(stock_symbol="AAPL")

Error fetching article: Article `download()` failed with 401 Client Error: Unauthorized for url: https://consent.yahoo.com/v2/collectConsent?sessionId=1_cc-session_2fadeb34-0498-47d0-a011-f0329f97b050 on URL https://consent.yahoo.com/v2/collectConsent?sessionId=1_cc-session_2fadeb34-0498-47d0-a011-f0329f97b050
Error fetching article: Article `download()` failed with 403 Client Error: Max restarts limit reached for url: https://www.forbes.com/sites/greatspeculations/2024/04/17/down-73-in-a-day-where-is-salesforce-stock-headed/ on URL https://www.forbes.com/sites/greatspeculations/2024/04/17/down-73-in-a-day-where-is-salesforce-stock-headed/
Error fetching article: Article `download()` failed with 403 Client Error: Max restarts limit reached for url: https://www.forbes.com/sites/greatspeculations/2024/04/16/how-will-teslas-earnings-trend-after-a-tough-q1-delivery-report/ on URL https://www.forbes.com/sites/greatspeculations/2024/04/16/how-will-teslas-earnings-trend-after-a-tough-q1-deliv

In [73]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download the VADER lexicon package via NLTK; when it is already present the
# call only logs that the package is up to date.
nltk.download('vader_lexicon')

_VADER_ANALYZER = None  # built on first use, then shared by every later call


def _get_vader_analyzer():
    """Return the shared VADER analyzer, creating it the first time it is needed."""
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    return _VADER_ANALYZER


def analyze_sentiment(text):
    """
    Analyzes the sentiment of the given text using VADER sentiment analysis.

    The analyzer is created once and reused, so calling this function for
    every row of a DataFrame does not rebuild it on each call.

    Parameters:
        text (str): The text to analyze. Missing values (``None``, NaN or
            any other non-string) and blank strings are treated as having
            no sentiment.

    Returns:
        str: The sentiment label ('positive', 'negative', 'neutral').
    """
    # Nothing to score: avoid handing a non-string to VADER, which would raise.
    if not isinstance(text, str) or not text.strip():
        return 'neutral'

    compound = _get_vader_analyzer().polarity_scores(text)['compound']

    # Classify on the compound score with a +/-0.05 neutral band.
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\nayan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [83]:
def sentiment_append(dataframe):
    """
    Add a ``sentiment`` column derived from each row's ``long_description``.

    Parameters:
    dataframe (pandas.DataFrame): Frame with a ``long_description`` column.

    Returns:
    pandas.DataFrame: The same object that was passed in, now carrying the
    extra ``sentiment`` column (the frame is modified in place).
    """
    labels = [
        analyze_sentiment(record["long_description"])
        for _, record in dataframe.iterrows()
    ]
    dataframe["sentiment"] = labels
    return dataframe

In [84]:
def main_function(company_symbol, start_date, end_date, days_buffer=5):
    """
    Collect price history and sentiment-labelled news for one company.

    Parameters:
    company_symbol (str): Ticker used for both the price and the news lookup.
    start_date (str): Start of the price range, passed to download_stock_data.
    end_date (str): End of the price range, passed to download_stock_data.
    days_buffer (int): Forwarded to extract_news as ``days_ago``.

    Returns:
    tuple: ``(stock_data, sentiment_data)`` - the result of
    download_stock_data and the news frame after sentiment_append.
    """
    # The date range applies to prices only; the news lookup gets just the
    # symbol and the look-back length.
    prices = download_stock_data(
        symbol=company_symbol,
        start_date=start_date,
        end_date=end_date,
    )
    headlines = extract_news(stock_symbol=company_symbol, days_ago=days_buffer)
    return prices, sentiment_append(dataframe=headlines)

In [85]:
# Run the full pipeline for AAPL; sd receives the price frame, nd the news frame
# with its sentiment column.
# Note: start_date/end_date only bound the price download. main_function passes
# just the symbol and days_buffer to extract_news, so the news always covers the
# default 5-day look-back counted from today, regardless of these dates.
start_date = "2000-01-01"
end_date = "2023-12-31"
sd, nd = main_function(company_symbol="AAPL",start_date=start_date,end_date=end_date)


[*********************100%%**********************]  1 of 1 completed




Error fetching article: Article `download()` failed with 401 Client Error: Unauthorized for url: https://consent.yahoo.com/v2/collectConsent?sessionId=1_cc-session_2fadeb34-0498-47d0-a011-f0329f97b050 on URL https://consent.yahoo.com/v2/collectConsent?sessionId=1_cc-session_2fadeb34-0498-47d0-a011-f0329f97b050
Error fetching article: Article `download()` failed with 403 Client Error: Max restarts limit reached for url: https://www.forbes.com/sites/greatspeculations/2024/04/17/down-73-in-a-day-where-is-salesforce-stock-headed/ on URL https://www.forbes.com/sites/greatspeculations/2024/04/17/down-73-in-a-day-where-is-salesforce-stock-headed/
Error fetching article: Article `download()` failed with 403 Client Error: Max restarts limit reached for url: https://www.forbes.com/sites/greatspeculations/2024/04/16/how-will-teslas-earnings-trend-after-a-tough-q1-delivery-report/ on URL https://www.forbes.com/sites/greatspeculations/2024/04/16/how-will-teslas-earnings-trend-after-a-tough-q1-deliv

In [86]:
# Display the price frame returned by main_function.
# NOTE(review): the saved output below spans 2020-01-02 to 2024-04-18, which does
# not match the 2000-01-01..2023-12-31 range requested in the previous cell - it
# looks stale; restart the kernel and re-run to confirm.
sd

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,74.059998,75.150002,73.797501,75.087502,73.059425,135480400
2020-01-03,74.287498,75.144997,74.125000,74.357498,72.349136,146322800
2020-01-06,73.447502,74.989998,73.187500,74.949997,72.925644,118387200
2020-01-07,74.959999,75.224998,74.370003,74.597504,72.582664,108872000
2020-01-08,74.290001,76.110001,74.290001,75.797501,73.750237,132079200
...,...,...,...,...,...,...
2024-04-15,175.360001,176.630005,172.500000,172.690002,172.690002,73531800
2024-04-16,171.750000,173.759995,168.270004,169.380005,169.380005,73711200
2024-04-17,169.610001,170.649994,168.000000,168.000000,168.000000,50901200
2024-04-18,168.029999,168.639999,166.550003,167.039993,167.039993,43122900
