In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import wordcloud as wcl
from datetime import timedelta
import snscrape.modules.twitter as sntwitter
import requests

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
### CONTROLLER - You can change that data

# Range of timeframe
start_date = '2025-03-17'
end_date = '2025-04-17'

# Stock to analyze
ticker = '^VIX'

# How many days with the biggest change do you want to analyze
num_of_biggest_changes = 3

# Keywords from tweets for sentiment analysis
keywords = ['stock', '$SPX', '$SP500', 'SPX', 'SP500', '#SPX', '#SP500', 'S&P']

# Maximum number of tweets to analyze
max_tweets = 1000

# Number of most popular tweets to analyze
top_n = 50

In [4]:
def load_timeframe(start_date, end_date):
    try:
        business_days = pd.date_range(start=start_date, end=end_date, freq='B')
        timeframe = pd.DataFrame({'Date': business_days, 'Open': 0.0, 'Close': 0.0, 'Daily change [%]': 0.0,
                                  'Total change [%]': 0.0})

        return timeframe

    except Exception as e:
        print(f'Error in load_timeframe: {e}')

In [5]:
def load_stock_timeframe(ticker, timeframe):
    for index, row in timeframe.iterrows():
        try:
            date = row['Date']
            data = yf.download(ticker, start=date.strftime('%Y-%m-%d'),
                                   end=(date + timedelta(days=1)).strftime('%Y-%m-%d'), auto_adjust=False)
            open_price = data['Open'].iloc[0] if isinstance(data['Open'].iloc[0], float) else data['Open'].iloc[
                0].item()
            close_price = data['Close'].iloc[0] if isinstance(data['Close'].iloc[0], float) else data['Close'].iloc[
                0].item()

            timeframe.loc[index, 'Open'] = open_price
            timeframe.loc[index, 'Close'] = close_price

        except Exception as e:
            print(f"Error in load_stock_timeframe while retrieving data for {ticker} on {row['Date']}: {e}")
            timeframe.drop(index, inplace=True)
            continue

    timeframe['Daily change [%]'] = (timeframe['Close'] / timeframe['Open'] - 1) * 100
    timeframe['Total change [%]'] = (timeframe['Close'] / timeframe['Close'].iloc[0] - 1) * 100

In [6]:
def biggest_changes(timeframe, num_of_biggest_changes):
    if num_of_biggest_changes <= len(timeframe):
        timeframe_copy = timeframe.copy()
        timeframe_sorted = timeframe_copy.reindex(
            timeframe_copy['Daily change [%]'].abs().sort_values(ascending=False).index
        )
        return timeframe_sorted.head(num_of_biggest_changes).reset_index(drop=True)
    else:
        print('num_of_biggest_changes should not be bigger than number of rows in timeframe \n'
              'Change num_of_biggest_changes in controller.py')

In [7]:
def stock_plot(timeframe, column, ticker):
    try:
        plt.plot(timeframe['Date'], timeframe[f'{column}'], label=f'{column} of {ticker}', linewidth=3, linestyle='--',
                 color='black')
        plt.title('Portfolio total profit over time')
        plt.xlabel('Date')
        plt.ylabel('Total profit')
        plt.legend()
        plt.grid()
        plt.show()

    except Exception as e:
        print(f"Error in stock_plot: {e}")

In [8]:
def get_popular_tweets(target_date, keywords, max_tweets, top_n):
    formatted_keywords = []
    for kw in keywords:
        kw = kw.strip()
        if ' ' in kw and not (kw.startswith('"') and kw.endswith('"')):
            formatted_keywords.append(f'"{kw}"')
        else:
            formatted_keywords.append(kw)

    query_keywords = ' OR '.join(formatted_keywords)
    query = f'({query_keywords}) since:{target_date} until:{target_date} lang:en -filter:retweets'

    tweets = []

    try:
        session = requests.Session()
        session.verify = False
        for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
            if i >= max_tweets:
                break
            tweets.append([
                tweet.date,
                tweet.content,
                tweet.user.username,
                tweet.likeCount,
                tweet.retweetCount,
                tweet.replyCount
            ])

    except Exception as e:
        print(f'Error in tweets_loader: {e}')
        return pd.DataFrame()

    df = pd.DataFrame(tweets, columns=['Date', 'Tweet', 'User', 'Likes', 'Retweets', 'Answers'])

    if not df.empty:
        df['Sum of interaction'] = df[['Likes', 'Retweets', 'Answers']].sum(axis=1)
        return df.sort_values('Sum of interaction', ascending=False).head(top_n)

    return df

In [9]:
if __name__ == "__main__":
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)

    timeframe = load_timeframe(start_date, end_date)
    load_stock_timeframe(ticker, timeframe)
    # print(timeframe)

    # stock_plot(timeframe, 'Daily change [%]', ticker)
    # stock_plot(timeframe, 'Total change [%]', ticker)

    biggest_changes = biggest_changes(timeframe, num_of_biggest_changes)
    # print(biggest_changes)

    date = '2025-04-04'
    popular_tweets = get_popular_tweets(date, keywords, max_tweets, top_n)
    print(popular_tweets)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Error in tweets_loader: 4 requests to https://twitter.com/search?f=live&lang=en&q=%28stock+OR+%24SPX+OR+%24SP500+OR+SPX+OR+SP500+OR+%23SPX+OR+%23SP500+OR+S%26P%29+since%3A2025-04-04+until%3A2025-04-04+lang%3Aen+-filter%3Aretweets&src=spelling_expansion_revert_click failed, giving up.
Empty DataFrame
Columns: []
Index: []
