In [None]:
import re
import nltk
import psycopg2
import pandas as pd
import matplotlib.pyplot as plt
import pytz
import datetime
import seaborn as sns
from collections import Counter
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
nltk.download('stopwords', quiet=True)

# 1. Define functions

### Database call

In [2]:
def get_db_reddit(table, utc_begin, utc_end):
    """
    Retrieve Reddit rows from the project's PostgreSQL database for a UTC time range.

    Connection settings can be overridden with the environment variables
    REDDIT_DB_USER, REDDIT_DB_PASSWORD, REDDIT_DB_HOST, REDDIT_DB_PORT and
    REDDIT_DB_NAME; the previous hardcoded values are used as fallbacks.

    Parameters:
    table (str): Table to query; must be 'submissions' or 'comments'.
    utc_begin (int): Start of the range as a Unix timestamp (inclusive).
    utc_end (int): End of the range as a Unix timestamp (inclusive).

    Returns:
    DataFrame: One row per record, with the column names of the requested table.

    Raises:
    ValueError: If `table` is not a known table, or if the query returns no rows.
    psycopg2.DatabaseError: If connecting to the database or running the query fails.
    """
    import os  # local import: only this function needs it

    # Column layout of each supported table. This also acts as the whitelist
    # for the table name, which is interpolated into the SQL text below.
    table_columns = {
        'submissions': ["id", "subreddit_id", "subreddit", "author", "created_utc",
                        "permalink", "title", "selftext", "num_comments", "score"],
        'comments': ["id", "subreddit_id", "submission_id", "subreddit", "author",
                     "created_utc", "selftext", "score"],
    }

    # Validate before touching the database instead of after running the query.
    if table not in table_columns:
        print('Table not found!')
        raise ValueError(f"Unknown table {table!r}; expected one of {sorted(table_columns)}")

    # SECURITY: the fallback values below are credentials that were hardcoded in
    # this notebook. Prefer the environment variables, and rotate/remove the
    # fallbacks before sharing the notebook.
    connection_params = {
        "user": os.environ.get('REDDIT_DB_USER', 'postgres'),
        "password": os.environ.get('REDDIT_DB_PASSWORD', 'Data2023'),
        "host": os.environ.get('REDDIT_DB_HOST', 'mads-capstone.cmohac77hep9.eu-north-1.rds.amazonaws.com'),
        "port": int(os.environ.get('REDDIT_DB_PORT', 5432)),
        "database": os.environ.get('REDDIT_DB_NAME', 'mads'),
    }

    # Only the (whitelisted) table name is formatted in; the values are bound parameters.
    select_query = f"SELECT * FROM {table} WHERE created_utc >= %s AND created_utc <= %s;"

    try:
        db_connection = psycopg2.connect(**connection_params)
        try:
            with db_connection.cursor() as cursor:
                cursor.execute(select_query, (utc_begin, utc_end))
                rows = cursor.fetchall()
        finally:
            # `with connection:` only ends the transaction; close explicitly so
            # repeated calls do not leak connections.
            db_connection.close()
    except psycopg2.DatabaseError as e:
        print(f"Database error: {e}")
        raise

    # An empty result used to fail with a pandas "Length mismatch" ValueError when
    # the column names were assigned; keep the exception type (callers use it to
    # skip empty ranges) but make the cause explicit.
    if not rows:
        raise ValueError(f"No rows found in '{table}' between {utc_begin} and {utc_end}")

    return pd.DataFrame(rows, columns=table_columns[table])

### Text Preprocessing

In [3]:
def preprocess_df(df):
    """
    Clean the text columns of a posts frame and add the New York calendar date.

    Steps:
    - If a 'title' column exists: fill missing titles with '', clean them, and
      drop every row whose cleaned title equals the title of the row directly
      above it (consecutive duplicates).
    - Fill missing 'selftext' with '' and clean it.
    - Add 'created_EST_date': 'created_utc' (Unix seconds) converted from UTC to
      America/New_York and reduced to a date.

    The input frame is left untouched; a processed copy is returned. The
    original index labels are kept (rows are dropped, not renumbered).

    Parameters:
    df (DataFrame): Must contain 'selftext' and 'created_utc'; 'title' is optional.

    Returns:
    DataFrame: The processed copy.
    """
    def preprocess_text(text):
        # Keep only ASCII letters, digits, whitespace and basic punctuation.
        text = re.sub(r'[^A-Za-z0-9\s,.!?;:()\'\"-]', '', text)
        return text.strip()

    # Work on a copy so the caller's frame is not half-modified and later column
    # assignments never target a slice of another frame.
    df = df.copy()

    if 'title' in df.columns:
        df['title'] = df['title'].fillna('').apply(preprocess_text)
        # Keep a row only when its title differs from the previous row's title.
        # The first row is always kept (its "previous" value is NaN).
        df = df[df['title'] != df['title'].shift(1)].copy()
    else:
        # Comments have no title; only a missing column is tolerated here, other
        # errors are no longer swallowed.
        print('No title found, skipping')

    # Blank out missing bodies, then clean them.
    df['selftext'] = df['selftext'].fillna('').apply(preprocess_text)

    # Interpret the timestamp as UTC seconds and take the calendar date in New York.
    df['created_EST_date'] = (
        pd.to_datetime(df['created_utc'], unit='s')
        .dt.tz_localize('UTC')
        .dt.tz_convert('America/New_York')
        .dt.date
    )

    return df

### Ticker Extraction

In [4]:
# Ticker-symbol sets keyed by CSV path, so the screener file is parsed once per
# kernel session instead of on every call. Re-run this cell to reset it.
_TICKER_SYMBOL_CACHE = {}


def _load_ticker_symbols(csv_path):
    """Return the set of values in the 'Symbol' column of `csv_path` (cached per path)."""
    if csv_path not in _TICKER_SYMBOL_CACHE:
        stocks = pd.read_csv(csv_path)
        _TICKER_SYMBOL_CACHE[csv_path] = set(stocks['Symbol'])
    return _TICKER_SYMBOL_CACHE[csv_path]


def find_tickers(df, tickers_csv='nasdaq_screener_1700463382148.csv'):
    """
    Add ticker-mention columns to a posts frame.

    A candidate is any run of 2-5 upper-case letters on word boundaries. It is
    kept when it is a known symbol (the 'Symbol' column of `tickers_csv`, plus a
    few manual additions) and is neither blacklisted nor an upper-cased English
    stopword.

    Columns added (the frame is modified in place and also returned):
    - 'title_tickers': tickers found in 'title' (empty lists when there is no
      'title' column, e.g. for comments).
    - 'selftext_tickers': tickers found in 'selftext'.
    - 'tickers': sorted union of the two.

    Parameters:
    df (DataFrame): Must contain 'selftext'; 'title' is optional.
    tickers_csv (str): Path to a CSV file with a 'Symbol' column.

    Returns:
    DataFrame: `df` with the three columns added.
    """
    # Known symbols plus tickers that are missing from the screener file.
    additional_tickers = {'BBBY'}
    tickers_set = _load_ticker_symbols(tickers_csv) | additional_tickers

    compiled_pattern = re.compile(r'\b[A-Z]{2,5}\b')

    # Upper-case words that are valid symbols but usually mean something else in
    # these posts. Single-letter entries cannot match the 2-5 letter pattern;
    # they are kept in case the pattern is widened.
    blacklist = {
        'IMO',
        'DD',
        'US',
        'USA',
        'LOL', 'AM', 'BE', 'PR', 'PRAY',
        'EDIT', 'STILL', 'WTF', 'RAW', 'PM', 'LMAO', 'LMFAO',
        # Jake added
        'RUN',   # common
        'SAY',   # common
        'EOD',   # end of day
        'BIG',   # common
        'LOW',   # low / high
        'RSI',   # relative strength
        'DT',    # double top
        'HUGE',
        'U',     # you
        'AI',    # Artificial Intelligence
        'DC',    # Washington DC
        'J',     # as in J Powell
        'ES',    # E-mini SP future
        'F',     # f*ck
        'GO',
        'UK',    # United Kingdom
        'EU',    # European Union
        'RH',    # Robinhood, not Restoration Hardware
        'E',     # E*trade brokerage
        'L',     # L for loss, P&L etc
        'R',     # common
        'K',     # OK
        'B',     # common in BBBY odd spacing (spam?)
        'TD',    # TD Ameritrade brokerage
        'RYAN',  # Ryan Cohen, CEO of GME
        'NYC',   # New York City
        'REG',   # reg SHO
        'SHO',   # reg SHO
        'NEXT',  # common
        'FREE',  # spam
        'DM',    # direct message
        'TV',    # television
        'ENS',   # ethereum name service, spam
        'IRS',   # internal revenue service
        'IQ',    # intelligence quotient
        'VS',    # versus
        'PT',    # price target
        'IBKR',  # interactive brokers
        'GOOD',  # common
        'OPEN',  # market open
        'FCF',   # free cash flow
    }

    combined_blacklist = blacklist | {word.upper() for word in stopwords.words('english')}

    def extract_tickers(text):
        # Non-string cells (e.g. NaN) contain no tickers.
        if not isinstance(text, str):
            return []
        candidates = set(compiled_pattern.findall(text))
        # Sorted so the result is deterministic across runs.
        return sorted(
            candidate for candidate in candidates
            if candidate in tickers_set and candidate not in combined_blacklist
        )

    if 'title' in df.columns:
        df['title_tickers'] = df['title'].apply(extract_tickers)
    else:
        print('title not found, working with comments?')
        # Without this column the union below raised KeyError for comment frames.
        df['title_tickers'] = [[] for _ in range(len(df))]

    df['selftext_tickers'] = df['selftext'].apply(extract_tickers)

    df['tickers'] = [
        sorted(set(in_title + in_body))
        for in_title, in_body in zip(df['title_tickers'], df['selftext_tickers'])
    ]

    return df

### Add VADER sentiment

In [5]:
def add_vader_sentiment(df):
    """
    Score each row with VADER, extended with trading-slang terms.

    Columns added (the frame is modified in place and also returned):
    - 'title_sentiment': VADER compound score of 'title' (0 for every row when
      there is no 'title' column, e.g. for comments).
    - 'selftext_sentiment': VADER compound score of 'selftext'.
    - 'overall_sentiment': mean of the two scores that are non-zero, or 0 when
      both are zero.
    - 'score_weighted_sentiment': 'overall_sentiment' multiplied by 'score'.

    Empty or non-string text scores 0.

    Parameters:
    df (DataFrame): Must contain 'selftext' and 'score'; 'title' is optional.

    Returns:
    DataFrame: `df` with the four columns added.
    """
    vader = SentimentIntensityAnalyzer()

    # Extra lexicon entries. VADER lower-cases each token before looking it up,
    # so keys must be lower case to ever match.
    added_words = {
        'citron': -4.0,
        'hidenburg': -4.0,   # original (misspelled) key, kept for posts with the same typo
        'hindenburg': -4.0,  # correct spelling
        'moon': 4.0,
        'highs': 2.0,
        'mooning': 4.0,
        'long': 2.0,
        'short': -2.0,
        'call': 4.0,
        'calls': 4.0,
        'put': -4.0,
        'puts': -4.0,
        'break': 2.0,
        'tendie': 2.0,
        'tendies': 2.0,
        'town': 2.0,
        'overvalued': -3.0,
        'undervalued': 3.0,
        'buy': 4.0,
        'sell': -4.0,
        'gone': -1.0,
        'gtfo': -1.7,
        'paper': -1.7,
        'bullish': 3.7,
        'bearish': -3.7,
        'bagholder': -1.7,
        'stonk': 1.9,
        'green': 1.9,
        'money': 1.2,
        'print': 2.2,
        'rocket': 2.2,
        'bull': 2.9,
        'bear': -2.9,
        'pumping': -1.0,
        'sus': -3.0,
        'offering': -2.3,
        'rip': -4.0,
        'downgrade': -3.0,
        'upgrade': 3.0,
        'maintain': 1.0,
        'pump': 1.9,
        'hot': 1.5,
        'drop': -2.5,
        'rebound': 1.5,
        'crack': 2.5,
        # Jake added these.
        # NOTE(review): preprocess_df strips every non-ASCII character, so the two
        # emoji entries can only matter for text that bypasses that step.
        '🚀': 3,
        '🌕': 3,
        'yolo': 4,      # was 'YOLO', which could never match the lower-cased lookup
        'ripping': 3,
        'regarded': 0,
        'squeeze': 3,
    }

    vader.lexicon.update(added_words)

    def safe_sentiment(text):
        try:
            # Empty or non-string input carries no sentiment.
            if not isinstance(text, str) or not text.strip():
                return 0

            sentiment_dict = vader.polarity_scores(text)
            return sentiment_dict.get('compound', 0)
        except Exception as e:
            print(f"Error processing text: '{text}' (type: {type(text)}). Error: {e}")
            return 0

    if 'title' in df.columns:
        df['title_sentiment'] = df['title'].apply(safe_sentiment)
    else:
        print('Titles not found, is this a comments file?')
        df['title_sentiment'] = 0

    df['selftext_sentiment'] = df['selftext'].apply(safe_sentiment)

    # Average only the non-zero scores of each row; rows where both are zero get 0.
    # Done column-wise so that an empty frame also works.
    scores = df[['title_sentiment', 'selftext_sentiment']]
    is_non_zero = scores.ne(0)
    non_zero_count = is_non_zero.sum(axis=1)
    non_zero_total = scores.where(is_non_zero, 0).sum(axis=1)
    df['overall_sentiment'] = (
        non_zero_total / non_zero_count.where(non_zero_count > 0)
    ).fillna(0)

    df['score_weighted_sentiment'] = df['overall_sentiment'] * df['score']

    return df

# 2. Loop over the time frame, fetch the records for each day, and append that day's sentiment to the results

In [6]:
!pip install vaderSentiment

[0m

In [7]:

# establish min max date range


In [6]:
import re
import nltk
import psycopg2
import pandas as pd
import matplotlib.pyplot as plt
import pytz
import datetime
import seaborn as sns
from collections import Counter
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
nltk.download('stopwords', quiet=True)
from tqdm import tqdm


# Build one per-ticker sentiment table per daily window and collect them in `results`.

# Not filled in this cell (the per-day frames go into `results`); the name is
# kept in case other cells reference it.
reddit_sentiment_by_date = pd.DataFrame()

# Create a date range
start_date = '2018-08-01'
end_date = '2023-11-30'
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

# results repo: one DataFrame per window that returned data
results = []

# Iterate through the date range
for day in tqdm(date_range):
    # Each window runs from 23:59:59 UTC on `day` to 23:59:59 UTC on the next
    # day; the query treats both bounds as inclusive.
    utc_begin = int(day.replace(hour=23, minute=59, second=59, tzinfo=datetime.timezone.utc).timestamp())
    utc_end = int((day + pd.Timedelta(days=1)).replace(hour=23, minute=59, second=59, tzinfo=datetime.timezone.utc).timestamp())

    # pull submissions for the window
    try:
        submissions_df = get_db_reddit(table='submissions', utc_begin=utc_begin, utc_end=utc_end)
    except Exception:
        # `except Exception` (not a bare except) so that interrupting the kernel
        # still stops the loop. Report the window that was skipped.
        utc_begin_date = datetime.datetime.fromtimestamp(utc_begin, tz=datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
        utc_end_date = datetime.datetime.fromtimestamp(utc_end, tz=datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

        print(f"UTC Begin: {utc_begin_date}, UTC End: {utc_end_date}")
        continue

    # Nothing to aggregate for this window.
    if submissions_df.empty:
        continue

    # apply functions to df
    submissions_df = preprocess_df(submissions_df)
    submissions_df = find_tickers(submissions_df)
    submissions_df = add_vader_sentiment(submissions_df)

    # One label for the whole window: the New York calendar date at the window
    # start. Previously 'Date' was copied from submissions_df by index alignment,
    # which paired each ticker row with the date of an unrelated post.
    # NOTE(review): confirm this is the intended label; most of the window falls
    # on the following New York date.
    window_date = pd.Timestamp(utc_begin, unit='s', tz='UTC').tz_convert('America/New_York').date()

    # One row per (post, ticker); posts without tickers get NaN and drop out of the groupby.
    exploded_df = submissions_df.explode('tickers')
    sentiment_by_ticker = exploded_df.groupby('tickers')

    # Sum of 'overall_sentiment' per ticker
    cumulative_sentiment = sentiment_by_ticker['overall_sentiment'].sum().reset_index()
    cumulative_sentiment.columns = ['Ticker', 'Cumulative Overall Sentiment']
    cumulative_sentiment_sorted = cumulative_sentiment.sort_values(by='Cumulative Overall Sentiment', ascending=False)

    # Sum of 'score_weighted_sentiment' per ticker
    cumulative_weighted_sentiment = sentiment_by_ticker['score_weighted_sentiment'].sum().reset_index()
    cumulative_weighted_sentiment.columns = ['Ticker', 'Cumulative Weighted Sentiment']
    cumulative_weighted_sentiment_sorted = cumulative_weighted_sentiment.sort_values(by='Cumulative Weighted Sentiment', ascending=False)
    cumulative_weighted_sentiment_sorted['Date'] = window_date

    # Columns: Ticker, Cumulative Overall Sentiment, Cumulative Weighted Sentiment, Date
    daily_sentiment_df = cumulative_sentiment_sorted.merge(cumulative_weighted_sentiment_sorted, on='Ticker')

    results.append(daily_sentiment_df)


 23%|██▎       | 442/1948 [03:54<06:20,  3.96it/s]

UTC Begin: 2019-10-14 23:59:59, UTC End: 2019-10-15 23:59:59
UTC Begin: 2019-10-15 23:59:59, UTC End: 2019-10-16 23:59:59
UTC Begin: 2019-10-16 23:59:59, UTC End: 2019-10-17 23:59:59
UTC Begin: 2019-10-17 23:59:59, UTC End: 2019-10-18 23:59:59


 83%|████████▎ | 1620/1948 [27:10<00:39,  8.33it/s] 

UTC Begin: 2022-12-31 23:59:59, UTC End: 2023-01-01 23:59:59
UTC Begin: 2023-01-01 23:59:59, UTC End: 2023-01-02 23:59:59
UTC Begin: 2023-01-02 23:59:59, UTC End: 2023-01-03 23:59:59
UTC Begin: 2023-01-03 23:59:59, UTC End: 2023-01-04 23:59:59
UTC Begin: 2023-01-04 23:59:59, UTC End: 2023-01-05 23:59:59
UTC Begin: 2023-01-05 23:59:59, UTC End: 2023-01-06 23:59:59
UTC Begin: 2023-01-06 23:59:59, UTC End: 2023-01-07 23:59:59


 83%|████████▎ | 1626/1948 [27:10<00:22, 14.31it/s]

UTC Begin: 2023-01-07 23:59:59, UTC End: 2023-01-08 23:59:59
UTC Begin: 2023-01-08 23:59:59, UTC End: 2023-01-09 23:59:59
UTC Begin: 2023-01-09 23:59:59, UTC End: 2023-01-10 23:59:59
UTC Begin: 2023-01-10 23:59:59, UTC End: 2023-01-11 23:59:59
UTC Begin: 2023-01-11 23:59:59, UTC End: 2023-01-12 23:59:59
UTC Begin: 2023-01-12 23:59:59, UTC End: 2023-01-13 23:59:59


 84%|████████▍ | 1632/1948 [27:10<00:16, 19.67it/s]

UTC Begin: 2023-01-13 23:59:59, UTC End: 2023-01-14 23:59:59
UTC Begin: 2023-01-14 23:59:59, UTC End: 2023-01-15 23:59:59
UTC Begin: 2023-01-15 23:59:59, UTC End: 2023-01-16 23:59:59
UTC Begin: 2023-01-16 23:59:59, UTC End: 2023-01-17 23:59:59
UTC Begin: 2023-01-17 23:59:59, UTC End: 2023-01-18 23:59:59
UTC Begin: 2023-01-18 23:59:59, UTC End: 2023-01-19 23:59:59


 84%|████████▍ | 1635/1948 [27:10<00:14, 20.95it/s]

UTC Begin: 2023-01-19 23:59:59, UTC End: 2023-01-20 23:59:59
UTC Begin: 2023-01-20 23:59:59, UTC End: 2023-01-21 23:59:59
UTC Begin: 2023-01-21 23:59:59, UTC End: 2023-01-22 23:59:59
UTC Begin: 2023-01-22 23:59:59, UTC End: 2023-01-23 23:59:59
UTC Begin: 2023-01-23 23:59:59, UTC End: 2023-01-24 23:59:59
UTC Begin: 2023-01-24 23:59:59, UTC End: 2023-01-25 23:59:59


 84%|████████▍ | 1645/1948 [27:11<00:11, 26.52it/s]

UTC Begin: 2023-01-25 23:59:59, UTC End: 2023-01-26 23:59:59
UTC Begin: 2023-01-26 23:59:59, UTC End: 2023-01-27 23:59:59
UTC Begin: 2023-01-27 23:59:59, UTC End: 2023-01-28 23:59:59
UTC Begin: 2023-01-28 23:59:59, UTC End: 2023-01-29 23:59:59
UTC Begin: 2023-01-29 23:59:59, UTC End: 2023-01-30 23:59:59
UTC Begin: 2023-01-30 23:59:59, UTC End: 2023-01-31 23:59:59
UTC Begin: 2023-01-31 23:59:59, UTC End: 2023-02-01 23:59:59


 85%|████████▍ | 1649/1948 [27:11<00:10, 28.41it/s]

UTC Begin: 2023-02-01 23:59:59, UTC End: 2023-02-02 23:59:59
UTC Begin: 2023-02-02 23:59:59, UTC End: 2023-02-03 23:59:59
UTC Begin: 2023-02-03 23:59:59, UTC End: 2023-02-04 23:59:59
UTC Begin: 2023-02-04 23:59:59, UTC End: 2023-02-05 23:59:59
UTC Begin: 2023-02-05 23:59:59, UTC End: 2023-02-06 23:59:59
UTC Begin: 2023-02-06 23:59:59, UTC End: 2023-02-07 23:59:59


 85%|████████▍ | 1655/1948 [27:11<00:10, 28.15it/s]

UTC Begin: 2023-02-07 23:59:59, UTC End: 2023-02-08 23:59:59
UTC Begin: 2023-02-08 23:59:59, UTC End: 2023-02-09 23:59:59
UTC Begin: 2023-02-09 23:59:59, UTC End: 2023-02-10 23:59:59
UTC Begin: 2023-02-10 23:59:59, UTC End: 2023-02-11 23:59:59
UTC Begin: 2023-02-11 23:59:59, UTC End: 2023-02-12 23:59:59
UTC Begin: 2023-02-12 23:59:59, UTC End: 2023-02-13 23:59:59


 85%|████████▌ | 1662/1948 [27:11<00:09, 28.78it/s]

UTC Begin: 2023-02-13 23:59:59, UTC End: 2023-02-14 23:59:59
UTC Begin: 2023-02-14 23:59:59, UTC End: 2023-02-15 23:59:59
UTC Begin: 2023-02-15 23:59:59, UTC End: 2023-02-16 23:59:59
UTC Begin: 2023-02-16 23:59:59, UTC End: 2023-02-17 23:59:59
UTC Begin: 2023-02-17 23:59:59, UTC End: 2023-02-18 23:59:59
UTC Begin: 2023-02-18 23:59:59, UTC End: 2023-02-19 23:59:59
UTC Begin: 2023-02-19 23:59:59, UTC End: 2023-02-20 23:59:59


 86%|████████▌ | 1669/1948 [27:11<00:09, 29.16it/s]

UTC Begin: 2023-02-20 23:59:59, UTC End: 2023-02-21 23:59:59
UTC Begin: 2023-02-21 23:59:59, UTC End: 2023-02-22 23:59:59
UTC Begin: 2023-02-22 23:59:59, UTC End: 2023-02-23 23:59:59
UTC Begin: 2023-02-23 23:59:59, UTC End: 2023-02-24 23:59:59
UTC Begin: 2023-02-24 23:59:59, UTC End: 2023-02-25 23:59:59
UTC Begin: 2023-02-25 23:59:59, UTC End: 2023-02-26 23:59:59


 86%|████████▌ | 1677/1948 [27:12<00:08, 30.33it/s]

UTC Begin: 2023-02-26 23:59:59, UTC End: 2023-02-27 23:59:59
UTC Begin: 2023-02-27 23:59:59, UTC End: 2023-02-28 23:59:59
UTC Begin: 2023-02-28 23:59:59, UTC End: 2023-03-01 23:59:59
UTC Begin: 2023-03-01 23:59:59, UTC End: 2023-03-02 23:59:59
UTC Begin: 2023-03-02 23:59:59, UTC End: 2023-03-03 23:59:59
UTC Begin: 2023-03-03 23:59:59, UTC End: 2023-03-04 23:59:59
UTC Begin: 2023-03-04 23:59:59, UTC End: 2023-03-05 23:59:59


 86%|████████▋ | 1681/1948 [27:12<00:08, 30.76it/s]

UTC Begin: 2023-03-05 23:59:59, UTC End: 2023-03-06 23:59:59
UTC Begin: 2023-03-06 23:59:59, UTC End: 2023-03-07 23:59:59
UTC Begin: 2023-03-07 23:59:59, UTC End: 2023-03-08 23:59:59
UTC Begin: 2023-03-08 23:59:59, UTC End: 2023-03-09 23:59:59
UTC Begin: 2023-03-09 23:59:59, UTC End: 2023-03-10 23:59:59
UTC Begin: 2023-03-10 23:59:59, UTC End: 2023-03-11 23:59:59
UTC Begin: 2023-03-11 23:59:59, UTC End: 2023-03-12 23:59:59


 87%|████████▋ | 1689/1948 [27:12<00:08, 30.71it/s]

UTC Begin: 2023-03-12 23:59:59, UTC End: 2023-03-13 23:59:59
UTC Begin: 2023-03-13 23:59:59, UTC End: 2023-03-14 23:59:59
UTC Begin: 2023-03-14 23:59:59, UTC End: 2023-03-15 23:59:59
UTC Begin: 2023-03-15 23:59:59, UTC End: 2023-03-16 23:59:59
UTC Begin: 2023-03-16 23:59:59, UTC End: 2023-03-17 23:59:59
UTC Begin: 2023-03-17 23:59:59, UTC End: 2023-03-18 23:59:59
UTC Begin: 2023-03-18 23:59:59, UTC End: 2023-03-19 23:59:59


 87%|████████▋ | 1697/1948 [27:12<00:08, 30.33it/s]

UTC Begin: 2023-03-19 23:59:59, UTC End: 2023-03-20 23:59:59
UTC Begin: 2023-03-20 23:59:59, UTC End: 2023-03-21 23:59:59
UTC Begin: 2023-03-21 23:59:59, UTC End: 2023-03-22 23:59:59
UTC Begin: 2023-03-22 23:59:59, UTC End: 2023-03-23 23:59:59
UTC Begin: 2023-03-23 23:59:59, UTC End: 2023-03-24 23:59:59
UTC Begin: 2023-03-24 23:59:59, UTC End: 2023-03-25 23:59:59


 87%|████████▋ | 1701/1948 [27:12<00:08, 30.13it/s]

UTC Begin: 2023-03-25 23:59:59, UTC End: 2023-03-26 23:59:59
UTC Begin: 2023-03-26 23:59:59, UTC End: 2023-03-27 23:59:59
UTC Begin: 2023-03-27 23:59:59, UTC End: 2023-03-28 23:59:59
UTC Begin: 2023-03-28 23:59:59, UTC End: 2023-03-29 23:59:59
UTC Begin: 2023-03-29 23:59:59, UTC End: 2023-03-30 23:59:59
UTC Begin: 2023-03-30 23:59:59, UTC End: 2023-03-31 23:59:59
UTC Begin: 2023-03-31 23:59:59, UTC End: 2023-04-01 23:59:59


 88%|████████▊ | 1709/1948 [27:13<00:08, 29.74it/s]

UTC Begin: 2023-04-01 23:59:59, UTC End: 2023-04-02 23:59:59
UTC Begin: 2023-04-02 23:59:59, UTC End: 2023-04-03 23:59:59
UTC Begin: 2023-04-03 23:59:59, UTC End: 2023-04-04 23:59:59
UTC Begin: 2023-04-04 23:59:59, UTC End: 2023-04-05 23:59:59
UTC Begin: 2023-04-05 23:59:59, UTC End: 2023-04-06 23:59:59
UTC Begin: 2023-04-06 23:59:59, UTC End: 2023-04-07 23:59:59
UTC Begin: 2023-04-07 23:59:59, UTC End: 2023-04-08 23:59:59


 88%|████████▊ | 1717/1948 [27:13<00:07, 29.92it/s]

UTC Begin: 2023-04-08 23:59:59, UTC End: 2023-04-09 23:59:59
UTC Begin: 2023-04-09 23:59:59, UTC End: 2023-04-10 23:59:59
UTC Begin: 2023-04-10 23:59:59, UTC End: 2023-04-11 23:59:59
UTC Begin: 2023-04-11 23:59:59, UTC End: 2023-04-12 23:59:59
UTC Begin: 2023-04-12 23:59:59, UTC End: 2023-04-13 23:59:59
UTC Begin: 2023-04-13 23:59:59, UTC End: 2023-04-14 23:59:59


 88%|████████▊ | 1721/1948 [27:13<00:07, 30.52it/s]

UTC Begin: 2023-04-14 23:59:59, UTC End: 2023-04-15 23:59:59
UTC Begin: 2023-04-15 23:59:59, UTC End: 2023-04-16 23:59:59
UTC Begin: 2023-04-16 23:59:59, UTC End: 2023-04-17 23:59:59
UTC Begin: 2023-04-17 23:59:59, UTC End: 2023-04-18 23:59:59
UTC Begin: 2023-04-18 23:59:59, UTC End: 2023-04-19 23:59:59
UTC Begin: 2023-04-19 23:59:59, UTC End: 2023-04-20 23:59:59


 89%|████████▊ | 1728/1948 [27:13<00:07, 28.88it/s]

UTC Begin: 2023-04-20 23:59:59, UTC End: 2023-04-21 23:59:59
UTC Begin: 2023-04-21 23:59:59, UTC End: 2023-04-22 23:59:59
UTC Begin: 2023-04-22 23:59:59, UTC End: 2023-04-23 23:59:59
UTC Begin: 2023-04-23 23:59:59, UTC End: 2023-04-24 23:59:59
UTC Begin: 2023-04-24 23:59:59, UTC End: 2023-04-25 23:59:59
UTC Begin: 2023-04-25 23:59:59, UTC End: 2023-04-26 23:59:59


 89%|████████▉ | 1735/1948 [27:14<00:07, 28.85it/s]

UTC Begin: 2023-04-26 23:59:59, UTC End: 2023-04-27 23:59:59
UTC Begin: 2023-04-27 23:59:59, UTC End: 2023-04-28 23:59:59
UTC Begin: 2023-04-28 23:59:59, UTC End: 2023-04-29 23:59:59
UTC Begin: 2023-04-29 23:59:59, UTC End: 2023-04-30 23:59:59
UTC Begin: 2023-04-30 23:59:59, UTC End: 2023-05-01 23:59:59
UTC Begin: 2023-05-01 23:59:59, UTC End: 2023-05-02 23:59:59


 89%|████████▉ | 1738/1948 [27:14<00:07, 26.85it/s]

UTC Begin: 2023-05-02 23:59:59, UTC End: 2023-05-03 23:59:59
UTC Begin: 2023-05-03 23:59:59, UTC End: 2023-05-04 23:59:59
UTC Begin: 2023-05-04 23:59:59, UTC End: 2023-05-05 23:59:59
UTC Begin: 2023-05-05 23:59:59, UTC End: 2023-05-06 23:59:59
UTC Begin: 2023-05-06 23:59:59, UTC End: 2023-05-07 23:59:59
UTC Begin: 2023-05-07 23:59:59, UTC End: 2023-05-08 23:59:59


 90%|████████▉ | 1746/1948 [27:14<00:07, 28.76it/s]

UTC Begin: 2023-05-08 23:59:59, UTC End: 2023-05-09 23:59:59
UTC Begin: 2023-05-09 23:59:59, UTC End: 2023-05-10 23:59:59
UTC Begin: 2023-05-10 23:59:59, UTC End: 2023-05-11 23:59:59
UTC Begin: 2023-05-11 23:59:59, UTC End: 2023-05-12 23:59:59
UTC Begin: 2023-05-12 23:59:59, UTC End: 2023-05-13 23:59:59
UTC Begin: 2023-05-13 23:59:59, UTC End: 2023-05-14 23:59:59


 90%|████████▉ | 1752/1948 [27:14<00:07, 26.53it/s]

UTC Begin: 2023-05-14 23:59:59, UTC End: 2023-05-15 23:59:59
UTC Begin: 2023-05-15 23:59:59, UTC End: 2023-05-16 23:59:59
UTC Begin: 2023-05-16 23:59:59, UTC End: 2023-05-17 23:59:59
UTC Begin: 2023-05-17 23:59:59, UTC End: 2023-05-18 23:59:59
UTC Begin: 2023-05-18 23:59:59, UTC End: 2023-05-19 23:59:59


 90%|█████████ | 1755/1948 [27:14<00:07, 24.94it/s]

UTC Begin: 2023-05-19 23:59:59, UTC End: 2023-05-20 23:59:59
UTC Begin: 2023-05-20 23:59:59, UTC End: 2023-05-21 23:59:59
UTC Begin: 2023-05-21 23:59:59, UTC End: 2023-05-22 23:59:59
UTC Begin: 2023-05-22 23:59:59, UTC End: 2023-05-23 23:59:59
UTC Begin: 2023-05-23 23:59:59, UTC End: 2023-05-24 23:59:59


 90%|█████████ | 1761/1948 [27:15<00:08, 22.37it/s]

UTC Begin: 2023-05-24 23:59:59, UTC End: 2023-05-25 23:59:59
UTC Begin: 2023-05-25 23:59:59, UTC End: 2023-05-26 23:59:59
UTC Begin: 2023-05-26 23:59:59, UTC End: 2023-05-27 23:59:59
UTC Begin: 2023-05-27 23:59:59, UTC End: 2023-05-28 23:59:59


 91%|█████████ | 1767/1948 [27:15<00:07, 24.24it/s]

UTC Begin: 2023-05-28 23:59:59, UTC End: 2023-05-29 23:59:59
UTC Begin: 2023-05-29 23:59:59, UTC End: 2023-05-30 23:59:59
UTC Begin: 2023-05-30 23:59:59, UTC End: 2023-05-31 23:59:59
UTC Begin: 2023-05-31 23:59:59, UTC End: 2023-06-01 23:59:59
UTC Begin: 2023-06-01 23:59:59, UTC End: 2023-06-02 23:59:59
UTC Begin: 2023-06-02 23:59:59, UTC End: 2023-06-03 23:59:59


 91%|█████████ | 1771/1948 [27:15<00:06, 26.32it/s]

UTC Begin: 2023-06-03 23:59:59, UTC End: 2023-06-04 23:59:59
UTC Begin: 2023-06-04 23:59:59, UTC End: 2023-06-05 23:59:59
UTC Begin: 2023-06-05 23:59:59, UTC End: 2023-06-06 23:59:59
UTC Begin: 2023-06-06 23:59:59, UTC End: 2023-06-07 23:59:59
UTC Begin: 2023-06-07 23:59:59, UTC End: 2023-06-08 23:59:59
UTC Begin: 2023-06-08 23:59:59, UTC End: 2023-06-09 23:59:59
UTC Begin: 2023-06-09 23:59:59, UTC End: 2023-06-10 23:59:59


 91%|█████████▏| 1778/1948 [27:15<00:06, 27.00it/s]

UTC Begin: 2023-06-10 23:59:59, UTC End: 2023-06-11 23:59:59
UTC Begin: 2023-06-11 23:59:59, UTC End: 2023-06-12 23:59:59
UTC Begin: 2023-06-12 23:59:59, UTC End: 2023-06-13 23:59:59
UTC Begin: 2023-06-13 23:59:59, UTC End: 2023-06-14 23:59:59
UTC Begin: 2023-06-14 23:59:59, UTC End: 2023-06-15 23:59:59
UTC Begin: 2023-06-15 23:59:59, UTC End: 2023-06-16 23:59:59


 92%|█████████▏| 1785/1948 [27:15<00:05, 28.79it/s]

UTC Begin: 2023-06-16 23:59:59, UTC End: 2023-06-17 23:59:59
UTC Begin: 2023-06-17 23:59:59, UTC End: 2023-06-18 23:59:59
UTC Begin: 2023-06-18 23:59:59, UTC End: 2023-06-19 23:59:59
UTC Begin: 2023-06-19 23:59:59, UTC End: 2023-06-20 23:59:59
UTC Begin: 2023-06-20 23:59:59, UTC End: 2023-06-21 23:59:59
UTC Begin: 2023-06-21 23:59:59, UTC End: 2023-06-22 23:59:59
UTC Begin: 2023-06-22 23:59:59, UTC End: 2023-06-23 23:59:59


 92%|█████████▏| 1792/1948 [27:16<00:05, 29.61it/s]

UTC Begin: 2023-06-23 23:59:59, UTC End: 2023-06-24 23:59:59
UTC Begin: 2023-06-24 23:59:59, UTC End: 2023-06-25 23:59:59
UTC Begin: 2023-06-25 23:59:59, UTC End: 2023-06-26 23:59:59
UTC Begin: 2023-06-26 23:59:59, UTC End: 2023-06-27 23:59:59
UTC Begin: 2023-06-27 23:59:59, UTC End: 2023-06-28 23:59:59
UTC Begin: 2023-06-28 23:59:59, UTC End: 2023-06-29 23:59:59


 92%|█████████▏| 1798/1948 [27:16<00:05, 29.35it/s]

UTC Begin: 2023-06-29 23:59:59, UTC End: 2023-06-30 23:59:59
UTC Begin: 2023-06-30 23:59:59, UTC End: 2023-07-01 23:59:59
UTC Begin: 2023-07-01 23:59:59, UTC End: 2023-07-02 23:59:59
UTC Begin: 2023-07-02 23:59:59, UTC End: 2023-07-03 23:59:59
UTC Begin: 2023-07-03 23:59:59, UTC End: 2023-07-04 23:59:59
UTC Begin: 2023-07-04 23:59:59, UTC End: 2023-07-05 23:59:59


 93%|█████████▎| 1804/1948 [27:16<00:05, 27.83it/s]

UTC Begin: 2023-07-05 23:59:59, UTC End: 2023-07-06 23:59:59
UTC Begin: 2023-07-06 23:59:59, UTC End: 2023-07-07 23:59:59
UTC Begin: 2023-07-07 23:59:59, UTC End: 2023-07-08 23:59:59
UTC Begin: 2023-07-08 23:59:59, UTC End: 2023-07-09 23:59:59
UTC Begin: 2023-07-09 23:59:59, UTC End: 2023-07-10 23:59:59


 93%|█████████▎| 1807/1948 [27:16<00:05, 26.61it/s]

UTC Begin: 2023-07-10 23:59:59, UTC End: 2023-07-11 23:59:59
UTC Begin: 2023-07-11 23:59:59, UTC End: 2023-07-12 23:59:59
UTC Begin: 2023-07-12 23:59:59, UTC End: 2023-07-13 23:59:59
UTC Begin: 2023-07-13 23:59:59, UTC End: 2023-07-14 23:59:59
UTC Begin: 2023-07-14 23:59:59, UTC End: 2023-07-15 23:59:59
UTC Begin: 2023-07-15 23:59:59, UTC End: 2023-07-16 23:59:59


 93%|█████████▎| 1814/1948 [27:16<00:04, 27.43it/s]

UTC Begin: 2023-07-16 23:59:59, UTC End: 2023-07-17 23:59:59
UTC Begin: 2023-07-17 23:59:59, UTC End: 2023-07-18 23:59:59
UTC Begin: 2023-07-18 23:59:59, UTC End: 2023-07-19 23:59:59
UTC Begin: 2023-07-19 23:59:59, UTC End: 2023-07-20 23:59:59
UTC Begin: 2023-07-20 23:59:59, UTC End: 2023-07-21 23:59:59
UTC Begin: 2023-07-21 23:59:59, UTC End: 2023-07-22 23:59:59


 93%|█████████▎| 1821/1948 [27:17<00:04, 26.40it/s]

UTC Begin: 2023-07-22 23:59:59, UTC End: 2023-07-23 23:59:59
UTC Begin: 2023-07-23 23:59:59, UTC End: 2023-07-24 23:59:59
UTC Begin: 2023-07-24 23:59:59, UTC End: 2023-07-25 23:59:59
UTC Begin: 2023-07-25 23:59:59, UTC End: 2023-07-26 23:59:59
UTC Begin: 2023-07-26 23:59:59, UTC End: 2023-07-27 23:59:59


 94%|█████████▍| 1828/1948 [27:17<00:04, 28.15it/s]

UTC Begin: 2023-07-27 23:59:59, UTC End: 2023-07-28 23:59:59
UTC Begin: 2023-07-28 23:59:59, UTC End: 2023-07-29 23:59:59
UTC Begin: 2023-07-29 23:59:59, UTC End: 2023-07-30 23:59:59
UTC Begin: 2023-07-30 23:59:59, UTC End: 2023-07-31 23:59:59
UTC Begin: 2023-07-31 23:59:59, UTC End: 2023-08-01 23:59:59
UTC Begin: 2023-08-01 23:59:59, UTC End: 2023-08-02 23:59:59
UTC Begin: 2023-08-02 23:59:59, UTC End: 2023-08-03 23:59:59


 94%|█████████▍| 1835/1948 [27:17<00:03, 29.11it/s]

UTC Begin: 2023-08-03 23:59:59, UTC End: 2023-08-04 23:59:59
UTC Begin: 2023-08-04 23:59:59, UTC End: 2023-08-05 23:59:59
UTC Begin: 2023-08-05 23:59:59, UTC End: 2023-08-06 23:59:59
UTC Begin: 2023-08-06 23:59:59, UTC End: 2023-08-07 23:59:59
UTC Begin: 2023-08-07 23:59:59, UTC End: 2023-08-08 23:59:59
UTC Begin: 2023-08-08 23:59:59, UTC End: 2023-08-09 23:59:59
UTC Begin: 2023-08-09 23:59:59, UTC End: 2023-08-10 23:59:59


 94%|█████████▍| 1838/1948 [27:17<00:03, 29.18it/s]

UTC Begin: 2023-08-10 23:59:59, UTC End: 2023-08-11 23:59:59
UTC Begin: 2023-08-11 23:59:59, UTC End: 2023-08-12 23:59:59
UTC Begin: 2023-08-12 23:59:59, UTC End: 2023-08-13 23:59:59
UTC Begin: 2023-08-13 23:59:59, UTC End: 2023-08-14 23:59:59
UTC Begin: 2023-08-14 23:59:59, UTC End: 2023-08-15 23:59:59
UTC Begin: 2023-08-15 23:59:59, UTC End: 2023-08-16 23:59:59


 95%|█████████▍| 1845/1948 [27:18<00:03, 26.67it/s]

UTC Begin: 2023-08-16 23:59:59, UTC End: 2023-08-17 23:59:59
UTC Begin: 2023-08-17 23:59:59, UTC End: 2023-08-18 23:59:59
UTC Begin: 2023-08-18 23:59:59, UTC End: 2023-08-19 23:59:59
UTC Begin: 2023-08-19 23:59:59, UTC End: 2023-08-20 23:59:59
UTC Begin: 2023-08-20 23:59:59, UTC End: 2023-08-21 23:59:59


 95%|█████████▌| 1851/1948 [27:18<00:03, 27.21it/s]

UTC Begin: 2023-08-21 23:59:59, UTC End: 2023-08-22 23:59:59
UTC Begin: 2023-08-22 23:59:59, UTC End: 2023-08-23 23:59:59
UTC Begin: 2023-08-23 23:59:59, UTC End: 2023-08-24 23:59:59
UTC Begin: 2023-08-24 23:59:59, UTC End: 2023-08-25 23:59:59
UTC Begin: 2023-08-25 23:59:59, UTC End: 2023-08-26 23:59:59
UTC Begin: 2023-08-26 23:59:59, UTC End: 2023-08-27 23:59:59


 95%|█████████▌| 1857/1948 [27:18<00:03, 24.91it/s]

UTC Begin: 2023-08-27 23:59:59, UTC End: 2023-08-28 23:59:59
UTC Begin: 2023-08-28 23:59:59, UTC End: 2023-08-29 23:59:59
UTC Begin: 2023-08-29 23:59:59, UTC End: 2023-08-30 23:59:59
UTC Begin: 2023-08-30 23:59:59, UTC End: 2023-08-31 23:59:59
UTC Begin: 2023-08-31 23:59:59, UTC End: 2023-09-01 23:59:59


 96%|█████████▌| 1863/1948 [27:18<00:03, 25.88it/s]

UTC Begin: 2023-09-01 23:59:59, UTC End: 2023-09-02 23:59:59
UTC Begin: 2023-09-02 23:59:59, UTC End: 2023-09-03 23:59:59
UTC Begin: 2023-09-03 23:59:59, UTC End: 2023-09-04 23:59:59
UTC Begin: 2023-09-04 23:59:59, UTC End: 2023-09-05 23:59:59
UTC Begin: 2023-09-05 23:59:59, UTC End: 2023-09-06 23:59:59
UTC Begin: 2023-09-06 23:59:59, UTC End: 2023-09-07 23:59:59


 96%|█████████▌| 1869/1948 [27:18<00:02, 26.59it/s]

UTC Begin: 2023-09-07 23:59:59, UTC End: 2023-09-08 23:59:59
UTC Begin: 2023-09-08 23:59:59, UTC End: 2023-09-09 23:59:59
UTC Begin: 2023-09-09 23:59:59, UTC End: 2023-09-10 23:59:59
UTC Begin: 2023-09-10 23:59:59, UTC End: 2023-09-11 23:59:59
UTC Begin: 2023-09-11 23:59:59, UTC End: 2023-09-12 23:59:59
UTC Begin: 2023-09-12 23:59:59, UTC End: 2023-09-13 23:59:59


 96%|█████████▋| 1875/1948 [27:19<00:02, 26.40it/s]

UTC Begin: 2023-09-13 23:59:59, UTC End: 2023-09-14 23:59:59
UTC Begin: 2023-09-14 23:59:59, UTC End: 2023-09-15 23:59:59
UTC Begin: 2023-09-15 23:59:59, UTC End: 2023-09-16 23:59:59
UTC Begin: 2023-09-16 23:59:59, UTC End: 2023-09-17 23:59:59
UTC Begin: 2023-09-17 23:59:59, UTC End: 2023-09-18 23:59:59
UTC Begin: 2023-09-18 23:59:59, UTC End: 2023-09-19 23:59:59


 97%|█████████▋| 1881/1948 [27:19<00:02, 26.74it/s]

UTC Begin: 2023-09-19 23:59:59, UTC End: 2023-09-20 23:59:59
UTC Begin: 2023-09-20 23:59:59, UTC End: 2023-09-21 23:59:59
UTC Begin: 2023-09-21 23:59:59, UTC End: 2023-09-22 23:59:59
UTC Begin: 2023-09-22 23:59:59, UTC End: 2023-09-23 23:59:59
UTC Begin: 2023-09-23 23:59:59, UTC End: 2023-09-24 23:59:59
UTC Begin: 2023-09-24 23:59:59, UTC End: 2023-09-25 23:59:59


 97%|█████████▋| 1887/1948 [27:19<00:02, 27.48it/s]

UTC Begin: 2023-09-25 23:59:59, UTC End: 2023-09-26 23:59:59
UTC Begin: 2023-09-26 23:59:59, UTC End: 2023-09-27 23:59:59
UTC Begin: 2023-09-27 23:59:59, UTC End: 2023-09-28 23:59:59
UTC Begin: 2023-09-28 23:59:59, UTC End: 2023-09-29 23:59:59
UTC Begin: 2023-09-29 23:59:59, UTC End: 2023-09-30 23:59:59
UTC Begin: 2023-09-30 23:59:59, UTC End: 2023-10-01 23:59:59


 97%|█████████▋| 1894/1948 [27:19<00:01, 28.83it/s]

UTC Begin: 2023-10-01 23:59:59, UTC End: 2023-10-02 23:59:59
UTC Begin: 2023-10-02 23:59:59, UTC End: 2023-10-03 23:59:59
UTC Begin: 2023-10-03 23:59:59, UTC End: 2023-10-04 23:59:59
UTC Begin: 2023-10-04 23:59:59, UTC End: 2023-10-05 23:59:59
UTC Begin: 2023-10-05 23:59:59, UTC End: 2023-10-06 23:59:59
UTC Begin: 2023-10-06 23:59:59, UTC End: 2023-10-07 23:59:59
UTC Begin: 2023-10-07 23:59:59, UTC End: 2023-10-08 23:59:59


 97%|█████████▋| 1898/1948 [27:20<00:01, 30.14it/s]

UTC Begin: 2023-10-08 23:59:59, UTC End: 2023-10-09 23:59:59
UTC Begin: 2023-10-09 23:59:59, UTC End: 2023-10-10 23:59:59
UTC Begin: 2023-10-10 23:59:59, UTC End: 2023-10-11 23:59:59
UTC Begin: 2023-10-11 23:59:59, UTC End: 2023-10-12 23:59:59
UTC Begin: 2023-10-12 23:59:59, UTC End: 2023-10-13 23:59:59
UTC Begin: 2023-10-13 23:59:59, UTC End: 2023-10-14 23:59:59
UTC Begin: 2023-10-14 23:59:59, UTC End: 2023-10-15 23:59:59


 98%|█████████▊| 1906/1948 [27:20<00:01, 29.52it/s]

UTC Begin: 2023-10-15 23:59:59, UTC End: 2023-10-16 23:59:59
UTC Begin: 2023-10-16 23:59:59, UTC End: 2023-10-17 23:59:59
UTC Begin: 2023-10-17 23:59:59, UTC End: 2023-10-18 23:59:59
UTC Begin: 2023-10-18 23:59:59, UTC End: 2023-10-19 23:59:59
UTC Begin: 2023-10-19 23:59:59, UTC End: 2023-10-20 23:59:59
UTC Begin: 2023-10-20 23:59:59, UTC End: 2023-10-21 23:59:59


 98%|█████████▊| 1913/1948 [27:20<00:01, 29.74it/s]

UTC Begin: 2023-10-21 23:59:59, UTC End: 2023-10-22 23:59:59
UTC Begin: 2023-10-22 23:59:59, UTC End: 2023-10-23 23:59:59
UTC Begin: 2023-10-23 23:59:59, UTC End: 2023-10-24 23:59:59
UTC Begin: 2023-10-24 23:59:59, UTC End: 2023-10-25 23:59:59
UTC Begin: 2023-10-25 23:59:59, UTC End: 2023-10-26 23:59:59
UTC Begin: 2023-10-26 23:59:59, UTC End: 2023-10-27 23:59:59
UTC Begin: 2023-10-27 23:59:59, UTC End: 2023-10-28 23:59:59


 99%|█████████▊| 1919/1948 [27:20<00:01, 25.77it/s]

UTC Begin: 2023-10-28 23:59:59, UTC End: 2023-10-29 23:59:59
UTC Begin: 2023-10-29 23:59:59, UTC End: 2023-10-30 23:59:59
UTC Begin: 2023-10-30 23:59:59, UTC End: 2023-10-31 23:59:59
UTC Begin: 2023-10-31 23:59:59, UTC End: 2023-11-01 23:59:59
UTC Begin: 2023-11-01 23:59:59, UTC End: 2023-11-02 23:59:59


 99%|█████████▉| 1926/1948 [27:21<00:00, 27.94it/s]

UTC Begin: 2023-11-02 23:59:59, UTC End: 2023-11-03 23:59:59
UTC Begin: 2023-11-03 23:59:59, UTC End: 2023-11-04 23:59:59
UTC Begin: 2023-11-04 23:59:59, UTC End: 2023-11-05 23:59:59
UTC Begin: 2023-11-05 23:59:59, UTC End: 2023-11-06 23:59:59
UTC Begin: 2023-11-06 23:59:59, UTC End: 2023-11-07 23:59:59
UTC Begin: 2023-11-07 23:59:59, UTC End: 2023-11-08 23:59:59
UTC Begin: 2023-11-08 23:59:59, UTC End: 2023-11-09 23:59:59


 99%|█████████▉| 1933/1948 [27:21<00:00, 29.28it/s]

UTC Begin: 2023-11-09 23:59:59, UTC End: 2023-11-10 23:59:59
UTC Begin: 2023-11-10 23:59:59, UTC End: 2023-11-11 23:59:59
UTC Begin: 2023-11-11 23:59:59, UTC End: 2023-11-12 23:59:59
UTC Begin: 2023-11-12 23:59:59, UTC End: 2023-11-13 23:59:59
UTC Begin: 2023-11-13 23:59:59, UTC End: 2023-11-14 23:59:59
UTC Begin: 2023-11-14 23:59:59, UTC End: 2023-11-15 23:59:59
UTC Begin: 2023-11-15 23:59:59, UTC End: 2023-11-16 23:59:59


 99%|█████████▉| 1937/1948 [27:21<00:00, 29.71it/s]

UTC Begin: 2023-11-16 23:59:59, UTC End: 2023-11-17 23:59:59
UTC Begin: 2023-11-17 23:59:59, UTC End: 2023-11-18 23:59:59
UTC Begin: 2023-11-18 23:59:59, UTC End: 2023-11-19 23:59:59
UTC Begin: 2023-11-19 23:59:59, UTC End: 2023-11-20 23:59:59
UTC Begin: 2023-11-20 23:59:59, UTC End: 2023-11-21 23:59:59
UTC Begin: 2023-11-21 23:59:59, UTC End: 2023-11-22 23:59:59


100%|█████████▉| 1944/1948 [27:21<00:00, 30.10it/s]

UTC Begin: 2023-11-22 23:59:59, UTC End: 2023-11-23 23:59:59
UTC Begin: 2023-11-23 23:59:59, UTC End: 2023-11-24 23:59:59
UTC Begin: 2023-11-24 23:59:59, UTC End: 2023-11-25 23:59:59
UTC Begin: 2023-11-25 23:59:59, UTC End: 2023-11-26 23:59:59
UTC Begin: 2023-11-26 23:59:59, UTC End: 2023-11-27 23:59:59
UTC Begin: 2023-11-27 23:59:59, UTC End: 2023-11-28 23:59:59
UTC Begin: 2023-11-28 23:59:59, UTC End: 2023-11-29 23:59:59


100%|██████████| 1948/1948 [27:21<00:00,  1.19it/s]

UTC Begin: 2023-11-29 23:59:59, UTC End: 2023-11-30 23:59:59
UTC Begin: 2023-11-30 23:59:59, UTC End: 2023-12-01 23:59:59





In [10]:
# Number of frames collected in `results`; they are concatenated into one table in the next cell.
len(results)

1609

In [11]:
# Stack the collected frames into one long table. ignore_index=True gives the
# result a fresh 0..n-1 index; without it each source frame's own row labels
# are carried over, which can repeat across frames and break label-based lookups.
reddit_sentiment_by_date = pd.concat(results, ignore_index=True)

reddit_sentiment_by_date.shape

(161976, 4)

In [14]:
# Preview the first rows of the combined sentiment table.
reddit_sentiment_by_date.head()

Unnamed: 0,Ticker,Cumulative Overall Sentiment,Cumulative Weighted Sentiment,Date
0,TSLA,5.49375,-139.8838,2018-08-01
1,SQ,3.4488,115.74475,2018-08-01
2,NVDA,2.61225,105.9679,2018-08-01
3,MSFT,2.04665,107.76055,2018-08-01
4,CC,1.46725,61.278,2018-08-01


In [15]:
# Dump the combined table to a bare CSV: no index column and no header row,
# so the file contains data rows only.
reddit_sentiment_by_date.to_csv(
    path_or_buf="hist_sentiment.csv",
    header=False,
    index=False,
)

In [16]:
# Peek at the first lines of the exported file (shell `head`).
!head hist_sentiment.csv

TSLA,5.49375,-139.88380000000004,2018-08-01
SQ,3.4488000000000003,115.74475,2018-08-01
NVDA,2.61225,105.9679,2018-08-01
MSFT,2.04665,107.76055,2018-08-01
CC,1.46725,61.278,2018-08-01
BIDU,1.3963,4.5704,2018-08-01
SBUX,1.1623999999999999,18.1114,2018-08-01
MU,1.1574499999999999,32.25815,2018-08-01
POST,0.80425,13.67225,2018-08-01
ACT,0.80425,13.67225,2018-08-01


In [18]:
import getpass
import os

# Connection settings are read from the standard libpq environment variables
# (PGUSER, PGPASSWORD, PGHOST, PGPORT, PGDATABASE). The password has no
# in-notebook default: it is taken from PGPASSWORD or prompted for, so the
# secret is never stored in the notebook source.
db_connection = psycopg2.connect(
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: "),
    host=os.environ.get("PGHOST", "mads-capstone.cmohac77hep9.eu-north-1.rds.amazonaws.com"),
    port=int(os.environ.get("PGPORT", "5432")),
    database=os.environ.get("PGDATABASE", "mads"),
)

# Autocommit: each statement issued through `cursor` is committed immediately.
db_connection.autocommit = True

cursor = db_connection.cursor()

In [19]:
# Target table for the per-ticker, per-date sentiment rows. IF NOT EXISTS keeps
# this cell re-runnable: a plain CREATE TABLE raises once the table exists.
query = '''CREATE TABLE IF NOT EXISTS reddit_sentiment_by_date (id SERIAL PRIMARY KEY,
                                ticker VARCHAR(8),
                                cumm_overall_sentiment float,
                                cumm_weighted_sentiment float,
                                record_date date)'''


cursor.execute(query)

In [20]:
# Row count of the sentiment table.
select_query = "SELECT count(*) FROM reddit_sentiment_by_date"

cursor.execute(select_query)

cursor.fetchall()

[(0,)]

In [22]:
# Bulk-load the exported file. It was written by pandas as CSV, so COPY is run
# in CSV mode (quoted fields are parsed; an unquoted empty field loads as NULL).
# `id` is left out of the column list so the SERIAL default fills it in.
copy_sql = """
    COPY reddit_sentiment_by_date
        (ticker, cumm_overall_sentiment, cumm_weighted_sentiment, record_date)
    FROM STDIN WITH (FORMAT csv)
"""
with open("hist_sentiment.csv", "rb") as fp:
    cursor.copy_expert(copy_sql, fp)

In [23]:
# Row count of the sentiment table (re-checked after the bulk load).
select_query = "SELECT count(*) FROM reddit_sentiment_by_date"

cursor.execute(select_query)

cursor.fetchall()

[(161976,)]

In [24]:
# Index on ticker. IF NOT EXISTS keeps the cell re-runnable instead of raising
# when the index already exists.
index_query = "CREATE INDEX IF NOT EXISTS reddit_sentiment_ticker_index ON reddit_sentiment_by_date(ticker);"

cursor.execute(index_query)

In [25]:
# Index on record_date. IF NOT EXISTS keeps the cell re-runnable instead of
# raising when the index already exists.
index_query = "CREATE INDEX IF NOT EXISTS reddit_sentiment_date_index ON reddit_sentiment_by_date(record_date);"

cursor.execute(index_query)

In [28]:
# Preview 100 loaded rows. Columns are selected explicitly so they always line
# up with the DataFrame labels, and ORDER BY makes the sample deterministic
# (LIMIT without ORDER BY may return any 100 rows).
preview_columns = ["id", "ticker", "cumm_overall_sentiment", "cumm_weighted_sentiment", "record_date"]

select_query = f"SELECT {', '.join(preview_columns)} FROM reddit_sentiment_by_date ORDER BY id LIMIT 100;"

cursor.execute(select_query)

rows = cursor.fetchall()

# fetchall() already yields one tuple per row, so the frame is built directly
# from it instead of copying every row into a second list first.
df = pd.DataFrame(rows, columns=preview_columns)

print(df.shape)

df.head()

(100, 5)


Unnamed: 0,id,ticker,cumm_overall_sentiment,cumm_weighted_sentiment,record_date
0,1,TSLA,5.49375,-139.8838,2018-08-01
1,2,SQ,3.4488,115.74475,2018-08-01
2,3,NVDA,2.61225,105.9679,2018-08-01
3,4,MSFT,2.04665,107.76055,2018-08-01
4,5,CC,1.46725,61.278,2018-08-01


# 3. Add daily_sentiment_df to the db table reddit_sentiment_by_date

In [None]:
# 