In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader
from torchtext.data.functional import to_map_style_dataset
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
from newsapi import NewsApiClient
from openai import OpenAI
from my_secrets_file import API_KEY, OPEN_API_KEY
from datetime import datetime, timedelta

TO_DATE = datetime.today().strftime('%Y-%m-%d')
FROM_DATE = (datetime.today() - timedelta(days=30)).strftime('%Y-%m-%d')
MODEL = 'gpt-4-turbo'

FROM_DATE, TO_DATE

('2024-04-10', '2024-05-10')

## I. Sentiment Analysis of News Media Published about Candidates

In [2]:
newsapi = NewsApiClient(api_key=API_KEY)

biden_news = newsapi.get_everything(q='Biden OR Joe Biden OR Joseph Biden OR Joseph Robinette Biden OR President Biden',
                                      from_param=FROM_DATE,
                                      to=TO_DATE,
                                      language='en')

dems_news = newsapi.get_everything(q='Democrat OR Democrats OR Dem OR Dems OR Progressives',
                                      from_param=FROM_DATE,
                                      to=TO_DATE,
                                      language='en')

trump_news = newsapi.get_everything(q='Trump OR Donald Trump OR Donald J Trump OR President Trump',
                                      from_param=FROM_DATE,
                                      to=TO_DATE,
                                      language='en')

repub_news = newsapi.get_everything(q='Republican OR Republicans OR GOP OR MAGA',
                                      from_param=FROM_DATE,
                                      to=TO_DATE,
                                      language='en')


In [3]:
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Note: maybe we want to induce noise through transformations? 

class PreprocessData():
    def __init__(self, topic, data, transforms=None):
        self.topic = topic
        self.data = pd.DataFrame(data) 
        self.transforms = transforms

    def process_data(self):
        self.data['source'] = self.data['source'].apply(lambda x: x['name'])
        self.data['topic'] = self.topic
        self.data['content'] = self.data['url'].apply(self.get_content).str.replace('\n', ' ', regex=False)
        self.data['content'] = self.data['content'].apply(self.combine_longest_parts)
        self.data['description'] = self.data['description'].fillna('NO TEXT')
        self.data['processed_text'] = self.data.apply(
            lambda x: self.topic + ": " + (x['content'] if "EXCEPTION" not in x['content'] else x['description']), axis=1)
        return self.data[['source', 'author', 'title', 'processed_text']]
    
    def get_content(self, url):
        try:
            page = urlopen(url)
            html = page.read().decode("utf-8")
            soup = BeautifulSoup(html, "html.parser")
            element = soup.find('body')
            return element.get_text(' | ', strip=True) if element else soup.get_text(' | ', strip=True)
        except Exception as e:
            return f"EXCEPTION: {str(e)}"
        
    def combine_longest_parts(self, text):
        parts = text.split('|')
        sorted_parts = sorted(parts, key=len, reverse=True)
        longest_parts = sorted_parts[:3]
        combined_text = '|'.join(longest_parts)
        return combined_text
    
    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        content = self.get_content(row['url'])
        row['content'] = content.replace('\n', ' ')
        if self.transforms:
            row = self.transforms(row)
        return row


In [4]:
biden_df = PreprocessData(data=biden_news['articles'], topic='Biden').process_data() 
trump_df = PreprocessData(data=trump_news['articles'], topic='Trump').process_data() 
dem_df = PreprocessData(data=dems_news['articles'], topic='Democratic Party').process_data() 
rep_df = PreprocessData(data=repub_news['articles'], topic='Republican Party').process_data() 

In [5]:
biden_df.head(5)

Unnamed: 0,source,author,title,processed_text
0,Boston Herald,Boston Herald editorial staff,Letters to the editor,"Biden: I have been canvassing lately, talking..."
1,Dianeravitch.net,dianeravitch,Jonathan V. Last: Why Is a Good Economy Bad fo...,Biden: God what took so long. Well better lat...
2,Marketscreener.com,,Biden appears in live interview with Howard Stern,Biden: (marketscreener.com) President Joe Bide...
3,Investing.com,Reuters,Biden appears in live interview with Howard Stern,Biden: Biden appears in live interview with Ho...
4,Wonkette.com,Evan Hurst,All Trump’s Crime Friends Just Got Indicted Fo...,Biden: Go to prison and go to hell.


In [6]:
class GPTLayerOneProcess():
    def __init__(self, data, transforms=None):
        self.data = pd.DataFrame(data) 
        self.transforms = transforms
    
    def gpt_relatedness(self, description):
        client = OpenAI(api_key=OPEN_API_KEY)
        
        try:
            completion = client.chat.completions.create(
                model=MODEL,
                temperature=0,
                max_tokens=20,
                top_p=1,
                frequency_penalty=0.0,
                presence_penalty=0.0,
                messages=[
                    {"role": "system", "content": "You are a helpful research assistant."},
                    {"role": "user", "content": "Help me determine if the text provided is relevant to the topic, the topic can be found in the text provided before the first colon, for example: 'Biden: The President of the USA is at home today.' The topic in this example is Biden."},
                    {"role": "assistant", "content": "Sure, I'd be happy to!"},
                    {"role": "user", "content": "Biden: Yesterday, Japanese Prime Minister of Japan Shinzo Abe met with President Biden. Abe requested support for the ongoing Asia pacific conflict."},
                    {"role": "assistant", "content": "No, the text is not mainly about Biden, it is about Prime Minister Shinzo Abe asking for support from Biden."},
                    {"role": "user", "content": "Trump: Two days ago, President Donald J. Trump farted in court, according to Maggie Haberman. He is expected to fart again."},
                    {"role": "assistant", "content": "Yes, the text is mainly about Trump, it is about his farting during his trial."},
                    {"role": "user", "content": description}, 
                    {"role": "assistant", "content": "Analyzing the provided text."},
                ],
            )

            rate = completion.choices[0].message.content.strip().split('\n')[0]
            return rate
        except Exception as e:
            print(e)
            return None
        
    def process_data(self):
        self.data['relatedness'] = self.data['processed_text'].apply(self.gpt_relatedness)
        return self.data

In [7]:
df = pd.concat([biden_df, dem_df, trump_df, rep_df])
df.head(5)

Unnamed: 0,source,author,title,processed_text
0,Boston Herald,Boston Herald editorial staff,Letters to the editor,"Biden: I have been canvassing lately, talking..."
1,Dianeravitch.net,dianeravitch,Jonathan V. Last: Why Is a Good Economy Bad fo...,Biden: God what took so long. Well better lat...
2,Marketscreener.com,,Biden appears in live interview with Howard Stern,Biden: (marketscreener.com) President Joe Bide...
3,Investing.com,Reuters,Biden appears in live interview with Howard Stern,Biden: Biden appears in live interview with Ho...
4,Wonkette.com,Evan Hurst,All Trump’s Crime Friends Just Got Indicted Fo...,Biden: Go to prison and go to hell.


In [8]:
gpt_df_layer1 = GPTLayerOneProcess(data=df).process_data()

In [9]:
gpt_df_layer1.head(10)

Unnamed: 0,source,author,title,processed_text,relatedness
0,Boston Herald,Boston Herald editorial staff,Letters to the editor,"Biden: I have been canvassing lately, talking...","Yes, the text is relevant to the topic ""Biden...."
1,Dianeravitch.net,dianeravitch,Jonathan V. Last: Why Is a Good Economy Bad fo...,Biden: God what took so long. Well better lat...,"Yes, the text is relevant to the topic ""Biden...."
2,Marketscreener.com,,Biden appears in live interview with Howard Stern,Biden: (marketscreener.com) President Joe Bide...,"Yes, the text is relevant to the topic ""Biden""..."
3,Investing.com,Reuters,Biden appears in live interview with Howard Stern,Biden: Biden appears in live interview with Ho...,"Yes, the text is relevant to the topic ""Biden,..."
4,Wonkette.com,Evan Hurst,All Trump’s Crime Friends Just Got Indicted Fo...,Biden: Go to prison and go to hell.,"No, the text is not relevant to the topic ""Bid..."
5,Americanthinker.com,,Science fiction fans should abandon wokester '...,Biden: Star Wars actor Mark Hamill recently ap...,"Yes, the text is relevant to the topic ""Biden""..."
6,Americanthinker.com,,Joe Biden flubs J6 date at a fundraiser becaus...,"Biden: Just four days ago, Hollywood has-been ...","Yes, the text is relevant to the topic ""Biden,..."
7,Americanthinker.com,,"When it comes to Biden worship, Mark Hamill ou...","Biden: Many on the left, including countless m...","Yes, the text is relevant to the topic ""Biden""..."
8,Freerepublic.com,https://www.thegatewaypundit.com,Biden’s DOJ Sentence Aimee Harris to Prison fo...,Biden: As previously reported by The Gateway ...,"No, the text is not mainly about Biden; it pri..."
9,Muslimmatters.org,Ibrahim Moiz,Israel Seeks Escalation For Latitude – The Reg...,"Biden: , instead preferring to target the Dae...","No, the text is not mainly about Biden. It pri..."


In [10]:
review = gpt_df_layer1[gpt_df_layer1.relatedness.str.contains('No')]

for idx, row in review.iterrows():
    print(f"Index: {idx}, Processed Text: {row['relatedness']}")

remove_idx = review.index.values
gpt_df_layer1_reviewed = gpt_df_layer1[~gpt_df_layer1.index.isin(remove_idx)]
gpt_df_layer1_reviewed.head(5)

Index: 4, Processed Text: No, the text is not relevant to the topic "Biden" as it does not provide any
Index: 8, Processed Text: No, the text is not mainly about Biden; it primarily discusses an incident involving his daughter, Ashley
Index: 9, Processed Text: No, the text is not mainly about Biden. It primarily discusses the geopolitical dynamics involving Iran, the
Index: 11, Processed Text: No, the text is not mainly about Biden. It discusses various topics including the social network Bluesky
Index: 0, Processed Text: No, the text is not mainly about the Democratic Party. It primarily discusses the late Associate Justice Ruth
Index: 3, Processed Text: No, the text is not mainly about the Democratic Party; it seems to be about a specific individual
Index: 4, Processed Text: No, the text is not mainly about the Democratic Party. It focuses more on individual actions and statements
Index: 5, Processed Text: No, the text is not mainly about the Democratic Party. It primarily discusses 

Unnamed: 0,source,author,title,processed_text,relatedness
1,Dianeravitch.net,dianeravitch,Jonathan V. Last: Why Is a Good Economy Bad fo...,Biden: God what took so long. Well better lat...,"Yes, the text is relevant to the topic ""Biden...."
1,Business Insider,Bryan Metzger,Democrats' dilemma: Which hard-right Republica...,"Democratic Party: ""I'm a progressive Democrat...","Yes, the text is relevant to the topic ""Democr..."
12,The Verge,Gaby Del Valle,"House votes to reauthorize FISA, without the w...","Democratic Party: In a statement, Sen. Ron Wy...","Yes, the text is relevant to the topic ""Democr..."
27,NPR,Sarah McCammon,"As abortion looks like a key issue in 2024, vo...",Democratic Party: Most of that partisan gap s...,"Yes, the text is relevant to the topic ""Democr..."
57,Yahoo Entertainment,,"Mitt Romney praises Kyrsten Sinema, explains w...",Democratic Party: NO TEXT,Since there is no text provided after the topi...


In [69]:
MODEL = 'gpt-4-turbo'

class GPTLayerTwoProcess():
    def __init__(self, data, transforms=None):
        self.data = pd.DataFrame(data)
        self.transforms = transforms
    
    def gpt_likability(self, description):
        client = OpenAI(api_key=OPEN_API_KEY)
        
        try:
            completion = client.chat.completions.create(
                model=MODEL,
                temperature=0,
                max_tokens=50,
                top_p=1,
                frequency_penalty=0.0,
                presence_penalty=0.0,
                messages=[
                    {"role": "system", "content": "You are a sophisticated sentiment analysis tool."},
                    {"role": "user", "content": "Provide a sentiment score between 0 and 1 for the text provided in regards to how favorable the text is towards President Biden's election campaign, and justify your score with any criteria you used where 0 is extremely negative and 1 is extremely positive."},
                    {"role": "user", "content": "Biden: Today, President Biden signed a new bill to elimante all student loans."},
                    {"role": "assistant", "content": "Score: 0.92, Justification: this text mentions a major policy win for President Biden."},
                    {"role": "user", "content": "Trump: Two days ago, President Donald J. Trump farted in court, according to Maggie Haberman. He is expected to fart again."},
                    {"role": "assistant", "content": "Score: 0.67, Justification: while this is not about President Biden, it is about his opponent so negative news of Trump is good for Biden."},
                    {"role": "user", "content": "Democratic Party: The Democratic Party is splintering in its support for Palestinians, upsetting many young votes."},
                    {"role": "assistant", "content": "Score: 0.41, Justification: while this is not about President Biden, it is about his political party so negative news of the party is bad for Biden."},
                    {"role": "user", "content": "Republican Party: Marjorie Taylor Greene threatens to remove Speaker Johnson."},
                    {"role": "assistant", "content": "Score: 0.52, Justification: while this is not about President Biden, it is about his rival political party so negative news of the Republican Party is good for Biden."},
                    {"role": "assistant", "content": "Sure, I'd be happy to!"},
                    {"role": "user", "content": description},
                    {"role": "assistant", "content": "Analyzing the sentiment of the text."},
                ],
            )
            
            response = completion.choices[0].message.content.strip()
            print(response)
            # score_start = response.find("Score: ") + len("Score: ")
            # score_end = response.find(",", score_start)
            # score = response[score_start:score_end]
            # justification = response[score_end + 1:].strip()

            return response
        
        except Exception as e:
            print(f"Error: {e}")
            return (None, "Error parsing response")
        
    def process_data(self):
        # score, justification = self.data['processed_text'].apply(self.gpt_likability)
        # self.data['likability_score'] = score
        # self.data['score_justification'] = justification
        self.data['gpt_layer_2_response'] = self.data['processed_text'].apply(self.gpt_likability)
        return self.data

    

In [12]:
gpt_df_layer2_notreviewed = GPTLayerTwoProcess(data=gpt_df_layer1).process_data()
gpt_df_layer2_reviewed = GPTLayerTwoProcess(data=gpt_df_layer1_reviewed).process_data()

Score: 0.55

Justification: The text presents a mixed sentiment towards President Biden's election campaign. The initial part acknowledges the complexity of issues and the challenges in public understanding, which could be seen as a neutral to slightly positive acknowledgment of
Score: 0.85

Justification: The text is largely favorable towards President Biden, highlighting his administration's economic achievements, such as low unemployment rates and handling of inflation, in a positive light compared to historical figures and past administrations. It critic
Score: 0.85

Justification: The text presents President Biden in a positive, humanizing light by focusing on his personal challenges and resilience, specifically his lifelong struggle with stuttering. Sharing such personal stories, especially in a sympathetic context like
Score: 0.65, Justification: The text indicates President Biden's engagement in a live interview with a well-known media personality, which could be seen as a posi

In [13]:
def post_layer2_procesing(df):
    idx = df.gpt_layer_2_response.str.find("Justification: ")
    df['score'] = df.gpt_layer_2_response.apply(lambda x: x[x.find("Score: ")+7: x.find("Score: ")+11])
    df['justification'] = df.gpt_layer_2_response.apply(lambda x: x[x.find("Justification: ")+15: -1])
    df = df[df.score != 'ms t']
    df['score'] = df.score.apply(lambda x: float(x.strip().replace(',', '')))

    return df

gpt_df_layer2_reviewed_processed = post_layer2_procesing(gpt_df_layer2_reviewed)
gpt_df_layer2_notreviewed_processed = post_layer2_procesing(gpt_df_layer2_notreviewed)

In [15]:
final_reviewed_score, final_nonreviewed_score = np.mean(gpt_df_layer2_reviewed_processed.score), np.mean(gpt_df_layer2_notreviewed_processed.score)
final_reviewed_score, final_nonreviewed_score

(0.5, 0.49182692307692305)

## FINAL PREDICTION

In [63]:
# candidate sentiment analysis
alpha1 = 0.20

# unemployment rate trend extrapolation (binary 1/0 (positive/negative))
alpha2 = 0.03

# Allan Lichtman forecast (binary 1/0 (win/lose))
alpha3 = 0.18

# ABC Experts forecast (binary 1/0 (win/lose))
alpha4 = 0.13

# Split Ticket forecast (binary 1/0 (win/lose))
alpha5 = 0.09

# interest rate trend extrapolation 
alpha6 = 0.03

# stock market trend extrapolation
alpha7 = 0.03

# incumbency factor
alpha8 = 0.05

# real GDP trend extrapolation 
alpha9 = 0.03

# voter assessment of economy
alpha10 = 0.08

# inflation rate
alpha11 = 0.05

# polling forecast
alpha12 = 0.04

# approval rating forecast
alpha13 = 0.06

alphas = [alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11, alpha12, alpha13]
sum_of_alphas = sum(alphas)

if abs(sum_of_alphas - 1) < 0.0001:  # Allows a small margin of error
    print("The sum of all alphas is approximately 1.")
else:
    print("The sum of all alphas is not 1, it is", sum_of_alphas)


The sum of all alphas is approximately 1.


In [73]:
print("Final forecast of President Joe Biden winning the 2024 Presidential Election as of May 10, 2024")
(alpha1 * 0.5) + (alpha2 * 0.489909) + (alpha3 * 0.8) + (alpha4 * 0.54) + (alpha5 * 0.51) + (alpha6 * 1) + (alpha7 * 1) + (alpha8 * 1) + (alpha9 * 0.4725387) + (alpha10 * 0.462881) + (alpha11 * 0.4615465) + (alpha12 * 0.37543774) + (alpha13 * .41305233)

Final forecast of President Joe Biden winning the 2024 Presidential Election as of May 10, 2024


0.5988818854000001