### Importing Libraries

In [1]:
!pip install tweet-preprocessor

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.layers import Dense,Flatten,Embedding,Bidirectional,LSTM,Dropout,BatchNormalization
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import nltk
from nltk.tokenize import sent_tokenize
from nltk.corpus import words
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

import re
import os
import preprocessor as p
from textblob import TextBlob

2023-05-04 18:52:35.508596: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-04 18:52:35.509966: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-05-04 18:52:35.536803: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-05-04 18:52:35.537374: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
nltk.download("stopwords")
nltk.download("wordnet")

[nltk_data] Downloading package stopwords to /home/enzo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/enzo/nltk_data...


True

### Data Preprocessing

In [4]:
df = pd.read_csv("Biden_No_Retweet_Full_Text.csv")
df2 = pd.read_csv("Trump_No_Retweet_Full_Text.csv")

# Dataset is now stored in a Pandas Dataframe


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15098 entries, 0 to 15097
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Date       15098 non-null  object
 1   Tweet_id   15098 non-null  int64 
 2   Verified   15098 non-null  bool  
 3   Tweet      15098 non-null  object
 4   User       15098 non-null  object
 5   Location   11032 non-null  object
 6   Source     15098 non-null  object
 7   Likes      15098 non-null  int64 
 8   Followers  15098 non-null  int64 
 9   Following  15098 non-null  int64 
 10  Retweets   15098 non-null  int64 
dtypes: bool(1), int64(5), object(5)
memory usage: 1.2+ MB


In [6]:
df.head(10)

Unnamed: 0,Date,Tweet_id,Verified,Tweet,User,Location,Source,Likes,Followers,Following,Retweets
0,2020-07-21 23:59:43,1285726179907993601,False,@JoeBiden VP Biden I wish you wouldn't debate ...,BballmomEjZay,United States,Twitter for Android,0,54,192,0
1,2020-07-21 23:59:27,1285726110513344520,False,"#GeorgeWill , it’s so sad you wake up everyday...",TalbotMac,"East Aurora, NY",Twitter for iPhone,0,882,1554,0
2,2020-07-21 23:59:24,1285726099239055361,False,Question:\nIf @JoeBiden doesn’t pick a woman o...,bk_middleclass,"Nashville, TN",Twitter for iPhone,0,1294,1213,0
3,2020-07-21 23:58:47,1285725945849151490,False,Right? 🤣🤣🤣 #Biden formally implicated in #Ukra...,MMorganBlair,United States,Twitter for iPhone,1,2476,3334,1
4,2020-07-21 23:58:02,1285725754961965056,False,These idiots let Trump do anything he wants. W...,realfacade1,Purgatory,Twitter for Android,0,2764,3799,0
5,2020-07-21 23:57:22,1285725587089362946,False,How could #Biden be +4 in #Ohio and +3 in #Pen...,bradpomerance,Los Angeles,Twitter for iPhone,0,2722,1406,0
6,2020-07-21 23:56:58,1285725486749032449,False,@mamendoza480 There was never teaching any rel...,UsecommonSentz,United States,Twitter Web App,0,403,781,0
7,2020-07-21 23:56:55,1285725476779102209,False,@chick_right @MSNBC @JoeNBC In your dreams @ch...,amauney28,"Maryland, USA",Twitter for iPhone,0,78,958,0
8,2020-07-21 23:56:19,1285725322399420416,False,Joy is on a one woman mission to expose #45 fo...,JanetThoma,Chicago,Twitter Web App,0,2617,4989,0
9,2020-07-21 23:56:05,1285725266065719296,False,I slept better with OBAMA and BIDEN \nnow JOE ...,RealMiddleClass,"Los Angeles, CA",Twitter Web App,1,375,815,0


In [7]:
df.drop_duplicates(subset = "Tweet", keep = "first",inplace = True,ignore_index = True) #Removing duplicates Biden

#### Cleaning Biden

In [8]:
tweet_list = []
for x in df["Tweet"]:
    tweet_list.append(p.clean(x)) #Cleaning tweets (Removes URLs, Hashtags, Mentions, Reserved Words (RT,FAV) Emojis, Smileys) and appending to a list
    
tweet_list = [re.sub('[^a-zA-Z]', ' ',i) for i in tweet_list] #Removing punctuatuons

In [9]:
print("----Before----")
print(df["Tweet"][3])

print("----After----")
print(tweet_list[3])

----Before----
Right? 🤣🤣🤣 #Biden formally implicated in #Ukraine scandal @ChanelRion reported this afternoon 
 #Burisma https://t.co/B13j0IR6er
----After----
Right  formally implicated in scandal reported this afternoon


#### Removing Stopwords

In [10]:
clean_tweets = []
for tweet in tweet_list: 
        for word in set(stopwords.words("english")):
            token = " "+word+" "
            tweet = tweet.replace(token, " ")
        clean_tweets.append(tweet)

In [11]:
clean_tweets[:5]

['VP Biden I wish debate Trump unless  Debate Trump shows taxesDebate Tells Putin stop killing soldiersDebate Put sanctions Putin election hacks amp  I wish publicly challenge   Rice',
 '  sad wake everyday forgetting YOU George Will    like Swampers badly need disinfect  Youre new breed SenileElites  Same man plan vote for',
 'Question If doesnt pick woman color  would black community hold ',
 'Right  formally implicated scandal reported afternoon',
 'These idiots let Trump anything wants  When Biden gets elected  I want hear word  I hope come like ']

#### Stemming

In [12]:
stemmed = []
for word in clean_tweets:
    stemmed.append(PorterStemmer().stem(word))

In [13]:
stemmed[:5]

['vp biden i wish debate trump unless  debate trump shows taxesdebate tells putin stop killing soldiersdebate put sanctions putin election hacks amp  i wish publicly challenge   ric',
 '  sad wake everyday forgetting you george will    like swampers badly need disinfect  youre new breed senileelites  same man plan vote for',
 'question if doesnt pick woman color  would black community hold ',
 'right  formally implicated scandal reported afternoon',
 'these idiots let trump anything wants  when biden gets elected  i want hear word  i hope come like ']

In [14]:
df["Processed"] = stemmed

### Tfidf Vectorizer

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

vector = TfidfVectorizer(ngram_range=(1,1), max_features=150).fit(df["Processed"]) 

trans = vector.transform(df["Processed"])  # Transforming the vectorizer

df_vect=pd.DataFrame(trans.toarray(), columns=vector.get_feature_names_out()) # Converting to DataFrame

df_vect.head(10)

Unnamed: 0,all,already,also,america,american,americans,amp,and,another,anyone,...,will,win,wins,work,world,would,years,yes,you,your
0,0.0,0.0,0.0,0.0,0.0,0.0,0.290424,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.387464,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.299367,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.516294,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.530189,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.284088,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Sentiment Analysis using TextBlob

In [16]:
pol = []

for tweet in df["Processed"]:
    blob = TextBlob(tweet)
    pol.append(blob.sentiment.polarity)
df['Polarity']=pol

In [17]:
df['Location'].head(10)

0      United States
1    East Aurora, NY
2      Nashville, TN
3      United States
4         Purgatory 
5        Los Angeles
6      United States
7      Maryland, USA
8            Chicago
9    Los Angeles, CA
Name: Location, dtype: object

#### Segregating into Positive, Negative and Neutral

In [18]:
import pandas as pd

# Create a mapping of state names and abbreviations to their index in the 'states' list
state_mapping = {
    'Alabama': 0, 'AL': 0,
    'Alaska': 1, 'AK': 1,
    'Arizona': 2, 'AZ': 2,
    'Arkansas': 3, 'AR': 3,
    'California': 4, 'CA': 4,
    'Colorado': 5, 'CO': 5,
    'Connecticut': 6, 'CT': 6,
    'Delaware': 7, 'DE': 7,
    'Florida': 8, 'FL': 8,
    'Georgia': 9, 'GA': 9,
    'Hawaii': 10, 'HI': 10,
    'Idaho': 11, 'ID': 11,
    'Illinois': 12, 'IL': 12,
    'Indiana': 13, 'IN': 13,
    'Iowa': 14, 'IA': 14,
    'Kansas': 15, 'KS': 15,
    'Kentucky': 16, 'KY': 16,
    'Louisiana': 17, 'LA': 17,
    'Maine': 18, 'ME': 18,
    'Maryland': 19, 'MD': 19,
    'Massachusetts': 20, 'MA': 20,
    'Michigan': 21, 'MI': 21,
    'Minnesota': 22, 'MN': 22,
    'Mississippi': 23, 'MS': 23,
    'Missouri': 24, 'MO': 24,
    'Montana': 25, 'MT': 25,
    'Nebraska': 26, 'NE': 26,
    'Nevada': 27, 'NV': 27,
    'New Hampshire': 28, 'NH': 28,
    'New Jersey': 29, 'NJ': 29,
    'New Mexico': 30, 'NM': 30,
    'New York': 31, 'NY': 31,
    'North Carolina': 32, 'NC': 32,
    'North Dakota': 33, 'ND': 33,
    'Ohio': 34, 'OH': 34,
    'Oklahoma': 35, 'OK': 35,
    'Oregon': 36, 'OR': 36,
    'Pennsylvania': 37, 'PA': 37,
    'Rhode Island': 38, 'RI': 38,
    'South Carolina': 39, 'SC': 39,
    'South Dakota': 40, 'SD': 40,
    'Tennessee': 41, 'TN': 41,
    'Texas': 42, 'TX': 42,
    'Utah': 43, 'UT': 43,
    'Vermont': 44, 'VT': 44,
    'Virginia': 45, 'VA': 45,
    'Washington': 46, 'WA': 46,
    'West Virginia': 47, 'WV': 47,
    'Wisconsin': 48, 'WI': 48,
    'Wyoming': 49, 'WY': 49,
}


# Initialize the 'states' list with zeros
states = [0] * 50

# Define a function to categorize the location
def categorize_location(location, state_mapping):
    if pd.isna(location):
        return None
    for key, index in state_mapping.items():
        pattern = r'\b(?:{})\b'.format(re.escape(key))
        if re.search(pattern, location, re.IGNORECASE):
            return index
    return None

# Apply the categorize_location function to the 'Location' column and store the results in a new column
df['State_Index'] = df['Location'].apply(categorize_location, args=(state_mapping,))

# Update the 'states' list with the count of tweets from each state
for index in df['State_Index'].dropna().astype(int):
    states[index] += 1

Unnamed: 0,Date,Tweet_id,Verified,Tweet,User,Location,Source,Likes,Followers,Following,Retweets,Processed,Polarity,State_Index
0,2020-07-21 23:59:43,1285726179907993601,False,@JoeBiden VP Biden I wish you wouldn't debate ...,BballmomEjZay,United States,Twitter for Android,0,54,192,0,vp biden i wish debate trump unless debate tr...,0.0,
1,2020-07-21 23:59:27,1285726110513344520,False,"#GeorgeWill , it’s so sad you wake up everyday...",TalbotMac,"East Aurora, NY",Twitter for iPhone,0,882,1554,0,sad wake everyday forgetting you george will...,-0.252727,31.0
2,2020-07-21 23:59:24,1285726099239055361,False,Question:\nIf @JoeBiden doesn’t pick a woman o...,bk_middleclass,"Nashville, TN",Twitter for iPhone,0,1294,1213,0,question if doesnt pick woman color would bla...,-0.166667,41.0
3,2020-07-21 23:58:47,1285725945849151490,False,Right? 🤣🤣🤣 #Biden formally implicated in #Ukra...,MMorganBlair,United States,Twitter for iPhone,1,2476,3334,1,right formally implicated scandal reported af...,-0.057143,
4,2020-07-21 23:58:02,1285725754961965056,False,These idiots let Trump do anything he wants. W...,realfacade1,Purgatory,Twitter for Android,0,2764,3799,0,these idiots let trump anything wants when bi...,-0.3,


In [19]:
# Initialize dictionaries to store sentiment counts, total polarity, and tweet counts for each state
sentiment_counts = {index: {'positive': 0, 'negative': 0, 'neutral': 0} for index in range(50)}
total_polarity = {index: 0 for index in range(50)}
tweet_counts = {index: 0 for index in range(50)}

# Iterate through the DataFrame rows and update sentiment counts, total polarity, and tweet counts for each state
for _, row in df.iterrows():
    if pd.isna(row['State_Index']):
        continue
    state_index = int(row['State_Index'])
    polarity = row['Polarity']
    
    if polarity > 0:
        sentiment_counts[state_index]['positive'] += 1
    elif polarity < 0:
        sentiment_counts[state_index]['negative'] += 1
    else:
        sentiment_counts[state_index]['neutral'] += 1

    total_polarity[state_index] += polarity
    tweet_counts[state_index] += 1

# Calculate the mean polarity for each state
mean_polarity = {index: total_polarity[index] / tweet_counts[index] if tweet_counts[index] > 0 else 0 for index in range(50)}

# Define a function to assign sentiment based on polarity
def assign_sentiment(polarity):
    if polarity > 0:
        return "Positive"
    elif polarity < 0:
        return "Negative"
    else:
        return "Neutral"

# Assign sentiment to each tweet using the assign_sentiment function
df['Sentiment'] = df['Polarity'].apply(assign_sentiment)

# Convert the mean_polarity and sentiment_counts dictionaries to have state names as keys instead of indices
state_names = [
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'Florida',
    'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
    'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
    'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma',
    'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
    'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'
]

mean_polarity_by_state = {state_names[index]: mean_polarity[index] for index in range(50)}
sentiment_counts_by_state = {state_names[index]: sentiment_counts[index] for index in range(50)}


In [20]:
mean_polarity_by_state

{'Alabama': -0.008569594757094746,
 'Alaska': 0.007282913165266107,
 'Arizona': 0.05137708982454205,
 'Arkansas': -0.02490842490842491,
 'California': 0.06572405363909169,
 'Colorado': 0.04875727792878956,
 'Connecticut': 0.035447845804988665,
 'Delaware': -0.03474258275394639,
 'Florida': 0.05395323713605328,
 'Georgia': 0.09011241238919811,
 'Hawaii': 0.035955086580086566,
 'Idaho': 0.01410984848484848,
 'Illinois': 0.024853509384025807,
 'Indiana': 0.037442149167509375,
 'Iowa': 0.14856702586541298,
 'Kansas': 0.002881708238851096,
 'Kentucky': 0.0205764163372859,
 'Louisiana': 0.05482055042910307,
 'Maine': 0.16753826878826877,
 'Maryland': 0.045851222782192926,
 'Massachusetts': 0.061930990340505296,
 'Michigan': 0.05762365362811793,
 'Minnesota': 0.01831525514715171,
 'Mississippi': -0.019449206349206327,
 'Missouri': 0.09842136579149569,
 'Montana': 0.020436507936507937,
 'Nebraska': 0.0517625231910946,
 'Nevada': 0.030566836915521124,
 'New Hampshire': -0.05762085137085137,
 'N

In [21]:
sentiment_counts

{0: {'positive': 21, 'negative': 17, 'neutral': 22},
 1: {'positive': 5, 'negative': 3, 'neutral': 9},
 2: {'positive': 65, 'negative': 41, 'neutral': 51},
 3: {'positive': 4, 'negative': 2, 'neutral': 7},
 4: {'positive': 300, 'negative': 168, 'neutral': 263},
 5: {'positive': 58, 'negative': 30, 'neutral': 41},
 6: {'positive': 9, 'negative': 9, 'neutral': 17},
 7: {'positive': 11, 'negative': 11, 'neutral': 11},
 8: {'positive': 189, 'negative': 105, 'neutral': 194},
 9: {'positive': 52, 'negative': 18, 'neutral': 42},
 10: {'positive': 9, 'negative': 6, 'neutral': 7},
 11: {'positive': 4, 'negative': 1, 'neutral': 7},
 12: {'positive': 84, 'negative': 72, 'neutral': 57},
 13: {'positive': 81, 'negative': 72, 'neutral': 80},
 14: {'positive': 22, 'negative': 4, 'neutral': 5},
 15: {'positive': 8, 'negative': 6, 'neutral': 7},
 16: {'positive': 13, 'negative': 12, 'neutral': 21},
 17: {'positive': 17, 'negative': 8, 'neutral': 13},
 18: {'positive': 18, 'negative': 2, 'neutral': 17},

In [22]:
positive ,negative, neutral = 0,0,0

for polar in df["Polarity"]:
    if polar > 0:
        positive += 1
    elif polar < 0:
        negative += 1
    else:
        neutral += 1
        
pol_list = []

for x in pol:
    if x>0:
        pol_list.append("Positive")
    elif x<0:
        pol_list.append("Negative")
    else:
        pol_list.append("Neutral")
        
df["Sentiment"] = pol_list

In [23]:
print("Positive :",positive)
print("Negative :",negative)
print("Neutral :",neutral)

Positive : 6029
Negative : 3608
Neutral : 5409


In [24]:
sentences = list(df.Processed)
labels = df.Sentiment.tolist()

### Same thing but for Trump

In [25]:
df2.drop_duplicates(subset = "Tweet", keep = "first",inplace = True,ignore_index = True) #Removing duplicates Biden

In [26]:
tweet_list = []
for x in df2["Tweet"]:
    tweet_list.append(p.clean(x)) #Cleaning tweets (Removes URLs, Hashtags, Mentions, Reserved Words (RT,FAV) Emojis, Smileys) and appending to a list
    
tweet_list = [re.sub('[^a-zA-Z]', ' ',i) for i in tweet_list] #Removing punctuatuons

In [27]:
clean_tweets = []
for tweet in tweet_list: 
        for word in set(stopwords.words("english")):
            token = " "+word+" "
            tweet = tweet.replace(token, " ")
        clean_tweets.append(tweet)

In [28]:
stemmed = []
for word in clean_tweets:
    stemmed.append(PorterStemmer().stem(word))

In [30]:
df2["Processed"] = stemmed

In [31]:
vector2 = TfidfVectorizer(ngram_range=(1,1), max_features=150).fit(df2["Processed"]) 

trans2 = vector2.transform(df2["Processed"])  # Transforming the vectorizer

df2_vect=pd.DataFrame(trans2.toarray(), columns=vector2.get_feature_names_out()) # Converting to DataFrame

In [53]:
pol = []

for tweet in df2["Processed"]:
    blob = TextBlob(tweet)
    pol.append(blob.sentiment.polarity)
df2['Trump_Polarity']=pol

#### Segregating into Positive, Negative and Neutral

In [52]:
# Apply the categorize_location function to the 'Location' column and store the results in a new column
df2['State_Index'] = df2['Location'].apply(categorize_location, args=(state_mapping,))

biden_states = states
trump_states = [0]*50

# Update the 'states' list with the count of tweets from each state
for index in df2['State_Index'].dropna().astype(int):
    trump_states[index] += 1

In [54]:
# Initialize dictionaries to store sentiment counts, total polarity, and tweet counts for each state
trump_sentiment_counts = {index: {'trump positive': 0, 'trump negative': 0, 'trump neutral': 0} for index in range(50)}
trump_total_polarity = {index: 0 for index in range(50)}
trump_tweet_counts = {index: 0 for index in range(50)}

# Iterate through the DataFrame rows and update sentiment counts, total polarity, and tweet counts for each state
for _, row in df2.iterrows():
    if pd.isna(row['State_Index']):
        continue
    state_index = int(row['State_Index'])
    polarity = row['Trump_Polarity']
    
    if polarity > 0:
        trump_sentiment_counts[state_index]['trump positive'] += 1
    elif polarity < 0:
        trump_sentiment_counts[state_index]['trump negative'] += 1
    else:
        trump_sentiment_counts[state_index]['trump neutral'] += 1

    trump_total_polarity[state_index] += polarity
    trump_tweet_counts[state_index] += 1

# Calculate the mean polarity for each state
trump_mean_polarity = {index: trump_total_polarity[index] / trump_tweet_counts[index] if trump_tweet_counts[index] > 0 else 0 for index in range(50)}

# Assign sentiment to each tweet using the assign_sentiment function
df2['Sentiment'] = df2['Polarity'].apply(assign_sentiment)

# Convert the mean_polarity and sentiment_counts dictionaries to have state names as keys instead of indices
trump_mean_polarity_by_state = {state_names[index]: trump_mean_polarity[index] for index in range(50)}
trump_sentiment_counts_by_state = {state_names[index]: trump_sentiment_counts[index] for index in range(50)}


In [55]:
positive ,negative, neutral = 0,0,0

for polar in df2["Polarity"]:
    if polar > 0:
        positive += 1
    elif polar < 0:
        negative += 1
    else:
        neutral += 1
        
pol_list = []

for x in pol:
    if x>0:
        pol_list.append("Trump Positive")
    elif x<0:
        pol_list.append("Trump Negative")
    else:
        pol_list.append("Trump Neutral")
        
df2["Sentiment"] = pol_list

In [74]:
biden_sentiment_count = pd.DataFrame.from_dict(sentiment_counts_by_state, orient='index').rename(columns={
    "positive":"biden positive",
    "negative":"biden negative",
    "neutral":"biden neutral"
})
biden_mean_sentminent = pd.DataFrame.from_dict(mean_polarity_by_state, orient='index').rename(columns={0:"biden mean"})
trump_sentiment_count = pd.DataFrame.from_dict(trump_sentiment_counts_by_state, orient='index')
trump_mean_sentminent = pd.DataFrame.from_dict(trump_mean_polarity_by_state, orient='index').rename(columns={0:"trump mean"})

In [80]:
output = biden_sentiment_count.join(biden_mean_sentminent).join(trump_sentiment_count).join(trump_mean_sentminent)

output.head(5)

Unnamed: 0,biden positive,biden negative,biden neutral,Biden mean,trump positive,trump negative,trump neutral,trump mean
Alabama,21,17,22,-0.00857,9,2,4,0.155742
Alaska,5,3,9,0.007283,1,1,5,0.053571
Arizona,65,41,51,0.051377,31,35,35,0.006855
Arkansas,4,2,7,-0.024908,3,4,2,-0.067542
California,300,168,263,0.065724,362,253,367,0.035545


In [81]:
output.to_csv('baseline_data.csv')