In [1]:
import pandas as pd
import json
import csv 
import statistics 
from textblob import TextBlob
from collections import Counter 
import operator 
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
# Getting CSV file
file_name = 'Filtered_Biden_CSV.csv'

# Read the CSV file into a DataFrame
data = pd.read_csv(file_name)

# Display the first few rows of the DataFrame
data

Unnamed: 0,Screen Name,Text,Language,Tweet Type
0,DannyOlive03,Fun fact: you’ll run into more trump supporter...,en,Tweet
1,most_sarcastic,@CountyHatter86 ‘That's From Then-Vice Preside...,en,Tweet
2,btpftpotp,@WalshFreedom Joe Biden? Where’d he go?,en,Reply
3,newstell2,Biden and Harris roasted for ‘going to lunch’ ...,en,Tweet
4,LucindaRhysEvan,"And in #America, #TuckerCarlson receives a sta...",en,Tweet
...,...,...,...,...
560,ryman_eugene,@ColumbiaBugle Louisiana senator bill Cassidy ...,en,Reply
561,JosephRBr4ndon,@ABlackPolitical @TeamPelosi We've already had...,en,Reply
562,PatrioticOn22,Joe Biden instructs DeSantis to return to his ...,en,Tweet
563,ConservativeO17,Bernie Sanders Criticizes Biden’s Latest Move\...,en,Tweet


In [3]:
# Creation of a separate variable tweet_texts that contains all tweets
with open('Filtered_Biden_CSV.csv', encoding='utf-8', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    tweet_texts = [row['Text'] for row in reader]

print(tweet_texts)

['Fun fact: you’ll run into more trump supporters than Biden supporters in your life (unless you live in California/New York) but expect us to believe there are 80 million people who voted for Biden. Crazy how that works huh', "@CountyHatter86 ‘That's From Then-Vice President Biden...': GOP Senator Reads Old Biden ... https://t.co/n3MtTyUOm4 via @YouTube", '@WalshFreedom Joe Biden? Where’d he go?', 'Biden and Harris roasted for ‘going to lunch’ car selfie: ‘The cringe\xa0hurts’ https://t.co/ly2DlZr4QX', 'And in #America, #TuckerCarlson receives a standing ovation in Oxford, #Alabama - his first public event since firing by #Fox for political positions.\n\n#Tucker accused #Biden of creating #Ukraine crisis acting on interests of large corporarions in US #MilitaryIndustrialComplex. https://t.co/2z9xocvzZc', 'Joe Biden And His Democrats Swamped by New Wave\nhttps://t.co/eBFZsfkxG0', 'Joe Biden Received an Order from Bill Maher to Fire Leader\nhttps://t.co/1KNvn9MdkZ', '@OccupyDemocrats Th

In [4]:
# Creating a new varaible 'polarities', based off of tweet_texts and running a polarity check on each individual tweet/statement
polarities = []

for text in tweet_texts:
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    polarities.append(polarity)

print(polarities)

[0.0840909090909091, 0.1, 0.0, 0.0, 0.09285714285714286, 0.13636363636363635, 0.0, 0.1, 0.5, 0.0, 0.0, 0.16428571428571428, 0.1, 0.0, -0.3333333333333333, 0.0, -0.125, -0.16666666666666666, 0.25, -0.7999999999999999, -1.0, 0.35, 0.3734375, -0.25, -0.05, 0.0, 0.0, -0.29, -0.2318181818181818, -0.4, 0.25, 0.0, 0.0, 0.0, 0.07777777777777778, 0.0625, 0.0, -0.2916666666666667, 0.0, -0.22999999999999998, -0.05, 0.15416666666666667, 0.0, 0.07499999999999998, 0.25, 0.0, 0.25, 0.16458333333333333, 0.0, -0.5571428571428572, 0.19393939393939394, 0.0, 0.08, -0.325, 0.2808333333333333, -0.6, 0.0, 0.0, -0.375, 0.0, 0.0, -0.1875, 0.042272727272727274, 0.8, 0.0, 0.0, 0.15, 0.0, -0.2, 0.1, 0.0, -0.020000000000000018, 0.2, 0.0, -0.625, 0.48928571428571427, 1.0, -0.3, -0.2125, 0.13333333333333333, -0.25, -0.11111111111111112, 0.24, 0.5, 0.0, -0.2, 0.0, -0.4666666666666666, 0.16666666666666669, -0.1130952380952381, 0.0, -0.15000000000000002, 0.0, 0.0, -0.3333333333333333, 0.0, -0.2, 0.0, 0.0, 0.0, 0.0, 0.1

In [5]:
# Using statistics module in order to compute all values together and receive the overall/Mean Polarity
mean_polarity = statistics.mean(polarities)

print("Average Polarity of all Tweets:", mean_polarity)

Average Polarity of all Tweets: 0.03036579726095766


In [6]:
from collections import Counter
import re
import string

# Define a function to clean and tokenize the text
def tokenize(text):
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    
    # Remove punctuations
    text = text.translate(str.maketrans('', '', string.punctuation))
    
    # Convert text to lowercase
    text = text.lower()
    
    # Split text into tokens (words)
    tokens = text.split()
    
    return tokens

In [7]:
# Tokenize all tweet texts and create a list of all words
all_words = []
for text in tweet_texts:
    tokens = tokenize(text)
    all_words.extend(tokens)

# Calculate polarity scores for all words
word_polarity = {}
for word in all_words:
    word_blob = TextBlob(word)
    word_polarity[word] = word_blob.sentiment.polarity

# Filter words with non-zero polarity
non_zero_polarity_words = [word for word, polarity in word_polarity.items() if polarity != 0.0]

# Find the top 100 recurring words with non-zero polarity
most_common_non_zero_polarity_words = Counter(non_zero_polarity_words).most_common(100)

# Print the top 100 recurring words with non-zero polarity and their polarity scores
print("Top 100 recurring words with non-zero polarity and their polarity scores:")
for word, count in most_common_non_zero_polarity_words:
    print(f"{word}: {word_polarity[word]}")

Top 100 recurring words with non-zero polarity and their polarity scores:
fun: 0.3
more: 0.5
live: 0.13636363636363635
crazy: -0.6
old: 0.1
first: 0.25
large: 0.21428571428571427
new: 0.13636363636363635
capable: 0.2
most: 0.5
heavy: -0.2
special: 0.35714285714285715
near: 0.1
criminal: -0.4
foreign: -0.125
tense: -0.3333333333333333
stupid: -0.7999999999999999
insane: -1.0
slightly: -0.16666666666666666
very: 0.2
full: 0.35
evil: -1.0
jail: -0.1
failed: -0.5
past: -0.25
scary: -0.5
shit: -0.2
dangerous: -0.6
false: -0.4000000000000001
“criminal: -0.4
many: 0.5
straight: 0.2
social: 0.03333333333333333
‘major: 0.0625
hard: -0.2916666666666667
ill: -0.5
illegally: -0.5
much: 0.2
illegal: -0.5
twisted: -0.5
true: 0.35
expected: -0.1
fine: 0.4166666666666667
distasteful: -0.5
good: 0.7
wins: 0.3
other: -0.125
little: -0.1875
sick: -0.7142857142857143
tired: -0.4
great: 0.8
high: 0.16
worse: -0.4
sure: 0.5
huge: 0.4000000000000001
corrupt: -0.5
worthless: -0.8
useless: -0.5
destroy: -0.2
w

In [8]:
###This code works and was used to create Biden_Word_Polarity.csv file, if done now it just creates a repeat output###

# Tokenize all tweet texts and create a list of all words
all_words = []
for text in tweet_texts:
    tokens = tokenize(text)
    all_words.extend(tokens)

# Calculate polarity scores for all words
word_polarity = {}
for word in all_words:
    word_blob = TextBlob(word)
    word_polarity[word] = word_blob.sentiment.polarity

# Filter words with non-zero polarity
non_zero_polarity_words = [word for word, polarity in word_polarity.items() if polarity != 0.0]

# Find the top 100 recurring words with non-zero polarity
most_common_non_zero_polarity_words = Counter(non_zero_polarity_words).most_common(100)

# Write the results to a CSV file
with open('Biden_Word_Polarity.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Word', 'Polarity'])
    for word, count in most_common_non_zero_polarity_words:
        writer.writerow([word, word_polarity[word]])