# Adidas Sentiment Analysis Project

### In this Jupyter Notebook, I will clean both sets of the Adidas data and merge them together into a single dataframe, begin exploratory data analysis, calculate engagement metrics, and build regression models of the data. The finalized data from this workbook will be uploaded to Tableau for further data exploration and visualization.

In [2]:
#Importing Libraries

!pip install nbconvert[webpdf]
!pip install textblob
!pip install preprocessor

import numpy as np
import os
import pandas as pd
from pandas.io.formats.excel import ExcelFormatter
import tweepy
import re
import string
from textblob import TextBlob
import preprocessor as p
import nltk
nltk.download('stopwords')
nltk.download('punkt')
from nltk.corpus import stopwords
set(stopwords.words('english'))
from nltk.tokenize import word_tokenize

from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import logit, probit, ols



[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/elisecarlomagno/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/elisecarlomagno/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Importing pre-Kanye data

In [3]:
#Importing tweets
predata = pd.read_csv('adidas_pre.csv')

In [4]:
#Exploratory analysis
predata.head()

Unnamed: 0,id,created_at,source,original_text,clean_text,sentiment,polarity,subjectivity,lang,favorite_count,retweet_count,user,followers,friends,hashtags,place,coordinates,favorites
0,1.58e+18,Tue Oct 25 02:15:46 +0000 2022,"<a href=""https://mobile.twitter.com"" rel=""nofo...",RT @eortner: Hey #LosAngeles 2 tweets from #LA...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.1,0.1,en,0.0,19.0,"{'id': 98903593, 'id_str': '98903593', 'name':...",7728.0,8451.0,"LosAngeles, LAMayor, adidas, Antisemitism","eortner, KarenBassLA","Georgia, USA",
1,1.58e+18,Tue Oct 25 02:15:05 +0000 2022,"<a href=""http://twitter.com/download/android"" ...","Adidas is taking way too long ""reviewing"" the ...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.05,0.4,en,0.0,0.0,"{'id': 20696449, 'id_str': '20696449', 'name':...",2003.0,1442.0,"adidas, KanyeWest",adidas,"Los Angeles, CA",
2,1.58e+18,Tue Oct 25 02:14:45 +0000 2022,"<a href=""http://twitter.com/download/iphone"" r...",RT @DanielNewman: Hi @adidas . Either make an ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.157143,0.717857,en,0.0,130.0,"{'id': 32962994, 'id_str': '32962994', 'name':...",1002.0,4997.0,Kanye,"DanielNewman, adidas",,
3,1.58e+18,Tue Oct 25 02:14:39 +0000 2022,"<a href=""http://twitter.com/download/iphone"" r...",RT @Ordainedprophet: 🚨Kanye should have been s...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,en,0.0,31.0,"{'id': 1050050985685594112, 'id_str': '1050050...",2887.0,4601.0,,Ordainedprophet,Chicago,
4,1.58e+18,Tue Oct 25 02:14:14 +0000 2022,"<a href=""http://twitter.com/download/android"" ...","RT @nancylevine: @KatiePhang @adidas Thanks, K...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.2,0.2,en,0.0,8.0,"{'id': 1703575711, 'id_str': '1703575711', 'na...",706.0,4998.0,adidas,"nancylevine, KatiePhang, adidas, adidas",Florida,


In [5]:
predata.tail()

Unnamed: 0,id,created_at,source,original_text,clean_text,sentiment,polarity,subjectivity,lang,favorite_count,retweet_count,user,followers,friends,hashtags,place,coordinates,favorites
4105,1.58e+18,Sun Oct 23 14:01:30 +0000 2022,"<a href=""http://twitter.com/#!/download/ipad"" ...",RT @nancylevine: I have reached out to @adidas...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.066667,0.3,en,0.0,42.0,"{'id': 25207598, 'id_str': '25207598', 'name':...",8317.0,8442.0,adidas,"nancylevine, adidas","Texas, USA",
4106,1.58e+18,Sun Oct 23 14:01:13 +0000 2022,"<a href=""http://twitter.com/#!/download/ipad"" ...",RT @nancylevine: Neo-Nazis in Los Angeles yest...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.285714,0.535714,en,0.0,414.0,"{'id': 25207598, 'id_str': '25207598', 'name':...",8317.0,8442.0,,"nancylevine, adidas, CNBC","Texas, USA",
4107,1.58e+18,Sun Oct 23 14:00:19 +0000 2022,"<a href=""http://twitter.com/download/iphone"" r...",@kanyewest brags he can say “anti-Semitic sh*t...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,en,2.0,1.0,"{'id': 1643453666, 'id_str': '1643453666', 'na...",280.0,1568.0,adidas,"kanyewest, adidas","Atlanta, GA",
4108,,,,,,,,,,,,,,,,,,
4109,,,,,,,0.070477,,,,,,,,,,,


In [6]:
#descriptive statistics 
sumstats=predata.describe()
sumstats.round()

Unnamed: 0,id,polarity,subjectivity,favorite_count,retweet_count,followers,friends
count,4108.0,4109.0,4108.0,4108.0,4108.0,4108.0,4108.0
mean,1.58e+18,0.0,0.0,1.0,97.0,7835.0,3187.0
std,107021.0,0.0,0.0,15.0,151.0,54397.0,14240.0
min,1.58e+18,-1.0,0.0,0.0,0.0,0.0,0.0
25%,1.58e+18,0.0,0.0,0.0,0.0,144.0,249.0
50%,1.58e+18,0.0,0.0,0.0,7.0,638.0,996.0
75%,1.58e+18,0.0,1.0,0.0,130.0,2638.0,3472.0
max,1.58e+18,1.0,1.0,506.0,487.0,2107522.0,581668.0


## Cleaning pre-Kanye data: Removing NA values and unusable columns

In [7]:
na_values = predata.isna()
print(na_values)

         id  created_at  source  original_text  clean_text  sentiment  \
0     False       False   False          False       False      False   
1     False       False   False          False       False      False   
2     False       False   False          False       False      False   
3     False       False   False          False       False      False   
4     False       False   False          False       False      False   
...     ...         ...     ...            ...         ...        ...   
4105  False       False   False          False       False      False   
4106  False       False   False          False       False      False   
4107  False       False   False          False       False      False   
4108   True        True    True           True        True       True   
4109   True        True    True           True        True       True   

      polarity  subjectivity   lang  favorite_count  retweet_count   user  \
0        False         False  False           

In [8]:
#dropping the favorites, hashtags, place, and coordinates columns

columns_to_drop = ['favorites','hashtags','place','coordinates','created_at','source','lang','id']
predata.drop(columns=columns_to_drop, inplace=True)

In [9]:
predata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends
0,RT @eortner: Hey #LosAngeles 2 tweets from #LA...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.1,0.1,0.0,19.0,"{'id': 98903593, 'id_str': '98903593', 'name':...",7728.0,8451.0
1,"Adidas is taking way too long ""reviewing"" the ...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.05,0.4,0.0,0.0,"{'id': 20696449, 'id_str': '20696449', 'name':...",2003.0,1442.0
2,RT @DanielNewman: Hi @adidas . Either make an ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.157143,0.717857,0.0,130.0,"{'id': 32962994, 'id_str': '32962994', 'name':...",1002.0,4997.0
3,RT @Ordainedprophet: 🚨Kanye should have been s...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,31.0,"{'id': 1050050985685594112, 'id_str': '1050050...",2887.0,4601.0
4,"RT @nancylevine: @KatiePhang @adidas Thanks, K...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.2,0.2,0.0,8.0,"{'id': 1703575711, 'id_str': '1703575711', 'na...",706.0,4998.0


In [10]:
#dropping NA values

na_count_per_column = predata.isna().sum()
print(na_count_per_column)

original_text     2
clean_text        2
sentiment         2
polarity          1
subjectivity      2
favorite_count    2
retweet_count     2
user              2
followers         2
friends           2
dtype: int64


In [11]:
predata.dropna(inplace=True)

In [12]:
new_na_count = predata.isna().sum()
print(new_na_count)

original_text     0
clean_text        0
sentiment         0
polarity          0
subjectivity      0
favorite_count    0
retweet_count     0
user              0
followers         0
friends           0
dtype: int64


In [13]:
#final NA check

na_values2 = predata.isna()
print(na_values2)

      original_text  clean_text  sentiment  polarity  subjectivity  \
0             False       False      False     False         False   
1             False       False      False     False         False   
2             False       False      False     False         False   
3             False       False      False     False         False   
4             False       False      False     False         False   
...             ...         ...        ...       ...           ...   
4103          False       False      False     False         False   
4104          False       False      False     False         False   
4105          False       False      False     False         False   
4106          False       False      False     False         False   
4107          False       False      False     False         False   

      favorite_count  retweet_count   user  followers  friends  
0              False          False  False      False    False  
1              False         

In [14]:
#checking the final pre-Kanye data
predata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends
0,RT @eortner: Hey #LosAngeles 2 tweets from #LA...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.1,0.1,0.0,19.0,"{'id': 98903593, 'id_str': '98903593', 'name':...",7728.0,8451.0
1,"Adidas is taking way too long ""reviewing"" the ...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.05,0.4,0.0,0.0,"{'id': 20696449, 'id_str': '20696449', 'name':...",2003.0,1442.0
2,RT @DanielNewman: Hi @adidas . Either make an ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.157143,0.717857,0.0,130.0,"{'id': 32962994, 'id_str': '32962994', 'name':...",1002.0,4997.0
3,RT @Ordainedprophet: 🚨Kanye should have been s...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,31.0,"{'id': 1050050985685594112, 'id_str': '1050050...",2887.0,4601.0
4,"RT @nancylevine: @KatiePhang @adidas Thanks, K...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.2,0.2,0.0,8.0,"{'id': 1703575711, 'id_str': '1703575711', 'na...",706.0,4998.0


In [15]:
predata.tail()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends
4103,@adidasHoops @adidas @NBA @JalenGreen @evanmob...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,0.0,"{'id': 707443044338016256, 'id_str': '70744304...",203.0,279.0
4104,RT @nancylevine: @FPWellman @larryfd .@adidas ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.133333,0.6,0.0,77.0,"{'id': 839605463293415425, 'id_str': '83960546...",739.0,1403.0
4105,RT @nancylevine: I have reached out to @adidas...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.066667,0.3,0.0,42.0,"{'id': 25207598, 'id_str': '25207598', 'name':...",8317.0,8442.0
4106,RT @nancylevine: Neo-Nazis in Los Angeles yest...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.285714,0.535714,0.0,414.0,"{'id': 25207598, 'id_str': '25207598', 'name':...",8317.0,8442.0
4107,@kanyewest brags he can say “anti-Semitic sh*t...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,2.0,1.0,"{'id': 1643453666, 'id_str': '1643453666', 'na...",280.0,1568.0


## Calculating engagement metrics for pre-Kanye data

In [16]:
#calculating impressions
impressions = predata["favorite_count"] + predata["retweet_count"]
predata["impressions"] = impressions

In [17]:
#verifying that impressions is added to the data

predata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions
0,RT @eortner: Hey #LosAngeles 2 tweets from #LA...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.1,0.1,0.0,19.0,"{'id': 98903593, 'id_str': '98903593', 'name':...",7728.0,8451.0,19.0
1,"Adidas is taking way too long ""reviewing"" the ...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.05,0.4,0.0,0.0,"{'id': 20696449, 'id_str': '20696449', 'name':...",2003.0,1442.0,0.0
2,RT @DanielNewman: Hi @adidas . Either make an ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.157143,0.717857,0.0,130.0,"{'id': 32962994, 'id_str': '32962994', 'name':...",1002.0,4997.0,130.0
3,RT @Ordainedprophet: 🚨Kanye should have been s...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,31.0,"{'id': 1050050985685594112, 'id_str': '1050050...",2887.0,4601.0,31.0
4,"RT @nancylevine: @KatiePhang @adidas Thanks, K...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.2,0.2,0.0,8.0,"{'id': 1703575711, 'id_str': '1703575711', 'na...",706.0,4998.0,8.0


In [18]:
#calculating reach
reach = predata["followers"] + predata["friends"]
predata["reach"] = reach

In [19]:
#verifying that reach is added to the data

predata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach
0,RT @eortner: Hey #LosAngeles 2 tweets from #LA...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.1,0.1,0.0,19.0,"{'id': 98903593, 'id_str': '98903593', 'name':...",7728.0,8451.0,19.0,16179.0
1,"Adidas is taking way too long ""reviewing"" the ...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.05,0.4,0.0,0.0,"{'id': 20696449, 'id_str': '20696449', 'name':...",2003.0,1442.0,0.0,3445.0
2,RT @DanielNewman: Hi @adidas . Either make an ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.157143,0.717857,0.0,130.0,"{'id': 32962994, 'id_str': '32962994', 'name':...",1002.0,4997.0,130.0,5999.0
3,RT @Ordainedprophet: 🚨Kanye should have been s...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,31.0,"{'id': 1050050985685594112, 'id_str': '1050050...",2887.0,4601.0,31.0,7488.0
4,"RT @nancylevine: @KatiePhang @adidas Thanks, K...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.2,0.2,0.0,8.0,"{'id': 1703575711, 'id_str': '1703575711', 'na...",706.0,4998.0,8.0,5704.0


In [20]:
#calculating an engagement metric

engagement = predata["impressions"] / predata["reach"]
predata["engagement"] = engagement

In [21]:
#verifying that engagement has been included

predata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement
0,RT @eortner: Hey #LosAngeles 2 tweets from #LA...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.1,0.1,0.0,19.0,"{'id': 98903593, 'id_str': '98903593', 'name':...",7728.0,8451.0,19.0,16179.0,0.001174
1,"Adidas is taking way too long ""reviewing"" the ...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.05,0.4,0.0,0.0,"{'id': 20696449, 'id_str': '20696449', 'name':...",2003.0,1442.0,0.0,3445.0,0.0
2,RT @DanielNewman: Hi @adidas . Either make an ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.157143,0.717857,0.0,130.0,"{'id': 32962994, 'id_str': '32962994', 'name':...",1002.0,4997.0,130.0,5999.0,0.02167
3,RT @Ordainedprophet: 🚨Kanye should have been s...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,31.0,"{'id': 1050050985685594112, 'id_str': '1050050...",2887.0,4601.0,31.0,7488.0,0.00414
4,"RT @nancylevine: @KatiePhang @adidas Thanks, K...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.2,0.2,0.0,8.0,"{'id': 1703575711, 'id_str': '1703575711', 'na...",706.0,4998.0,8.0,5704.0,0.001403


In [22]:
#calculating engagement rate as a percentage
engagement_rate = predata["engagement"]*100
predata["engagement_rate"] = engagement_rate

In [23]:
#verifying that the variable has been added

predata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement,engagement_rate
0,RT @eortner: Hey #LosAngeles 2 tweets from #LA...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.1,0.1,0.0,19.0,"{'id': 98903593, 'id_str': '98903593', 'name':...",7728.0,8451.0,19.0,16179.0,0.001174,0.117436
1,"Adidas is taking way too long ""reviewing"" the ...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.05,0.4,0.0,0.0,"{'id': 20696449, 'id_str': '20696449', 'name':...",2003.0,1442.0,0.0,3445.0,0.0,0.0
2,RT @DanielNewman: Hi @adidas . Either make an ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.157143,0.717857,0.0,130.0,"{'id': 32962994, 'id_str': '32962994', 'name':...",1002.0,4997.0,130.0,5999.0,0.02167,2.167028
3,RT @Ordainedprophet: 🚨Kanye should have been s...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,31.0,"{'id': 1050050985685594112, 'id_str': '1050050...",2887.0,4601.0,31.0,7488.0,0.00414,0.413996
4,"RT @nancylevine: @KatiePhang @adidas Thanks, K...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.2,0.2,0.0,8.0,"{'id': 1703575711, 'id_str': '1703575711', 'na...",706.0,4998.0,8.0,5704.0,0.001403,0.140252


## Cleaning the post-Kanye data: Removing NA values and unusable columns

In [24]:
#importing the second data set
postdata = pd.read_csv('adidas_post.csv')

In [25]:
postdata.head()

Unnamed: 0,id,created_at,source,original_text,clean_text,sentiment,polarity,subjectivity,lang,favorite_count,retweet_count,user,followers,friends,favorites
0,1.58496e+18,Tue Oct 25 17:27:50 +0000 2022,"<a href=""https://mobile.twitter.com"" rel=""nofo...",RT @Kehinde89386217: #Kanye #Adidas #Yeezys ...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,en,0,1,Kehinde Ola,5,47,"{'id': 1581283587000860672, 'id_str': '1581283..."
1,1.58496e+18,Tue Oct 25 17:27:49 +0000 2022,"<a href=""http://twitter.com/download/iphone"" r...",RT @AZgraceJMC: It's more than a little funny ...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,en,0,1,jupitergirl🇺🇦,6063,4804,"{'id': 12978042, 'id_str': '12978042', 'name':..."
2,1.58496e+18,Tue Oct 25 17:27:38 +0000 2022,"<a href=""https://mobile.twitter.com"" rel=""nofo...",#Kanye #Adidas #Yeezys #iMessage #Brittney...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,en,1,1,Kehinde Ola,5,47,"{'id': 1581283587000860672, 'id_str': '1581283..."
3,1.58496e+18,Tue Oct 25 17:27:03 +0000 2022,"<a href=""http://twitter.com/download/iphone"" r...","Ok, should’ve happened sooner, shouldn’t have ...",Ok ’ happened sooner ’ required backlash ’ sai...,"Sentiment(polarity=0.5, subjectivity=0.5)",0.5,0.5,en,1,0,Alex Skolnick,85581,2645,"{'id': 15268202, 'id_str': '15268202', 'name':..."
4,1.58496e+18,Tue Oct 25 17:26:59 +0000 2022,"<a href=""https://mobile.twitter.com"" rel=""nofo...",It's more than a little funny that #Adidas too...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,en,0,1,AZgrace,2366,3054,"{'id': 925076033186906112, 'id_str': '92507603..."


In [26]:
postdata.tail()

Unnamed: 0,id,created_at,source,original_text,clean_text,sentiment,polarity,subjectivity,lang,favorite_count,retweet_count,user,followers,friends,favorites
3159,1.58483e+18,Tue Oct 25 08:34:27 +0000 2022,"<a href=""http://twitter.com/download/android"" ...",RT @4dLoveofdogs: #KimKardashian was having di...,dinner traitor stochastic terrorist trash Thei...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,en,0,44,Carrie,1122,1478,"{'id': 1574757334215458819, 'id_str': '1574757..."
3160,1.58483e+18,Tue Oct 25 08:34:07 +0000 2022,"<a href=""http://www.newsoneplace.com"" rel=""nof...",(Insider):#Adidas is cutting ties with #Kanye ...,Insider cutting ties West recent offensive beh...,"Sentiment(polarity=-0.3, subjectivity=0.575)",-0.3,0.575,en,0,0,NewsOnePlace.com,1155,159,"{'id': 3534222021, 'id_str': '3534222021', 'na..."
3161,1.58483e+18,Tue Oct 25 08:33:48 +0000 2022,"<a href=""http://twitter.com/download/iphone"" r...",Calls grow to #boycott #Adidas as the company ...,Calls grow company stays silent NPR,"Sentiment(polarity=0.0, subjectivity=0.1)",0.0,0.1,en,0,0,NewsJunky,522,1970,"{'id': 919021711437082630, 'id_str': '91902171..."
3162,1.58483e+18,Tue Oct 25 08:32:58 +0000 2022,"<a href=""http://twitter.com/download/iphone"" r...",@Cary_Elwes @adidas You are all dummies playin...,You dummies playing game He wanted contract wa...,"Sentiment(polarity=-0.4, subjectivity=0.4)",-0.4,0.4,en,0,0,Mauricio,900,487,"{'id': 35745582, 'id_str': '35745582', 'name':..."
3163,1.58483e+18,Tue Oct 25 08:32:43 +0000 2022,"<a href=""http://twitter.com/download/iphone"" r...",@adidas come on! I now have shoes I won’t wear...,come I shoes I ’ wear,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,en,0,0,chris,185,1274,"{'id': 15663298, 'id_str': '15663298', 'name':..."


In [27]:
#descriptive statistics 
sumstats=postdata.describe()
sumstats.round()

Unnamed: 0,id,polarity,subjectivity,favorite_count,retweet_count,followers,friends
count,3164.0,3164.0,3164.0,3164.0,3164.0,3164.0,3164.0
mean,1.5849e+18,0.0,0.0,1.0,170.0,18069.0,2401.0
std,32756190000000.0,0.0,0.0,22.0,385.0,250678.0,6053.0
min,1.58483e+18,-1.0,0.0,0.0,0.0,0.0,0.0
25%,1.58487e+18,0.0,0.0,0.0,0.0,137.0,218.0
50%,1.5849e+18,0.0,0.0,0.0,0.0,584.0,783.0
75%,1.58493e+18,0.0,1.0,0.0,14.0,2339.0,2586.0
max,1.58496e+18,1.0,1.0,1126.0,1157.0,7884407.0,160106.0


In [28]:
#finding NA values

na_values_post = postdata.isna()
print(na_values_post)

         id  created_at  source  original_text  clean_text  sentiment  \
0     False       False   False          False       False      False   
1     False       False   False          False       False      False   
2     False       False   False          False       False      False   
3     False       False   False          False       False      False   
4     False       False   False          False       False      False   
...     ...         ...     ...            ...         ...        ...   
3159  False       False   False          False       False      False   
3160  False       False   False          False       False      False   
3161  False       False   False          False       False      False   
3162  False       False   False          False       False      False   
3163  False       False   False          False       False      False   

      polarity  subjectivity   lang  favorite_count  retweet_count   user  \
0        False         False  False           

In [29]:
#dropping the unnecessary columns

columns_to_drop = ['favorites','created_at','source','lang','id']
postdata.drop(columns=columns_to_drop, inplace=True)

In [30]:
#counting remaining NAs
post_na_count = postdata.isna().sum()
print(post_na_count)

original_text     0
clean_text        5
sentiment         0
polarity          0
subjectivity      0
favorite_count    0
retweet_count     0
user              0
followers         0
friends           0
dtype: int64


In [31]:
#dropping last NAs
postdata.dropna(inplace=True)

In [32]:
#final NA check
na_count_per_postcolumn = postdata.isna().sum()
print(na_count_per_postcolumn)

original_text     0
clean_text        0
sentiment         0
polarity          0
subjectivity      0
favorite_count    0
retweet_count     0
user              0
followers         0
friends           0
dtype: int64


In [33]:
#checking final post-Kanye data
postdata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends
0,RT @Kehinde89386217: #Kanye #Adidas #Yeezys ...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,1,Kehinde Ola,5,47
1,RT @AZgraceJMC: It's more than a little funny ...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,jupitergirl🇺🇦,6063,4804
2,#Kanye #Adidas #Yeezys #iMessage #Brittney...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,1,1,Kehinde Ola,5,47
3,"Ok, should’ve happened sooner, shouldn’t have ...",Ok ’ happened sooner ’ required backlash ’ sai...,"Sentiment(polarity=0.5, subjectivity=0.5)",0.5,0.5,1,0,Alex Skolnick,85581,2645
4,It's more than a little funny that #Adidas too...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,AZgrace,2366,3054


In [34]:
postdata.tail()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends
3159,RT @4dLoveofdogs: #KimKardashian was having di...,dinner traitor stochastic terrorist trash Thei...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,44,Carrie,1122,1478
3160,(Insider):#Adidas is cutting ties with #Kanye ...,Insider cutting ties West recent offensive beh...,"Sentiment(polarity=-0.3, subjectivity=0.575)",-0.3,0.575,0,0,NewsOnePlace.com,1155,159
3161,Calls grow to #boycott #Adidas as the company ...,Calls grow company stays silent NPR,"Sentiment(polarity=0.0, subjectivity=0.1)",0.0,0.1,0,0,NewsJunky,522,1970
3162,@Cary_Elwes @adidas You are all dummies playin...,You dummies playing game He wanted contract wa...,"Sentiment(polarity=-0.4, subjectivity=0.4)",-0.4,0.4,0,0,Mauricio,900,487
3163,@adidas come on! I now have shoes I won’t wear...,come I shoes I ’ wear,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,0,chris,185,1274


## Calculating engagement metrics for post-Kanye data

In [35]:
# calculating impressions
impressions = postdata["favorite_count"] + postdata["retweet_count"]
postdata["impressions"] = impressions

In [36]:
# verifying that impressions is added to the data
postdata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions
0,RT @Kehinde89386217: #Kanye #Adidas #Yeezys ...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,1,Kehinde Ola,5,47,1
1,RT @AZgraceJMC: It's more than a little funny ...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,jupitergirl🇺🇦,6063,4804,1
2,#Kanye #Adidas #Yeezys #iMessage #Brittney...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,1,1,Kehinde Ola,5,47,2
3,"Ok, should’ve happened sooner, shouldn’t have ...",Ok ’ happened sooner ’ required backlash ’ sai...,"Sentiment(polarity=0.5, subjectivity=0.5)",0.5,0.5,1,0,Alex Skolnick,85581,2645,1
4,It's more than a little funny that #Adidas too...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,AZgrace,2366,3054,1


In [37]:
#calculate reach
reach = postdata["followers"] + postdata["friends"]
postdata["reach"] = reach

In [38]:
postdata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach
0,RT @Kehinde89386217: #Kanye #Adidas #Yeezys ...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,1,Kehinde Ola,5,47,1,52
1,RT @AZgraceJMC: It's more than a little funny ...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,jupitergirl🇺🇦,6063,4804,1,10867
2,#Kanye #Adidas #Yeezys #iMessage #Brittney...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,1,1,Kehinde Ola,5,47,2,52
3,"Ok, should’ve happened sooner, shouldn’t have ...",Ok ’ happened sooner ’ required backlash ’ sai...,"Sentiment(polarity=0.5, subjectivity=0.5)",0.5,0.5,1,0,Alex Skolnick,85581,2645,1,88226
4,It's more than a little funny that #Adidas too...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,AZgrace,2366,3054,1,5420


In [39]:
#calculate engagement metric
engagement = postdata["impressions"] / postdata["reach"]
postdata["engagement"] = engagement

In [40]:
postdata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement
0,RT @Kehinde89386217: #Kanye #Adidas #Yeezys ...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,1,Kehinde Ola,5,47,1,52,0.019231
1,RT @AZgraceJMC: It's more than a little funny ...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,jupitergirl🇺🇦,6063,4804,1,10867,9.2e-05
2,#Kanye #Adidas #Yeezys #iMessage #Brittney...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,1,1,Kehinde Ola,5,47,2,52,0.038462
3,"Ok, should’ve happened sooner, shouldn’t have ...",Ok ’ happened sooner ’ required backlash ’ sai...,"Sentiment(polarity=0.5, subjectivity=0.5)",0.5,0.5,1,0,Alex Skolnick,85581,2645,1,88226,1.1e-05
4,It's more than a little funny that #Adidas too...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,AZgrace,2366,3054,1,5420,0.000185


In [41]:
postdata.tail()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement
3159,RT @4dLoveofdogs: #KimKardashian was having di...,dinner traitor stochastic terrorist trash Thei...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,44,Carrie,1122,1478,44,2600,0.016923
3160,(Insider):#Adidas is cutting ties with #Kanye ...,Insider cutting ties West recent offensive beh...,"Sentiment(polarity=-0.3, subjectivity=0.575)",-0.3,0.575,0,0,NewsOnePlace.com,1155,159,0,1314,0.0
3161,Calls grow to #boycott #Adidas as the company ...,Calls grow company stays silent NPR,"Sentiment(polarity=0.0, subjectivity=0.1)",0.0,0.1,0,0,NewsJunky,522,1970,0,2492,0.0
3162,@Cary_Elwes @adidas You are all dummies playin...,You dummies playing game He wanted contract wa...,"Sentiment(polarity=-0.4, subjectivity=0.4)",-0.4,0.4,0,0,Mauricio,900,487,0,1387,0.0
3163,@adidas come on! I now have shoes I won’t wear...,come I shoes I ’ wear,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,0,chris,185,1274,0,1459,0.0


In [42]:
#creating the engagement rate for postdata
engagement_rate = postdata["engagement"]*100
postdata["engagement_rate"] = engagement_rate

In [43]:
#checking final postdata
postdata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement,engagement_rate
0,RT @Kehinde89386217: #Kanye #Adidas #Yeezys ...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,1,Kehinde Ola,5,47,1,52,0.019231,1.923077
1,RT @AZgraceJMC: It's more than a little funny ...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,jupitergirl🇺🇦,6063,4804,1,10867,9.2e-05,0.009202
2,#Kanye #Adidas #Yeezys #iMessage #Brittney...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,1,1,Kehinde Ola,5,47,2,52,0.038462,3.846154
3,"Ok, should’ve happened sooner, shouldn’t have ...",Ok ’ happened sooner ’ required backlash ’ sai...,"Sentiment(polarity=0.5, subjectivity=0.5)",0.5,0.5,1,0,Alex Skolnick,85581,2645,1,88226,1.1e-05,0.001133
4,It's more than a little funny that #Adidas too...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,AZgrace,2366,3054,1,5420,0.000185,0.01845


In [44]:
postdata.tail()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement,engagement_rate
3159,RT @4dLoveofdogs: #KimKardashian was having di...,dinner traitor stochastic terrorist trash Thei...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,44,Carrie,1122,1478,44,2600,0.016923,1.692308
3160,(Insider):#Adidas is cutting ties with #Kanye ...,Insider cutting ties West recent offensive beh...,"Sentiment(polarity=-0.3, subjectivity=0.575)",-0.3,0.575,0,0,NewsOnePlace.com,1155,159,0,1314,0.0,0.0
3161,Calls grow to #boycott #Adidas as the company ...,Calls grow company stays silent NPR,"Sentiment(polarity=0.0, subjectivity=0.1)",0.0,0.1,0,0,NewsJunky,522,1970,0,2492,0.0,0.0
3162,@Cary_Elwes @adidas You are all dummies playin...,You dummies playing game He wanted contract wa...,"Sentiment(polarity=-0.4, subjectivity=0.4)",-0.4,0.4,0,0,Mauricio,900,487,0,1387,0.0,0.0
3163,@adidas come on! I now have shoes I won’t wear...,come I shoes I ’ wear,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,0,chris,185,1274,0,1459,0.0,0.0


## Combining the two datasets using dummy variables

In [45]:
#creating a dummy variable to define Kanye's involvement; 0=before Kanye
predata["Kanye"]=0

In [46]:
predata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement,engagement_rate,Kanye
0,RT @eortner: Hey #LosAngeles 2 tweets from #LA...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.1,0.1,0.0,19.0,"{'id': 98903593, 'id_str': '98903593', 'name':...",7728.0,8451.0,19.0,16179.0,0.001174,0.117436,0
1,"Adidas is taking way too long ""reviewing"" the ...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.05,0.4,0.0,0.0,"{'id': 20696449, 'id_str': '20696449', 'name':...",2003.0,1442.0,0.0,3445.0,0.0,0.0,0
2,RT @DanielNewman: Hi @adidas . Either make an ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.157143,0.717857,0.0,130.0,"{'id': 32962994, 'id_str': '32962994', 'name':...",1002.0,4997.0,130.0,5999.0,0.02167,2.167028,0
3,RT @Ordainedprophet: 🚨Kanye should have been s...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,31.0,"{'id': 1050050985685594112, 'id_str': '1050050...",2887.0,4601.0,31.0,7488.0,0.00414,0.413996,0
4,"RT @nancylevine: @KatiePhang @adidas Thanks, K...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.2,0.2,0.0,8.0,"{'id': 1703575711, 'id_str': '1703575711', 'na...",706.0,4998.0,8.0,5704.0,0.001403,0.140252,0


In [47]:
predata.tail()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement,engagement_rate,Kanye
4103,@adidasHoops @adidas @NBA @JalenGreen @evanmob...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,0.0,"{'id': 707443044338016256, 'id_str': '70744304...",203.0,279.0,0.0,482.0,0.0,0.0,0
4104,RT @nancylevine: @FPWellman @larryfd .@adidas ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.133333,0.6,0.0,77.0,"{'id': 839605463293415425, 'id_str': '83960546...",739.0,1403.0,77.0,2142.0,0.035948,3.594771,0
4105,RT @nancylevine: I have reached out to @adidas...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.066667,0.3,0.0,42.0,"{'id': 25207598, 'id_str': '25207598', 'name':...",8317.0,8442.0,42.0,16759.0,0.002506,0.250612,0
4106,RT @nancylevine: Neo-Nazis in Los Angeles yest...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.285714,0.535714,0.0,414.0,"{'id': 25207598, 'id_str': '25207598', 'name':...",8317.0,8442.0,414.0,16759.0,0.024703,2.470314,0
4107,@kanyewest brags he can say “anti-Semitic sh*t...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,2.0,1.0,"{'id': 1643453666, 'id_str': '1643453666', 'na...",280.0,1568.0,3.0,1848.0,0.001623,0.162338,0


In [48]:
#export pre-Kanye data to a csv file
predata.to_csv('prekanye.csv')

In [49]:
#export finalized pre-Kanye data to Excel
predata.to_excel('prekanye.xlsx')

In [50]:
#recalculating descriptive stats with new variables

sumstats=predata.describe()
sumstats.round()

Unnamed: 0,polarity,subjectivity,favorite_count,retweet_count,followers,friends,impressions,reach,engagement,engagement_rate,Kanye
count,4108.0,4108.0,4108.0,4108.0,4108.0,4108.0,4108.0,4108.0,4105.0,4105.0,4108.0
mean,0.0,0.0,1.0,97.0,7835.0,3187.0,98.0,11022.0,0.0,22.0,0.0
std,0.0,0.0,15.0,151.0,54397.0,14240.0,152.0,60924.0,2.0,240.0,0.0
min,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,144.0,249.0,1.0,498.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,7.0,638.0,996.0,10.0,1920.0,0.0,0.0,0.0
75%,0.0,1.0,0.0,130.0,2638.0,3472.0,130.0,6477.0,0.0,4.0,0.0
max,1.0,1.0,506.0,487.0,2107522.0,581668.0,747.0,2108825.0,94.0,9433.0,0.0


In [51]:
sumstatstable=predata.describe()

In [52]:
#exporting summary statistics table to excel
sumstatstable.to_excel('prekanye_sumstats.xlsx')

In [53]:
#creating a dummy variable to define Kanye's involvement; 1=after Kanye
postdata["Kanye"]=1

In [54]:
postdata.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement,engagement_rate,Kanye
0,RT @Kehinde89386217: #Kanye #Adidas #Yeezys ...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,1,Kehinde Ola,5,47,1,52,0.019231,1.923077,1
1,RT @AZgraceJMC: It's more than a little funny ...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,jupitergirl🇺🇦,6063,4804,1,10867,9.2e-05,0.009202,1
2,#Kanye #Adidas #Yeezys #iMessage #Brittney...,…,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,1,1,Kehinde Ola,5,47,2,52,0.038462,3.846154,1
3,"Ok, should’ve happened sooner, shouldn’t have ...",Ok ’ happened sooner ’ required backlash ’ sai...,"Sentiment(polarity=0.5, subjectivity=0.5)",0.5,0.5,1,0,Alex Skolnick,85581,2645,1,88226,1.1e-05,0.001133,1
4,It's more than a little funny that #Adidas too...,It 's little funny took long step away Kanye S...,"Sentiment(polarity=0.004166666666666666, subje...",0.004167,0.633333,0,1,AZgrace,2366,3054,1,5420,0.000185,0.01845,1


In [55]:
postdata.tail()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement,engagement_rate,Kanye
3159,RT @4dLoveofdogs: #KimKardashian was having di...,dinner traitor stochastic terrorist trash Thei...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,44,Carrie,1122,1478,44,2600,0.016923,1.692308,1
3160,(Insider):#Adidas is cutting ties with #Kanye ...,Insider cutting ties West recent offensive beh...,"Sentiment(polarity=-0.3, subjectivity=0.575)",-0.3,0.575,0,0,NewsOnePlace.com,1155,159,0,1314,0.0,0.0,1
3161,Calls grow to #boycott #Adidas as the company ...,Calls grow company stays silent NPR,"Sentiment(polarity=0.0, subjectivity=0.1)",0.0,0.1,0,0,NewsJunky,522,1970,0,2492,0.0,0.0,1
3162,@Cary_Elwes @adidas You are all dummies playin...,You dummies playing game He wanted contract wa...,"Sentiment(polarity=-0.4, subjectivity=0.4)",-0.4,0.4,0,0,Mauricio,900,487,0,1387,0.0,0.0,1
3163,@adidas come on! I now have shoes I won’t wear...,come I shoes I ’ wear,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0,0,chris,185,1274,0,1459,0.0,0.0,1


In [56]:
#export post-Kanye data to a csv file
postdata.to_csv('postkanye.csv')

In [57]:
#export finalized post-Kanye data to Excel
postdata.to_excel('postkanye.xlsx')

In [58]:
#descriptive statistics 
sumstats_engagement2=postdata.describe()
sumstats_engagement2.round()

Unnamed: 0,polarity,subjectivity,favorite_count,retweet_count,followers,friends,impressions,reach,engagement,engagement_rate,Kanye
count,3159.0,3159.0,3159.0,3159.0,3159.0,3159.0,3159.0,3159.0,3154.0,3154.0,3159.0
mean,0.0,0.0,1.0,171.0,18090.0,2398.0,172.0,20488.0,1.0,70.0,1.0
std,0.0,0.0,22.0,385.0,250875.0,6051.0,385.0,251181.0,21.0,2078.0,0.0
min,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
25%,0.0,0.0,0.0,0.0,137.0,218.0,0.0,469.0,0.0,0.0,1.0
50%,0.0,0.0,0.0,0.0,584.0,783.0,1.0,1597.0,0.0,0.0,1.0
75%,0.0,1.0,0.0,14.0,2341.0,2587.0,18.0,5686.0,0.0,2.0,1.0
max,1.0,1.0,1126.0,1157.0,7884407.0,160106.0,1384.0,7884590.0,1157.0,115700.0,1.0


In [59]:
#exporting summary statistics table to excel
sumstats_engagement2.to_excel('postkanye_sumstats.xlsx')

In [60]:
#combining the two datasets into one dataframe
frames = [predata, postdata]

data = pd.concat(frames)

In [61]:
#verifying that all created variables have transferred over
data.head()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement,engagement_rate,Kanye
0,RT @eortner: Hey #LosAngeles 2 tweets from #LA...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.1,0.1,0.0,19.0,"{'id': 98903593, 'id_str': '98903593', 'name':...",7728.0,8451.0,19.0,16179.0,0.001174,0.117436,0
1,"Adidas is taking way too long ""reviewing"" the ...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.05,0.4,0.0,0.0,"{'id': 20696449, 'id_str': '20696449', 'name':...",2003.0,1442.0,0.0,3445.0,0.0,0.0,0
2,RT @DanielNewman: Hi @adidas . Either make an ...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",-0.157143,0.717857,0.0,130.0,"{'id': 32962994, 'id_str': '32962994', 'name':...",1002.0,4997.0,130.0,5999.0,0.02167,2.167028,0
3,RT @Ordainedprophet: 🚨Kanye should have been s...,/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,31.0,"{'id': 1050050985685594112, 'id_str': '1050050...",2887.0,4601.0,31.0,7488.0,0.00414,0.413996,0
4,"RT @nancylevine: @KatiePhang @adidas Thanks, K...",/Users/alisonsanchez/Dropbox/ALI/USD TEACHING/...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.2,0.2,0.0,8.0,"{'id': 1703575711, 'id_str': '1703575711', 'na...",706.0,4998.0,8.0,5704.0,0.001403,0.140252,0


In [62]:
#verifying that both 0s and 1s transferred over
data.tail()

Unnamed: 0,original_text,clean_text,sentiment,polarity,subjectivity,favorite_count,retweet_count,user,followers,friends,impressions,reach,engagement,engagement_rate,Kanye
3159,RT @4dLoveofdogs: #KimKardashian was having di...,dinner traitor stochastic terrorist trash Thei...,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,44.0,Carrie,1122.0,1478.0,44.0,2600.0,0.016923,1.692308,1
3160,(Insider):#Adidas is cutting ties with #Kanye ...,Insider cutting ties West recent offensive beh...,"Sentiment(polarity=-0.3, subjectivity=0.575)",-0.3,0.575,0.0,0.0,NewsOnePlace.com,1155.0,159.0,0.0,1314.0,0.0,0.0,1
3161,Calls grow to #boycott #Adidas as the company ...,Calls grow company stays silent NPR,"Sentiment(polarity=0.0, subjectivity=0.1)",0.0,0.1,0.0,0.0,NewsJunky,522.0,1970.0,0.0,2492.0,0.0,0.0,1
3162,@Cary_Elwes @adidas You are all dummies playin...,You dummies playing game He wanted contract wa...,"Sentiment(polarity=-0.4, subjectivity=0.4)",-0.4,0.4,0.0,0.0,Mauricio,900.0,487.0,0.0,1387.0,0.0,0.0,1
3163,@adidas come on! I now have shoes I won’t wear...,come I shoes I ’ wear,"Sentiment(polarity=0.0, subjectivity=0.0)",0.0,0.0,0.0,0.0,chris,185.0,1274.0,0.0,1459.0,0.0,0.0,1


In [63]:
#export combined data to a csv file
data.to_csv('combdata.csv')

In [64]:
#export combined data to Excel
data.to_excel('combdata.xlsx')

In [65]:
#combined descriptive statistics
 
combined_sumstats=data.describe()
combined_sumstats.round()

Unnamed: 0,polarity,subjectivity,favorite_count,retweet_count,followers,friends,impressions,reach,engagement,engagement_rate,Kanye
count,7267.0,7267.0,7267.0,7267.0,7267.0,7267.0,7267.0,7267.0,7259.0,7259.0,7267.0
mean,0.0,0.0,1.0,129.0,12293.0,2844.0,130.0,15137.0,0.0,43.0,0.0
std,0.0,0.0,18.0,281.0,170450.0,11432.0,281.0,171876.0,14.0,1382.0,0.0
min,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,141.0,249.0,0.0,498.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,2.0,611.0,886.0,4.0,1758.0,0.0,0.0,0.0
75%,0.0,1.0,0.0,73.0,2522.0,3053.0,77.0,6095.0,0.0,3.0,1.0
max,1.0,1.0,1126.0,1157.0,7884407.0,581668.0,1384.0,7884590.0,1157.0,115700.0,1.0


In [66]:
#exporting combined summary statistics table to excel
combined_sumstats.to_excel('combined_sumstats.xlsx')

## Regression Analysis

#### In this section, I will build a regression model(s) to investigate which factors are most significant in the variation of polarity and subjectivity.

In [67]:
#building a linear regression model using subjectivity as the dependent variable

subjectivity_predict = ols("subjectivity ~ polarity + Kanye + engagement_rate", data).fit()

subjectivity_predict.summary()

0,1,2,3
Dep. Variable:,subjectivity,R-squared:,0.021
Model:,OLS,Adj. R-squared:,0.02
Method:,Least Squares,F-statistic:,50.69
Date:,"Sun, 18 Feb 2024",Prob (F-statistic):,2.06e-32
Time:,10:41:09,Log-Likelihood:,-2276.1
No. Observations:,7259,AIC:,4560.0
Df Residuals:,7255,BIC:,4588.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.3220,0.005,61.197,0.000,0.312,0.332
polarity,0.1710,0.015,11.757,0.000,0.142,0.200
Kanye,-0.0188,0.008,-2.389,0.017,-0.034,-0.003
engagement_rate,-3.922e-06,2.81e-06,-1.394,0.163,-9.44e-06,1.59e-06

0,1,2,3
Omnibus:,2220.089,Durbin-Watson:,1.864
Prob(Omnibus):,0.0,Jarque-Bera (JB):,680.702
Skew:,0.547,Prob(JB):,1.54e-148
Kurtosis:,1.974,Cond. No.,5200.0


In [68]:
polarity_predict = ols("polarity ~ subjectivity + Kanye + friends", data).fit()

polarity_predict.summary()

0,1,2,3
Dep. Variable:,polarity,R-squared:,0.028
Model:,OLS,Adj. R-squared:,0.028
Method:,Least Squares,F-statistic:,70.56
Date:,"Sun, 18 Feb 2024",Prob (F-statistic):,5.629999999999999e-45
Time:,10:41:09,Log-Likelihood:,-650.93
No. Observations:,7267,AIC:,1310.0
Df Residuals:,7263,BIC:,1337.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0342,0.005,6.544,0.000,0.024,0.045
subjectivity,0.1107,0.009,11.913,0.000,0.092,0.129
Kanye,-0.0449,0.006,-7.151,0.000,-0.057,-0.033
friends,-1.011e-06,2.72e-07,-3.719,0.000,-1.54e-06,-4.78e-07

0,1,2,3
Omnibus:,514.501,Durbin-Watson:,1.758
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2544.282
Skew:,-0.116,Prob(JB):,0.0
Kurtosis:,5.889,Cond. No.,37800.0


In [69]:
#building a probit model to analyze which factors are most significant 
#in predicting whether a tweet comes from the pre_kanye or post-kanye data

In [70]:
kanye_predict = probit("Kanye ~ polarity + subjectivity + reach", data).fit()

kanye_predict.summary()

Optimization terminated successfully.
         Current function value: 0.679962
         Iterations 5


0,1,2,3
Dep. Variable:,Kanye,No. Observations:,7267.0
Model:,Probit,Df Residuals:,7263.0
Method:,MLE,Df Model:,3.0
Date:,"Sun, 18 Feb 2024",Pseudo R-squ.:,0.006768
Time:,10:41:09,Log-Likelihood:,-4941.3
converged:,True,LL-Null:,-4975.0
Covariance Type:,nonrobust,LLR p-value:,1.582e-14

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.1149,0.021,-5.591,0.000,-0.155,-0.075
polarity,-0.3962,0.056,-7.021,0.000,-0.507,-0.286
subjectivity,-0.1107,0.045,-2.483,0.013,-0.198,-0.023
reach,2.417e-07,1.27e-07,1.906,0.057,-6.87e-09,4.9e-07
