In [1]:
import pandas as pd

## Objective

In this analysis, I will attempt to measure the sentiment of tweets in order to learn whether tweets impact the number of Covid-19 cases and deaths in the United States.

To create the dataset, I used the TWINT library to collect all tweets from January 1, 2020 until July 10, 2020. I then made various subsets of the tweets. For example, to measure the impact of tweets by public leaders viewed as polar opposites in their response to the pandemic, I collected tweets by President Trump and by the Governor of New York, Andrew Cuomo. Another subset, which I labeled the baseline, consists of tweets by the New York Times and the Washington Post - two of America's leading journalism outlets.

The purpose of creating these subsets is that the baseline tweets can be considered to be those that communicate mainly facts. While these outlets might have op-ed columnists, we can assume that most tweets from their news reporting divisions will provide factual updates on the Covid response. By considering the two polar opposites, Trump and Cuomo, we can measure Covid outcomes, in terms of cases, after the tweets have been consumed by the public. Finally, the main Covid collection will allow us to see whether more individuals subscribed to the Trump or the Cuomo tweets and how Covid cases changed, for better or worse, in their region.

## Obtaining Data

The TWINT queries used to gather the tweets are in the Covid Data Queries notebook in the repo. The JSON files produced by those queries were used to create the DataFrames below.

In [2]:
def _read_tweets(json_path):
    """Load one line-delimited JSON tweet file into a DataFrame."""
    return pd.read_json(json_path, lines=True)


# Full Covid tweet collection
All_Covid_tweets = _read_tweets('Covid_tweets3.json')

# Tweets from the two public figures being compared
Trump_Covid_tweets = _read_tweets('Trump_Covid_tweets3.json')
Cuomo_Covid_tweets = _read_tweets('Cuomo_Covid_tweets3.json')

# Baseline sources: New York Times and Washington Post
NYTimes_tweets = _read_tweets('Nytimes_Covid_tweets3.json')
WashingtonPost_tweets = _read_tweets('Washpost_tweets3.json')

In [3]:
# Baseline = New York Times + Washington Post tweets stacked row-wise.
# NOTE(review): each frame keeps its own row labels, so index values may repeat
# in the result; confirm whether later cells need ignore_index=True.
baseline_sources = [NYTimes_tweets, WashingtonPost_tweets]
Baseline_tweets = pd.concat(baseline_sources)

In [4]:
def _read_us_rows(csv_path):
    """Load a time-series CSV and keep only the rows whose Country/Region is 'US'."""
    table = pd.read_csv(csv_path)
    is_us = table['Country/Region'] == 'US'
    return table[is_us]


# US confirmed cases
covid_cases = _read_us_rows('time_series_covid_19_confirmed.csv')

# US deaths
covid_deaths = _read_us_rows('time_series_covid_19_deaths.csv')


In [5]:
# Stack the US cases rows on top of the US deaths rows, then flip the table so
# each original column label becomes a row.
# TODO: rename the two resulting columns (left to right = cases, then deaths).
covid_data = pd.concat([covid_cases, covid_deaths]).T
covid_data.head()

Unnamed: 0,225,225.1
Province/State,,
Country/Region,US,US
Lat,37.0902,37.0902
Long,-95.7129,-95.7129
1/22/20,1,0


In [6]:
# Remove the non-date metadata rows that the transpose turned into index labels.
# errors='ignore' keeps this cell re-runnable: covid_data is reassigned from itself,
# so a second execution would otherwise raise KeyError (labels already removed).
metadata_rows = ['Province/State', 'Country/Region', 'Lat', 'Long']
covid_data = covid_data.drop(index=metadata_rows, errors='ignore')

In [7]:
# Preview the first five rows of the combined cases/deaths table.
covid_data.head(5)

Unnamed: 0,225,225.1
1/22/20,1,0
1/23/20,1,0
1/24/20,2,0
1/25/20,2,0
1/26/20,5,0


In [21]:
# Show the Cuomo tweet DataFrame as the cell output for a visual check.
Cuomo_Covid_tweets

Unnamed: 0,date,time,timezone,username,name,place,tweet,mentions,urls,photos,...,link,retweet,quote_url,video,near,geo,source,user_rt_id,user_rt,retweet_id
0,2020-06-16,12:00:49,EDT,nygovcuomo,Andrew Cuomo,,"New York State surveyed 12,000 people over 6 w...",[],[],[https://pbs.twimg.com/media/EapSx2TXgAEG3rj.jpg],...,https://twitter.com/NYGovCuomo/status/12729220...,False,,0,,,,,,
1,2020-06-15,17:44:38,EDT,nygovcuomo,Andrew Cuomo,,New York thanks @Sony for a generous donation ...,[sony],[https://www.healthresearch.org/donation-form/...,[],...,https://twitter.com/NYGovCuomo/status/12726462...,False,https://twitter.com/Sony/status/12725909482964...,0,,,,,,
2,2020-06-14,12:35:00,EDT,nygovcuomo,Andrew Cuomo,,I signed into law two more bills today.\n\nThe...,[],[],[],...,https://twitter.com/NYGovCuomo/status/12722059...,False,,0,,,,,,
3,2020-06-13,08:49:25,EDT,nygovcuomo,Andrew Cuomo,,We have seen an alarming spike in domestic vio...,[melissadderosa],[https://www.vogue.com/article/new-york-covid-...,[],...,https://twitter.com/NYGovCuomo/status/12717867...,False,,0,,,,,,
4,2020-06-11,14:43:13,EDT,nygovcuomo,Andrew Cuomo,,I am accepting in full the recommendations of ...,[melissadderosa],[https://www.governor.ny.gov/sites/governor.ny...,[],...,https://twitter.com/NYGovCuomo/status/12711510...,False,,0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
659,2020-03-02,08:22:00,EDT,nygovcuomo,Andrew Cuomo,,UPDATE: The #COVID19 patient lives in Manhatta...,[],[],[],...,https://twitter.com/NYGovCuomo/status/12344690...,False,,0,,,,,,
660,2020-03-01,19:51:35,EDT,nygovcuomo,Andrew Cuomo,,We have learned of the 1st positive case of CO...,[],[],[https://pbs.twimg.com/media/ESELO37WkAYLZI0.png],...,https://twitter.com/NYGovCuomo/status/12342801...,False,,0,,,,,,
661,2020-02-26,14:30:20,EDT,nygovcuomo,Andrew Cuomo,,There are still zero confirmed cases of COVID-...,[],[],[],...,https://twitter.com/NYGovCuomo/status/12327497...,False,,0,,,,,,
662,2020-02-26,14:30:19,EDT,nygovcuomo,Andrew Cuomo,,"As we see the novel #coronavirus spread, we ar...",[],[],[],...,https://twitter.com/NYGovCuomo/status/12327497...,False,,0,,,,,,


In [14]:
# Show the Trump tweet DataFrame as the cell output for a visual check.
# NOTE(review): rows 0 and 3 of the rendered output share the same date, time and
# text — possibly a duplicated tweet; confirm and de-duplicate if so.
Trump_Covid_tweets

Unnamed: 0,date,time,timezone,username,name,place,tweet,mentions,urls,photos,...,link,retweet,quote_url,video,near,geo,source,user_rt_id,user_rt,retweet_id
0,2020-06-09,13:43:35,EDT,realdonaldtrump,Donald J. Trump,,I am allocating $2.96 BILLION in Emergency Sol...,[secretarycarson],[],[],...,https://twitter.com/realDonaldTrump/status/127...,False,,0,,,,,,
1,2020-07-07,08:52:40,EDT,realdonaldtrump,Donald J. Trump,,“COVID-19 (China Virus) Death Rate PLUNGES Fro...,[washtimes],[],[],...,https://twitter.com/realDonaldTrump/status/128...,False,,0,,,,,,
2,2020-07-06,14:36:59,EDT,realdonaldtrump,Donald J. Trump,,“Treatment with hydroxychloroquine cut the dea...,[],[],[],...,https://twitter.com/realDonaldTrump/status/128...,False,,0,,,,,,
3,2020-06-09,13:43:35,EDT,realdonaldtrump,Donald J. Trump,,I am allocating $2.96 BILLION in Emergency Sol...,[secretarycarson],[],[],...,https://twitter.com/realDonaldTrump/status/127...,False,,0,,,,,,
4,2020-05-27,22:07:37,EDT,realdonaldtrump,Donald J. Trump,,Texas Supreme Court: Lack of immunity to COVID...,[],[https://www.dallasnews.com/news/public-health...,[],...,https://twitter.com/realDonaldTrump/status/126...,False,,0,,,,,,
5,2020-05-27,09:19:37,EDT,realdonaldtrump,Donald J. Trump,,"The Radical Left Lamestream Media, together wi...",[],[],[],...,https://twitter.com/realDonaldTrump/status/126...,False,,0,,,,,,
6,2020-05-25,16:16:06,EDT,realdonaldtrump,Donald J. Trump,,"Great reviews on our handling of Covid 19, som...",[],[],[],...,https://twitter.com/realDonaldTrump/status/126...,False,,0,,,,,,
7,2020-04-22,10:17:08,EDT,realdonaldtrump,Donald J. Trump,,CDC Director was totally misquoted by Fake New...,[cnn],[],[],...,https://twitter.com/realDonaldTrump/status/125...,False,,0,,,,,,
8,2020-04-22,06:50:19,EDT,realdonaldtrump,Donald J. Trump,,“Economic impact of Covid-19.” @foxandfriends ...,[foxandfriends],[],[],...,https://twitter.com/realDonaldTrump/status/125...,False,,0,,,,,,
9,2020-04-21,12:49:12,EDT,realdonaldtrump,Donald J. Trump,,....to State/Local Governments for lost revenu...,[],[],[],...,https://twitter.com/realDonaldTrump/status/125...,False,,0,,,,,,
