# Ablation Twitter Analysis

In [1]:
import numpy as np # Imports
import pandas as pd
from scipy import stats
from statistics import mean, stdev
import requests
import re
import ast


In [2]:
data = pd.read_csv('ablation_full.csv') #Reading in the CSV
data.hashtags = data.hashtags.apply(lambda s: list(ast.literal_eval(s)))

In [3]:
data.columns #Columns in the CSV

Index(['tweet_id', 'username', 'name', 'profile_picture', 'replies',
       'retweets', 'likes', 'is_retweet', 'posted_time', 'content', 'hashtags',
       'mentions', 'images', 'videos', 'tweet_url', 'link'],
      dtype='object')

In [4]:
data.head() #Sample the top of the CSV

Unnamed: 0,tweet_id,username,name,profile_picture,replies,retweets,likes,is_retweet,posted_time,content,hashtags,mentions,images,videos,tweet_url,link
0,1579259974756491264,Cajsa,Cajsa,https://pbs.twimg.com/profile_images/129364594...,0,0,1,False,2022-10-09T23:58:13+00:00,I am hoping it will heal fast so I don't have ...,[],[],[],[],https://twitter.com/Cajsa/status/1579259974756...,
1,1579258665894547456,EpFinishing,EPFinishingSchool,https://pbs.twimg.com/profile_images/151258340...,0,2,7,False,2022-10-09T23:53:01+00:00,Meet our Judges for #EPFinishing22 Present you...,[EPFinishing22],"['utedrow', 'markalink', 'LuigiDiBiaseMD', 'sh...",['https://pbs.twimg.com/media/FeqnYvzXoAYkb6x?...,[],https://twitter.com/EpFinishing/status/1579258...,
2,1579258489876393984,RavindraSoni1,"Ravindra Soni, editor, businessfortnight.com",https://pbs.twimg.com/profile_images/809424050...,0,0,0,False,2022-10-09T23:52:19+00:00,CLS Americas Present Fusion-Guided Focal Laser...,[],[],[],[],https://twitter.com/RavindraSoni1/status/15792...,https://t.co/sRIYJq8FPV
3,1579256691643731968,Koichi16423232,こうちゃん Koichi Nagashima @EP Univ. Ablation School,https://pbs.twimg.com/profile_images/157933116...,1,0,4,False,2022-10-09T23:45:11+00:00,すごいですねぇ\nそうですそうです！！真実に辿り着くにはまだ時間がかかりますけど,[],[],[],[],https://twitter.com/Koichi16423232/status/1579...,
4,1579253919288786944,Spiculus_nero,Abdul Mazagri ⵣ.,https://pbs.twimg.com/profile_images/156919634...,0,0,0,False,2022-10-09T23:34:10+00:00,"An ultrasound showed I had fibroids, and a lot...",[],[],[],[],https://twitter.com/Spiculus_nero/status/15792...,


## 2 Sided T-tests 

Comparing the amount of likes with an image attached or no image, and there is a significant change

In [5]:
stats.ttest_ind(data['likes'].values, data[data['images'] != "[]"]["likes"].values, trim=.2) #Evaluating if there is a significant change in the amount of likes if you include an image, and there the ttest is statistically significant

Ttest_indResult(statistic=-15.242365850673801, pvalue=8.816514027296647e-50)

In [6]:
print(mean(data['likes'].values))
print(mean(data[data['images'] != "[]"]["likes"].values)) #Shows that the mean of the amount of likes is much higher with images

9
21


Comparing the amount of likes with hashtags, and there is a significant change

In [7]:
stats.ttest_ind(data['likes'].values, data[data['hashtags'].map(len) > 0]["likes"].values, trim=.2)

Ttest_indResult(statistic=-11.91886184819754, pvalue=1.080273208209551e-31)

In [8]:
print(mean(data['likes'].values))
print(mean(data[data['hashtags'].map(len) > 0]["likes"].values)) #Shows that tweets with more hashtags get more lieks

9
12


### Hashtags

In [9]:
hashtagDict = {} # Getting the hashtags and the amount of likes they have, eliminating hashtags with just one tweet

for index, row in data.iterrows():
    for hashtag in row['hashtags']:
        if hashtag in hashtagDict:
            hashtagDict[hashtag].append(row['likes'])
        else:
            hashtagDict[hashtag] = [row['likes']]

hashtagDictFil = {}
for item in hashtagDict:
    if len(hashtagDict[item]) > 1:
        hashtagDictFil[item] = sum(hashtagDict[item])

In [10]:
{k: v for k, v in sorted(hashtagDictFil.items(), key=lambda item: item[1], reverse=True)}

{'EPeeps': 1511,
 'CardioTwitter': 342,
 'Epeeps': 294,
 'ablation': 257,
 'EPLuebeck': 175,
 'Cardiotwitter': 160,
 'epeeps': 144,
 'AblateVT': 121,
 'JACCCaseReports': 116,
 'Ablation': 109,
 'IRad': 90,
 'cancer': 89,
 'Cardiology': 86,
 'AFib': 85,
 'AHAJournals': 83,
 'GlobalEPSummit': 83,
 'RoboticEP': 82,
 'AI': 80,
 'backpain': 78,
 'Octaray': 76,
 'cardiotwitter': 69,
 'HRC2022': 66,
 'EpilepsyAwareness': 65,
 'Afib': 64,
 'EPFinishing22': 60,
 'atrialfibrillation': 60,
 'MedTwitter': 52,
 'GCOS22': 52,
 'irad': 51,
 'WPW': 50,
 'afib': 49,
 'Volta': 49,
 'Mapping': 49,
 'FreeRead': 46,
 'EP大学': 43,
 'cardiology': 42,
 'EACTS2022': 40,
 'glioblastoma': 40,
 'VTSymposium': 39,
 'electrophysiology': 35,
 'AF': 34,
 'AFibAwarenessMonth': 31,
 'Catheter': 31,
 'PediRhythmX': 31,
 'biosense': 30,
 'arrhythmia': 29,
 'DGKHerztage': 29,
 'PedirhythmX': 29,
 'research': 28,
 'DGK_Herztage_Bonn_2022': 28,
 'thyroid': 28,
 'interventionaloncology': 25,
 'ProstateCancer': 24,
 'ablateVT'

In [11]:
hashtagFullDict = {} #Getting the number of likes per tweet with the hashtag

for index, row in data.iterrows():
    for hashtag in row['hashtags']:
        if hashtag in hashtagFullDict:
            hashtagFullDict[hashtag].append(row['likes'])
        else:
            hashtagFullDict[hashtag] = [row['likes']]

Shows that general hashtags don't work (CardioTwitter or Cardiology) won't get as much traction as just putting the topic as the hashtag

In [12]:
stats.ttest_ind(hashtagFullDict["ablation"], hashtagFullDict["CardioTwitter"], trim=.2) 

Ttest_indResult(statistic=0.10788928630762189, pvalue=0.9146366657061062)

In [13]:
stats.ttest_ind(hashtagFullDict["ablation"], hashtagFullDict["Cardiology"], trim=.2)

Ttest_indResult(statistic=0.19597769971510595, pvalue=0.8459490878264727)

Shows that specifics do work

In [14]:
stats.ttest_ind(hashtagFullDict["ablation"], hashtagFullDict["JACCCaseReports"], trim=.2)

Ttest_indResult(statistic=-6.330069535964719, pvalue=8.908264254127928e-07)

In [15]:
print(mean(hashtagFullDict["ablation"]))
print(mean(hashtagFullDict["JACCCaseReports"])) #ablation vs JACCCAseReports

6.119047619047619
38.666666666666664


In [16]:
stats.ttest_ind(hashtagFullDict["ablation"], hashtagFullDict["AHAJournals"], trim=.2)

Ttest_indResult(statistic=-5.366374114151326, pvalue=1.2811781304466585e-05)

In [17]:
print(mean(hashtagFullDict["ablation"]))
print(mean(hashtagFullDict["AHAJournals"])) #ablation vs AHAJournals

6.119047619047619
41.5


### Articles or Images

In [18]:
def get_original_twitter_url(twitter_url):
    # without masking it as a browser request, it wont work properly
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    r = requests.get(url = twitter_url,headers=headers)
    data = r.text
    try:
        url = re.search("(?P<url>https?://[^\s]+)\"", data).group("url")
        return url
    except:
        return "nothing"
print(get_original_twitter_url('https://t.co/ESwzZO0ncA'))

https://www.frontiersin.org/articles/10.3389/fneur.2022.1009914?utm_source=S-TWT&amp;utm_medium=SNET&amp;utm_campaign=ECO_FNEUR_XXXXXXXX_auto-dlvrit


In [19]:
numOfLikes = []
for index, row in data[data['link'].notnull()].iterrows():
    value = ('article' or 'report') in get_original_twitter_url(row['link'])
    value1 = ('article' or 'report') in row.content
    if value or value1:
        numOfLikes.append(row['likes'])
print(numOfLikes)


[0, 19, 0, 5, 0, 2, 5, 2, 4, 1, 0, 1, 0, 15, 0, 3, 20, 0, 20, 10, 38, 0, 12, 1, 1, 0, 5, 2, 8, 10, 0, 5, 15, 1, 0, 4, 1, 4, 0, 0, 12, 4, 5, 0, 167, 0, 15, 8, 1, 0, 0, 2, 10, 7, 16, 6, 8, 2, 0, 1, 1, 0, 7, 2, 0]


In [20]:
stats.ttest_ind(numOfLikes, data[data['images'] != "[]"]["likes"].values, trim=.2) #Shows that images and number of likes don't have a significant change

Ttest_indResult(statistic=-1.7380882420634858, pvalue=0.08291615425841296)

In [21]:
print(mean(data[data['images'] != "[]"]["likes"].values))
print(mean(numOfLikes))

21
7.507692307692308


In [22]:
print(stdev(data[data['images'] != "[]"]["likes"].values)) #The mean of the number of likes with images is much higher than number of likes, but has a high variance
print(stdev(numOfLikes))

141.9506956657839
21.25229625602481
