In [None]:
# The purpose of this file is to create datasets of tweets about each of the teams participating in VCT Champions 2021.
# The file uses Tweepy to pull tweets using a given query and stores them as CSVs using pandas.
# A sentiment analysis is conducted on all tweets, using the textblob library

In [1]:
import os
import tweepy
import pandas as pd
import re
import webbrowser
from textblob import TextBlob
from wordcloud import WordCloud

In [2]:
# Authentication

consumer_key = "" # Omitted for security reasons
consumer_secret = "" # Omitted for security reasons
callback_uri = 'oob' # https://cfe.sh/twitter/callback
auth = tweepy.OAuthHandler(consumer_key, consumer_secret, callback_uri)
redirect_url = auth.get_authorization_url()
webbrowser.open(redirect_url)
user_pin_input = input("What's the pin value?")
auth.get_access_token(user_pin_input) # auth.access_token & auth.access_token_secret

What's the pin value?0004520


('2773628975-7rI3AqocD91zMGhKn3fp2FnaNSz4JTj4sq7b7gN',
 'r8qAvaE9DeOk5m0JiRfVqSiAqZG6isH1slx8CyhwWGzEy')

In [3]:
# Verifying that the api is working, prints my username (aliasidd04)

api = tweepy.API(auth, wait_on_rate_limit = True)
me = api.me()
print(me.screen_name)

aliasidd04


In [4]:
def extract_list_as_df(timeline_list, team, playing_today):
    
    # Takes in a list of tweets, generated through tweepy, and converts the list into a pandas dataframe
    
    columns = set()
    allowed_types = [str, int]
    tweets_data = [] # List that will contain the data for the dataframe
    
    for tweet in timeline_list: # For each tweet in the list of tweets pulled from tweepy

        status_obj_url = f"https://twitter.com/{tweet['screen_name']}/status/{tweet['id']}" # Creates a link for tweet, to be added to the dataframe
        
        single_tweet_data = {"user": tweet['screen_name'], # Username of the tweeter
                             "id": tweet['id'], # Unique ID of tweets
                             "text": tweet['text'], # Text in the tweet
                             "url": status_obj_url, # URL generated above
                             "datetime": tweet['date'], # Date and time of tweet is UTC
                             "playing_today": playing_today} # Custom field I manually updated each day for each team, to help label which teams were playing on what days
        
        tweets_data.append(single_tweet_data)
        
    header_cols = list(columns) # Creating the column names of the dataframe
    header_cols.extend(("user","id", "text","url","datetime", "playing_today"))

    df = pd.DataFrame(tweets_data, columns = header_cols)    
    return df

In [5]:
def cleanText(text):
    
    # Takes in the list of tweet text and removes any unecessary symbols, numbers, or letters which may interfere with the sentiment analysis
    
    text = re.sub(r'(@[A-Za-z]+)|([:_]+)', '', text) # remove @ mentions
    text = re.sub(r'#', '', text) # remove # symbol
    text = re.sub(r'RT[\s]+', '', text) # remove retweets
    text = re.sub(r'https?:\/\/\S+', '', text) # remove hyperlinks
    text = re.sub(r'https\S+', '', text) # remove hyperlinks
    
    return text

In [6]:
def getResults(query, day):
    results = []

    for i, status in enumerate(tweepy.Cursor(api.search, q=query, since=day,lang="en").items(10000)):
        tweet = {}
        tweet['user_id']  = str(status.user.id)
        tweet['id'] = str(status.id)
        tweet['screen_name'] = status.user.screen_name
        tweet['text'] = status.text
        tweet['date'] = status.created_at
        tweet['date'] = tweet['date'].strftime('%Y-%m-%d, %H:%M:%S')
        results.append(tweet.copy())
        
    return results

In [7]:
def getSubjectivity(text):
   
    # Function to get the subjectivity of a tweet (between 0 and 1, 1 being more opinionated and 0 being more factual
    
    return TextBlob(text).sentiment.subjectivity

def getPolarity(text):
    
    # Create function to get the polarity (how positive or negative the function is)
    
    return TextBlob(text).sentiment.polarity

"\n# Create two new columns\ndf['Subjectivity'] = df['text'].apply(getSubjectivity)\ndf['Polarity'] = df['text'].apply(getPolarity)\n\n# Show the new dataframe with the new columns\ndf\n"

In [8]:
def getAnalysis(score):
    
    # Function to compute the negative, neutral and positive analysis, based on a tweets polarity
    
    if score < 0:
        return 'Negative'
    elif score == 0:
        return 'Neutral'
    else:
        return 'Positive'

"\n# Create the new column\ndf['Analysis'] = df['Polarity'].apply(getAnalysis)\n\n# Show the new dataframe with the new column\ndf\n"

In [59]:
team = 'acend' # Team for which the tweets are being pulled
day = '2021-12-11' # Day of the tournament for which the tweets are being pulled
playing_today = True # False if the team played on that day, True if they did
path = f'C:\\Users\\Ali\\Desktop\\Coding\\Data Science Practice\\Twitter\\CSV Files\\{day}' 
query = f"{team} #VCT OR {team} #VALORANTChampions -filter:retweets -from:valesports_na -from:ValorantEsports" # Query that is sent through tweepy

results = getResults(query,day) # Pulls
df = extract_list_as_df(results, team, playing_today)

pd.set_option('max_colwidth', 400)
df['text']= df['text'].apply(cleanText)

df['text'] = df['text'].str.replace(r'[^\x00-\x7F]+', '', regex=True)

df['Subjectivity'] = df['text'].apply(getSubjectivity)
df['Polarity'] = df['text'].apply(getPolarity)

df['Analysis'] = df['Polarity'].apply(getAnalysis)

df

Unnamed: 0,user,id,text,url,datetime,playing_today,Subjectivity,Polarity,Analysis
0,pezlee,1470272997441392641,I know who won. But Gambit is still the best team in the world to me. Idc idc. Congrats to Acend tho they fought hard af. VALORANTChampions,https://twitter.com/pezlee/status/1470272997441392641,"2021-12-13, 06:02:53",True,0.420833,0.354167,Positive
1,NetherEnderStry,1470272453385539584,"Acend are the first champs. Gambit are one of the first two finalists. No matter what, ALL teams that competed got",https://twitter.com/NetherEnderStry/status/1470272453385539584,"2021-12-13, 06:00:43",True,0.333333,0.250000,Positive
2,t7hai,1470269773845958659,acend build diff VALORANTChampions,https://twitter.com/t7hai/status/1470269773845958659,"2021-12-13, 05:50:05",True,0.000000,0.000000,Neutral
3,ninopineda_,1470263763979096068,Called it!\n\nCongrats Acend and gg Gambit! \n\nVALORANTChampions,https://twitter.com/ninopineda_/status/1470263763979096068,"2021-12-13, 05:26:12",True,0.000000,0.000000,Neutral
4,teammercindia,1470252473512194048,It's was 3-2 win for Acend!\nWell Played! \nVALORANTChampions,https://twitter.com/teammercindia/status/1470252473512194048,"2021-12-13, 04:41:20",True,0.400000,1.000000,Positive
...,...,...,...,...,...,...,...,...,...
752,GeorgeCGed,1469675662701121545,My official VALORANTChampions Predictions for Day 10 \n\n- Liquid 2-1 Acend \n- Gambit 2-1 KRU \n\nThoughts?,https://twitter.com/GeorgeCGed/status/1469675662701121545,"2021-12-11, 14:29:17",True,0.000000,0.000000,Neutral
753,AdamShoto,1469666866519891976,my predictions for today \nTeam Liquid 2-1 Acend\nas long SCREAM not playing jett XD\nGambit 2-1 Kru \nunless Gambit st,https://twitter.com/AdamShoto/status/1469666866519891976,"2021-12-11, 13:54:20",True,0.400000,-0.050000,Negative
754,NoobHaruru,1469665319094403073,my predictions for today \nTeam Liquid 2-1 Acend\nGambit 2-0 Kru \nVAMOS VALORANTChampions,https://twitter.com/NoobHaruru/status/1469665319094403073,"2021-12-11, 13:48:11",True,0.000000,0.000000,Neutral
755,hex4MT,1469622975385251840,Semi Finals day! VALORANTChampions \n\nLiquid 2-1 Acend?\nGambit 2-0 KRU?\n\nhard to predict,https://twitter.com/hex4MT/status/1469622975385251840,"2021-12-11, 10:59:56",True,0.541667,-0.291667,Negative


In [60]:
df.to_csv(os.path.join(path,f'{team} ({day}).csv'))