In [None]:
import os
import tweepy
import pandas as pd
import re
import webbrowser

In [None]:
# Authentication for tweepy (PIN-based flow: the user authorises in the browser
# and types the resulting PIN back into the notebook)

# Credentials are read from the environment so they never have to be pasted
# into (and accidentally saved/committed with) the notebook. If a variable is
# not set, the value falls back to the empty string, which is what the
# notebook contained before.
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "")
callback_uri = 'oob' # Alternative callback noted by the author: https://cfe.sh/twitter/callback
auth = tweepy.OAuthHandler(consumer_key, consumer_secret, callback_uri)
redirect_url = auth.get_authorization_url()
webbrowser.open(redirect_url) # Opens the authorisation page in the default browser
user_pin_input = input("What's the pin value?")
auth.get_access_token(user_pin_input) # auth.access_token & auth.access_token_secret

In [None]:
# Verifying that the api is working, prints my username (aliasidd04)

# Build the API client from the `auth` handler created in the authentication
# cell; `api` is the module-level client that getResults() uses for searching.
# NOTE(review): wait_on_rate_limit=True presumably makes tweepy wait out rate
# limits instead of raising — confirm against the tweepy docs.
api = tweepy.API(auth, wait_on_rate_limit = True)
# NOTE(review): `API.me()` appears to exist only in Tweepy 3.x — confirm the
# installed version before upgrading tweepy.
me = api.me()
print(me.screen_name)

In [None]:
def extract_list_as_df(timeline_list, team, playing_today):
    """Convert a list of tweet dicts into a pandas DataFrame.

    Parameters
    ----------
    timeline_list : iterable of dict
        One dict per tweet. Each dict must provide the keys 'screen_name',
        'id', 'text' and 'date'.
    team : str
        Accepted so existing calls keep working; it is not used here.
    playing_today : bool
        Manually maintained label that is copied unchanged into the
        "playing_today" column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per tweet with the columns user, id, text, url, datetime and
        playing_today, in that order. An empty input yields an empty frame
        that still carries those columns.
    """
    column_order = ["user", "id", "text", "url", "datetime", "playing_today"]

    def to_row(entry):
        # Link to the tweet, built from the author's handle and the tweet id
        link = f"https://twitter.com/{entry['screen_name']}/status/{entry['id']}"
        return {
            "user": entry['screen_name'],    # Username of the tweeter
            "id": entry['id'],               # Unique ID of the tweet
            "text": entry['text'],           # Text in the tweet
            "url": link,                     # URL generated above
            "datetime": entry['date'],       # Date and time of the tweet, as supplied by the caller
            "playing_today": playing_today,  # Same label for every row
        }

    records = [to_row(entry) for entry in timeline_list]
    return pd.DataFrame(records, columns=column_order)

In [None]:
def getResults(query, day):
    """Run a tweet search and return the selected fields of each hit.

    Uses the module-level `api` client through `tweepy.Cursor`, asking for
    English-language results and passing `day` as the `since` argument. At
    most 10000 items are requested from the cursor.

    Parameters
    ----------
    query : str
        Search query string handed to the search endpoint as `q`.
    day : str
        Value forwarded as `since`.

    Returns
    -------
    list of dict
        One dict per status with the keys 'user_id' and 'id' (both converted
        to str), 'screen_name', 'text', and 'date' (the status' `created_at`
        formatted as '%Y-%m-%d, %H:%M:%S').
    """
    cursor = tweepy.Cursor(api.search, q=query, since=day, lang="en")
    return [
        {
            'user_id': str(status.user.id),
            'id': str(status.id),
            'screen_name': status.user.screen_name,
            'text': status.text,
            'date': status.created_at.strftime('%Y-%m-%d, %H:%M:%S'),
        }
        for status in cursor.items(10000)
    ]

In [None]:
# Run configuration: these values are edited by hand for each team/day before the cell is run
team = 'acend' # Team for which the tweets are being pulled
day = '2021-12-11' # Day of the tournament for which the tweets are being pulled
playing_today = True # True if the team played on that day, False if they did not
path = f'C:\\Users\\Ali\\Desktop\\Coding\\Data Science Practice\\Twitter\\CSV Files\\{day}' # Per-day folder path; not used within this cell. NOTE(review): absolute, machine-specific path — presumably where the day's CSV output is written; confirm
query = f"{team} #VCT OR {team} #VALORANTChampions -filter:retweets -from:valesports_na -from:ValorantEsports" # Query that is sent through tweepy

results = getResults(query,day) # Pulls tweets
df = extract_list_as_df(results, team, playing_today) # Converts to pandas dataframe