## Importing Python modules

In [1]:
import os
import pandas as pd
import requests
import time
import json
from datetime import datetime
from twarc import Twarc2, expansions

## Input your Twitter Bearer Token, query, and date

In [18]:
TOKEN = "YOUR TWITTER API TOKEN"
query = "(#millerlite) lang:en"
start_time = "2022-01-01T00:00:00Z"
end_time = '2023-01-01T00:00:00Z'

In [19]:
os.environ["BEARER_TOKEN"] = TOKEN
bearer_token = os.environ["BEARER_TOKEN"]

def auth():
    return os.environ.get("BEARER_TOKEN")

next_token = ""
def create_url():
    if next_token == "":
        return "https://api.twitter.com/2/tweets/search/all"
    else: 
        return "https://api.twitter.com/2/tweets/search/all?next_token={}".format(next_token)
    
search_url = create_url()
    
query_params = {'query': query, # search query
                'tweet.fields': "author_id,created_at,public_metrics,in_reply_to_user_id",
                'start_time': start_time, # data collection start time
                'end_time': end_time,   # data collection end time
                'max_results': 300,
                "expansions":"referenced_tweets.id"}

def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers

def connect_to_endpoint(url, headers, params):
    response = requests.request("GET", search_url, headers=headers, params=params)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

## Crawling Start

In [21]:
df = pd.DataFrame() 
retweet_df_disk = pd.DataFrame() 

next_token = ''
full_text = ''
row = ''

count = 0

full_text_column = []
includes_id = []
includes_retweet = []


while True:
    bearer_token = auth()
    search_url = create_url()
    headers = create_headers(bearer_token)    
    json_response = connect_to_endpoint(search_url, headers, query_params)
    
    tweet = pd.DataFrame.from_dict(json_response['data']) 
    try:
        tweet_1 = tweet.explode('referenced_tweets')
        tweet_1 = tweet_1.referenced_tweets.apply(pd.Series)
        tweet_2 = tweet_1[["id", "type"]]
        tweet_2.columns = ['retweet_id', 'type']
        tweets = pd.concat([tweet, tweet_2], axis=1)
    
        ### data file
        df = df.reset_index(drop=True)
        df = pd.concat([df, tweets], axis=0)

        result_count = json_response['meta']['result_count']
        count += result_count
        print(count)
        time.sleep(3)

    except KeyError:
        pass
    
    try:
        includes_range = range(len(json_response['includes']['tweets']))
        includes_id = []
        includes_retweet = []
            
        for i in includes_range:
            id_info = json_response['includes']['tweets'][i]['id']
            text = json_response['includes']['tweets'][i]['text']

            includes_id.append(id_info)
            includes_retweet.append(text)
            retweet_df = ''
            retweet_df = pd.DataFrame(list(zip(includes_id, includes_retweet)), 
                       columns =['retweet_id', 'text'])
    
    except KeyError:
        pass
        
    retweet_df_disk = pd.concat([retweet_df_disk, retweet_df], axis=0)
    retweet_df_disk = retweet_df_disk.drop_duplicates()
                
    if 'next_token' in json_response['meta']:
        next_token = json_response['meta']['next_token']
    else:
        break
        
engagement = df['public_metrics'].apply(pd.Series)
df = pd.concat([df[['id', 'author_id','created_at', 'text', 'type', 'retweet_id']], engagement], axis=1)
final_data = df.merge(retweet_df_disk, on='retweet_id', how='left')
final_data['retweet_account'] = final_data.text_x.str.extract(r'(RT \@\w+)', expand = True)
final_data.text_y.fillna(final_data.text_x, inplace=True)
final_data.type.fillna("original", inplace=True)
final_data['text_z'] = final_data[['retweet_account', 'text_y']].apply(lambda x: ' '.join(x.dropna()), axis=1)
final_data = final_data[['id', 'author_id', 'created_at', 'type', 'retweet_count', 'reply_count', 'like_count', 'quote_count', 'text_z']].rename(columns={"text_z": "text"})

client = Twarc2(bearer_token=TOKEN)
user_id_list = final_data["author_id"]
lookup = client.user_lookup(users=user_id_list)

username = []
follower_n = []
following_n = []

for page in lookup:
    result = expansions.flatten(page)
    for user in result:
        username.append(user['username'])
        follower_n.append(user['public_metrics']['followers_count'])
        following_n.append(user['public_metrics']['following_count'])
        
final_data["username"] = username
final_data["follower_count"] = follower_n
final_data["following_count"] = following_n
final_data

225
498
774
1067
1362
1656
1953
2249
2544
2835
3123
3420
3719
4005
4299
4586
4854
5141
5384
5658
5950
6127


Unnamed: 0,id,author_id,created_at,type,retweet_count,reply_count,like_count,quote_count,text,username,follower_count,following_count
0,1609337958045462531,2597149692,2022-12-31T23:57:23.000Z,replied_to,0,0,0,0,"New Year's resolutions are hard to keep, but t...",MKolodiej,162,984
1,1609329695547396098,712817719838707713,2022-12-31T23:24:33.000Z,original,0,0,1,0,Let's go Griffins! Let's end 2022 with a win!...,MrsTerrifier22,38,95
2,1609040320418963456,1149437094,2022-12-31T04:14:41.000Z,replied_to,0,0,0,0,It’s hard to keep New Year's resolutions. That...,shondaterry1,118,665
3,1609009902512656390,945849495517925376,2022-12-31T02:13:49.000Z,original,0,1,9,0,So my @BudLightNext @nounsdao glasses just arr...,SammyCryptoKing,605,1353
4,1608919617132728320,1588597758377791489,2022-12-30T20:15:03.000Z,replied_to,0,0,0,0,This is the Patriot Front white supremacists. ...,JaberIzz,11,369
...,...,...,...,...,...,...,...,...,...,...,...,...
6123,1477139096921419781,1326716539542908928,2022-01-01T04:46:19.000Z,original,0,0,1,0,#Legend in the front row of Nashville New Year...,iowa_podcast,847,1744
6124,1477127444150853638,4865350201,2022-01-01T04:00:01.000Z,original,0,0,0,0,Hey Forge fam! Save this flyer to your phone a...,1039forge,229,709
6125,1477116620220686336,1431002237656993793,2022-01-01T03:17:00.000Z,retweeted,1,0,0,0,RT @TeresaP81069921 So good I had to share! Ch...,StellaHposher,331,235
6126,1477088170407833601,1473144801105219588,2022-01-01T01:23:57.000Z,replied_to,0,1,0,0,Can’t beat it! @OliverQuickle @BuckCummings3 h...,Ronchee42,24,32


## Daily Tweet Volume

In [22]:
final_data['date'] = pd.to_datetime(final_data['created_at']).dt.date
daily_volume = final_data.groupby(['date'])['date'].count().to_frame("freq")

## Export the data as an excel file

In [24]:
final_data.to_excel("Miller.xlsx")
daily_volume.to_excel("Miller_daily_volume.xlsx")