In [1]:
#Code by Saad Khalid

import requests
import os
import json
import numpy as np
import pandas as pd
import time
from datetime import date
from dateutil.relativedelta import relativedelta

bearer_token = ''
 #copy bearer_token here

def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers

#this manages the http request
def connect_to_endpoint(url, headers, params):
    response = requests.request("GET", url, headers=headers, params=params)
    print(response.status_code)
    while(response.status_code == 429):
        time.sleep(6000)
        print(response.status_code)
        response = requests.request("GET", url, headers=headers, params=params)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    #print(response.text)
    return response.json()

#takes id of a twitter post and outputs all the people who retweeted that post as a list of strings
def get_retweeters(id):
    tweet_search_url = "https://api.twitter.com/2/tweets/{}".format(id)
    headers = create_headers(bearer_token)
    main_tweet_query_params = {'tweet.fields': 'created_at,author_id'}
    main_tweet_json = connect_to_endpoint(tweet_search_url, headers, main_tweet_query_params)
    main_tweet_text = main_tweet_json['data']['text']
    main_tweet_author = main_tweet_json['data']['author_id']
    
    retweeters_id = []
    all_search_url = "https://api.twitter.com/2/tweets/search/all"
    retweeters_query_params = {'query': '\"{}\" retweets_of:{}'.format(main_tweet_text,main_tweet_author),'tweet.fields': 'author_id,created_at,public_metrics', 'max_results': '499', 'start_time': '2012-01-01T23:00:00Z'}
    retweeters_json = connect_to_endpoint(all_search_url, headers, retweeters_query_params)
    retweeters_id.append([tweet['author_id'] for tweet in retweeters_json['data']])
    time.sleep(4)
    while len(retweeters_json['meta']) > 3:
        retweeters_query_params = {'query': '\"{}\" retweets_of:{}'.format(main_tweet_text,main_tweet_author),'tweet.fields': 'author_id,created_at,public_metrics', 'max_results': '499', 'start_time': '2012-01-01T23:00:00Z', 'next_token': '{}'.format(retweeters_json['meta']['next_token'])}
        retweeters_json = connect_to_endpoint(all_search_url, headers, retweeters_query_params)
        retweeters_id.append([tweet['author_id'] for tweet in retweeters_json['data']])
        time.sleep(4)
    flattened_retweeters_id = [id for page in retweeters_id for id in page]
    return flattened_retweeters_id

#retweeter_id is the string id of a user who is known to have retweeted some prominent
# figure, whose id is given in figure_id.
#Returns date that retweeteter had retweeted the figure "min_retweets" number of times
def follow_date(retweeter_id, figure_id): 
    all_search_url = "https://api.twitter.com/2/tweets/search/all"
    headers = create_headers(bearer_token)
    
    retweet_count = 0
    end_year = 2013
    min_retweets = 3 #consider them as starting following the figure after 3 retweets
    date = ''
    while retweet_count < min_retweets: 
        retweeters_query_params = {'query': 'retweets_of:{} from:{}'.format(figure_id,retweeter_id),'tweet.fields': 'author_id,created_at,public_metrics', 'max_results': '499', 'start_time': '2012-01-01T23:00:00Z', 'end_time': '{}-01-01T23:00:00Z'.format(str(end_year))}
        retweeters_json = connect_to_endpoint(all_search_url, headers, retweeters_query_params)
        if retweeters_json['meta']['result_count'] > 0:
            retweet_count = retweeters_json['meta']['result_count']
            date = 0
            if retweet_count >= min_retweets:
                date = retweeters_json['data'][-3]['created_at']
            while len(retweeters_json['meta']) > 3: #paginating, running until the last page of retweets
                retweeters_query_params = {'query': 'retweets_of:{} from:{}'.format(figure_id,retweeter_id),'tweet.fields': 'author_id,created_at,public_metrics', 'max_results': '499', 'start_time': '2012-01-01T23:00:00Z', 'end_time': '{}-01-01T23:00:00Z'.format(str(end_year)), 'next_token': '{}'.format(retweeters_json['meta']['next_token'])}
                retweeters_json = connect_to_endpoint(all_search_url, headers, retweeters_query_params)
                date = retweeters_json['data'][-3]['created_at']
                time.sleep(4)
        else: #if there are no results for a range of years, increase range of years by 1
            end_year = end_year + 1
            
        time.sleep(4)
    return date

def get_user_activity(user_id, follow_date):#check the average user activity in a radius around the date the user starts following figure
    all_search_url = "https://api.twitter.com/2/tweets/search/all"
    headers = create_headers(bearer_token)
    month_rad = 1#count number of tweets from follow_date - month_rad to follow_date + month_rad
    
    full_beginning_date_list = list(follow_date)
    full_ending_date_list = list(follow_date)
    extracted_date = follow_date[0:10]
    
    beginning_date = (date.fromisoformat(extracted_date) + relativedelta(months=-1*month_rad)).isoformat()
    full_beginning_date_list[0:10] = list(beginning_date)
    full_beginning_date_str = "".join(full_beginning_date_list)
    ending_date = (date.fromisoformat(extracted_date) + relativedelta(months=month_rad)).isoformat()
    full_ending_date_list[0:10] = list(ending_date)
    full_ending_date_str = "".join(full_ending_date_list)
    retweeters_query_params = {'query': 'from:{}'.format(user_id),'tweet.fields': 'author_id,created_at,public_metrics', 'max_results': '499', 'start_time': '{}'.format(full_beginning_date_str), 'end_time': '{}'.format(full_ending_date_str)}
    retweeters_json = connect_to_endpoint(all_search_url, headers, retweeters_query_params)
    tweets = []
    tweets.append(retweeters_json['data'])
    time.sleep(4)
    while len(retweeters_json['meta']) > 3:
        retweeters_query_params = {'query': 'from:{}'.format(user_id),'tweet.fields': 'author_id,created_at,public_metrics', 'max_results': '499', 'start_time': '{}'.format(full_beginning_date_str), 'end_time': '{}'.format(full_ending_date_str), 'next_token': '{}'.format(retweeters_json['meta']['next_token'])}
        retweeters_json = connect_to_endpoint(all_search_url, headers, retweeters_query_params)
        tweets.append(retweeters_json['data'])
        time.sleep(4)
    tweets_flat = [tweets for page in tweets for tweets in page]
    total_tweets = len(tweets_flat)
    return [total_tweets/(2*month_rad),tweets_flat]

def get_monthly_tweets_list(user_id,full_date):
    all_search_url = "https://api.twitter.com/2/tweets/search/all"
    headers = create_headers(bearer_token)
    all_tweets = []
    num_Months_Before = 3
    num_Months_After = 6
    
    for month in range(-1*num_Months_Before,num_Months_After):
        full_beginning_date_list = list(full_date)
        full_ending_date_list = list(full_date)
        extracted_date = full_date[0:10]
        
        beginning_date = (date.fromisoformat(extracted_date) + relativedelta(months=month)).isoformat()
        full_beginning_date_list[0:10] = list(beginning_date)
        full_beginning_date_str = "".join(full_beginning_date_list)
        
        ending_date = (date.fromisoformat(extracted_date) + relativedelta(months=month+1)).isoformat()
        full_ending_date_list[0:10] = list(ending_date)
        full_ending_date_str = "".join(full_ending_date_list)
        
        retweeters_query_params = {'query': 'from:{}'.format(user_id),'tweet.fields': 'author_id,created_at,public_metrics', 'max_results': '499', 'start_time': '{}'.format(full_beginning_date_str), 'end_time': '{}'.format(full_ending_date_str)}
        retweeters_json = connect_to_endpoint(all_search_url, headers, retweeters_query_params)
        months_tweets = []
        months_tweets.append(retweeters_json['data'])
        time.sleep(4)
        while len(retweeters_json['meta']) > 3:
            retweeters_query_params = {'query': 'from:{}'.format(user_id),'tweet.fields': 'author_id,created_at,public_metrics', 'max_results': '499', 'start_time': '{}'.format(full_beginning_date_str), 'end_time': '{}'.format(full_ending_date_str), 'next_token': '{}'.format(retweeters_json['meta']['next_token'])}
            retweeters_json = connect_to_endpoint(all_search_url, headers, retweeters_query_params)
            months_tweets.append(retweeters_json['data'])
            time.sleep(4)
        monthly_tweets_flat = [tweets for page in months_tweets for tweets in page]
        all_tweets.append(monthly_tweets_flat)
        time.sleep(4)
        
    return all_tweets

In [3]:
TL_tweet_id= '1024078344113942528' #id of specific tweet by TL
TL_id= '468646961'
TL_retweeters = get_retweeters(TL_tweet_id)
TL_user_follow_DT = []
#get follow date
for retweeter in TL_retweeters:
    follow_DT = follow_date(retweeter, TL_id)
    TL_user_follow_DT.append([retweeter, follow_DT])

#check user activity
for i in range(len(TL_user_follow_DT)):
    activity = get_user_activity(TL_user_follow_DT[i][0],TL_user_follow_DT[i][1])
    TL_user_follow_DT[i].append(activity[0])
    print(i)

#change file path below to where you want the user_ids and dates to be saved
(pd.DataFrame(TL_user_follow_DT)).to_csv(r'C:\Users\xzono\Documents\SocialNetworkers\TL_user_follow_DT_tweet_{}'.format(TL_tweet_id), index = False, header = False)

all_tweets = []
for i in range(len(TL_user_follow_DT)):
    if TL_user_follow_DT[i][2] > 15: #ie if they average more than 50 tweets a month
        users_tweets = get_monthly_tweets_list([TL_user_follow_DT[0],TL_user_follow_DT[1]])
        (pd.DataFrame(users_tweets)).to_csv(r'C:\Users\xzono\Documents\SocialNetworkers\export_tweets_{}.csv'.format(TL_user_follow_DT[i][0]), index = False, header = False)
        #all_tweets.append(users_tweets)
        #all_tweets_df = pd.DataFrame(all_tweets)
        print(i)
        #change file path below to where you want the tweets to be saved
        #all_tweets_df.to_csv(r'C:\Users\xzono\Documents\SocialNetworkers\export_tweets.csv', index = False, header = False)
        break

#all_tweets_df = pd.DataFrame(all_tweets)
#change file path below to where you want the tweets to be saved
#all_tweets_df.to_csv(r'C:\Users\xzono\Documents\SocialNetworkers\export_tweets.csv', index = False, header = False)

200
200


KeyError: 'data'

In [12]:
all_tweets

NameError: name 'all_tweets' is not defined