In [None]:
import ast
import codecs
import configparser

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tweepy as tp

In [None]:
#Get your tokens on the developer portal and store them in a config.ini file.
#Expected layout: a [twitter] section containing a bearer_token entry.
config = configparser.RawConfigParser()
# read() silently skips files it cannot open and returns the list of files it
# actually parsed, so an empty result means config.ini is missing/unreadable.
if not config.read("config.ini"):
    raise FileNotFoundError(
        "config.ini not found: create it with a [twitter] section containing bearer_token"
    )
bearer_token = config['twitter']['bearer_token']

In [None]:
#Build the API client from the bearer token; wait_on_rate_limit=True is
#passed so the client waits on rate limits instead of failing
client = tp.Client(
    bearer_token=bearer_token,
    wait_on_rate_limit=True,
)

In [None]:
#Setting up the query
keywords = 'your query'

#Retrieving the data: one row per tweet that mentions at least one account
data = []
for page in tp.Paginator(client.search_recent_tweets, query=keywords,
                         max_results=10,
                         since_id=1546863444284030977,
                         tweet_fields=['id', 'author_id', 'created_at', 'text', 'entities'],
                         user_fields=['name', 'username', 'id', 'public_metrics'],
                         expansions=['author_id']):
    # A page without any matching tweet carries data=None
    if not page.data:
        continue

    # The expanded users come back as a separate, de-duplicated list that is
    # not aligned one-to-one with the tweets, so authors are looked up by id
    # rather than zipped positionally with the tweets.
    authors = {author.id: author for author in (page.includes or {}).get('users', [])}

    for tweet in page.data:
        author = authors.get(tweet.author_id)
        mentions = (tweet.entities or {}).get('mentions')
        # Only tweets with a known author and at least one mention are kept
        if author is None or not mentions:
            continue
        # None here is filled in later when missing follower counts are fetched
        followers = (author.public_metrics or {}).get('followers_count')
        data.append([tweet.id, author.id, tweet.created_at, tweet.text, mentions,
                     author.name, author.username, followers])

#Turn it into a dataframe
columns = ['tweet_id', 'author_id','created_at','text','mentions', 'name','username', 'followers']

df = pd.DataFrame(data, columns=columns)
df

In [None]:
#Persist the collected tweets as a UTF-8 CSV, without the row index
df.to_csv(
    'twitterapi.csv',
    index=False,
    encoding='utf-8',
)

In [None]:
#Making the edges dataset
def _mentioned_usernames(mentions):
    """Return the usernames contained in one tweet's ``mentions`` value.

    Accepts either the in-memory list of mention dicts or the string form that
    list takes after a round trip through CSV. Anything else (e.g. NaN) yields
    an empty list.
    """
    if isinstance(mentions, str):
        # Frame reloaded from CSV: the list was written out as its Python repr
        mentions = ast.literal_eval(mentions)
    if not isinstance(mentions, (list, tuple)):
        return []
    return [m['username'] for m in mentions if isinstance(m, dict) and 'username' in m]


# rename()/assign() return new frames, so df itself is never written through a slice
edges = (
    df[['username', 'mentions', 'created_at']]
    .rename(columns={'username': 'Source', 'mentions': 'Target', 'created_at': 'Weight'})
    .assign(Target=lambda frame: frame['Target'].apply(_mentioned_usernames))
    .explode('Target')
)
# Weight = number of tweets in which Source mentions Target.
# as_index=False keeps Source/Target as columns so they are written to the CSV.
edges = edges.groupby(['Source', 'Target'], as_index=False)['Weight'].count()
edges.to_csv('edges.csv', encoding='utf-8', index=False)

In [None]:
#Making the nodes dataset
# Accept `edges` either as a Source/Target/Weight table or as the
# (Source, Target)-indexed Series that a groupby count produces.
edge_table = edges.reset_index() if isinstance(edges, pd.Series) else edges

# One follower count per author (first occurrence wins)
users = df[['username', 'followers']].drop_duplicates(['username'], keep='first')

# Every account appearing as a source or a target becomes exactly one node
node_names = pd.concat(
    [edge_table['Source'], edge_table['Target']], ignore_index=True
).drop_duplicates()
ids = pd.DataFrame({'username': node_names, 'Label': node_names})
nodes = pd.merge(ids, users, on='username', how='left')

#The targets mentioned in tweets don't have the follower count in their data so we fetch it
missing = nodes['followers'].isna()
usernames = nodes.loc[missing, 'username'].to_list()

follower_count = []

for user in usernames:
    try:
        response = client.get_user(username=user, user_fields=['public_metrics'])
        # data is None when the lookup returns no user; count such accounts as 0
        metrics = response.data.public_metrics if response.data is not None else None
        follower_count.append((metrics or {}).get('followers_count') or 0)
    except Exception as exc:
        # Best effort: report the failure and fall back to 0 followers
        print(f"Could not fetch followers for {user!r}: {exc}")
        follower_count.append(0)

if follower_count:
    nodes.loc[missing, 'followers'] = follower_count

# Every gap is filled at this point, so followers can be stored as integers
nodes = (
    nodes[['username', 'Label', 'followers']]
    .astype({'followers': 'int64'})
    .reset_index(drop=True)
)
nodes.columns = ['Id', 'Label', 'followers']
nodes.to_csv('nodes.csv', encoding='utf-8', index=False)