First, we import all the libraries we need.

In [None]:
from tweepy import OAuthHandler
from tweepy import Client
import tweepy
import neo4j.time
from py2neo import Node, Graph, Relationship
import networks
from datetime import datetime
from creds import *

Next, we load the API credentials. They are imported from `creds.py` in the cell above and assigned to shorter names below. \
**access_token (Authentication Access Token):** Username of the Twitter account the request is made on behalf of \
**access_token_secret (Authentication Access Token Secret):** Password of the Twitter account the request is made on behalf of \
**consumer_key (Consumer API Key):** Username that represents your App when making API requests \
**consumer_secret (Consumer API Key Secret):** Password that represents your App when making API requests \
**bearer_token (Bearer Token):** Token that authenticates requests on behalf of your developer App

In [None]:
# Alias the credential names (presumably provided by `from creds import *`)
# to the shorter names used throughout the rest of the notebook.
consumer_key, consumer_secret = consumer_api_key, consumer_api_key_secret
bearer = bearer_token
access_token, access_token_secret = (
    authentication_access_token,
    authentication_access_token_secret,
)

In [None]:
# Client for our developer app, authenticated with the bearer token.
client = Client(bearer_token=bearer)

# Endpoint for our own Twitter account: sign requests with the consumer key
# pair, then attach the account's access token pair.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

Here, we define helper functions for inspecting the results of our API queries and for inserting the Tweet data into a Neo4j graph. We also calculate each account's age, which is an important metric for detecting potentially fake accounts. The Neo4j documentation on datetime values explains how to make the dates retrieved from Twitter compatible with querying inside Neo4j.

In [None]:
def get_tweet_attributes(tweet_id, attributes):
    """Print the requested attributes of a particular tweet.

    The tweet is fetched with ``api.get_status`` and one ``name :  value``
    line is printed per requested attribute. The values are only printed,
    not returned.

    tweet_id:   string or integer representation of the tweet's ID
    attributes: iterable of attribute names to read from the fetched tweet

    Returns 0.
    Raises AttributeError if a requested attribute does not exist on the
    fetched tweet object.
    """
    tweet = api.get_status(str(tweet_id))
    # Iterate over the names directly; no separate index counter is needed,
    # and any iterable (not just an indexable list) is accepted.
    for name in attributes:
        print(name, ": ", getattr(tweet, name))
    return 0
        
def get_author_attributes(author_id, attributes):
    """Print the requested attributes of a particular author, plus account age.

    The account is fetched with ``api.get_user`` and one ``name :  value``
    line is printed per requested attribute, followed by the account's age
    in whole days. The values are only printed, not returned.

    author_id:  string or integer representation of the author's user ID
    attributes: iterable of attribute names to read from the fetched user

    Returns 0.
    Raises AttributeError if a requested attribute does not exist on the
    fetched user object.
    """
    from datetime import timezone  # local import: only this helper needs it

    author = api.get_user(user_id=author_id)

    # Twitter reports created_at in UTC, so "today" is taken in UTC as well;
    # comparing against the local date could be off by one day.
    today = datetime.now(timezone.utc).date()
    account_age = (today - author.created_at.date()).days

    for name in attributes:
        print(name, ": ", getattr(author, name))
    print("account_age (at time of querying): ", account_age, "days")
    return 0



def create_tweet_node(graph, tweet_id, tweet_attributes):
    """Unfinished stub: look up a tweet and its author ahead of node creation.

    graph:            not used yet
    tweet_id:         string or integer representation of the tweet's ID
    tweet_attributes: not used yet

    For now this only performs the two API lookups and derives the tweet's
    creation date; nothing is written to the graph and None is returned.
    """
    status = api.get_status(str(tweet_id))
    user = api.get_user(user_id=status.author.id)

    # Creation timestamp truncated to midnight of the day it was posted.
    day_stamp = status.created_at.strftime("%Y-%m-%d")
    tweet_created = datetime.fromisoformat(day_stamp)
    
    
    
    
# def create_author_node()

Now, let's explore which attributes we can get for each tweet and author by using Python's built-in `dir()` function. We will use the ID of a random tweet from a previous query to do this.

In [None]:
# Fetch one tweet by ID and list every attribute name its object exposes.
tweet_id = 1519034348833587204
tweet = api.get_status(tweet_id)
print("Possible Tweet Attributes: \n\n",dir(tweet),'\n\n')

# Do the same for the user object of that tweet's author.
author_id = tweet.author.id
author = api.get_user(user_id=author_id)
print("Possible Author Attributes: \n\n",dir(author))

Now, we define the attributes that we would like to obtain from querying the API. First, however, we must choose a topic for our query. You can add your own fields as you see fit. For an idea of which fields are available, check out the link here: https://developer.twitter.com/en/docs/twitter-api/fields. \
After getting the response from the client, we select the attributes that we want to extract from the tweets we have retrieved.

In [None]:
# Topic to search for.
query = 'ukraine'

# Search recent tweets on that topic (at most 100 results), asking for the
# ID, author ID and referenced-tweets fields on each result.
response = client.search_recent_tweets(
    query=query,
    max_results=100,
    tweet_fields=['id', 'author_id', 'referenced_tweets'],
)

# Type in your desired tweet attributes from the list of tweet attributes above
tweet_attributes = [
    'id',
    'created_at',
    'favorite_count',
    'lang',
    'geo',
    'place',
    'retweet_count',
    'source_url',
    'text',
    'entities',
]

# Type in your desired author attributes from the list of author attributes above
author_attributes = [
    'name',
    'id',
    'created_at',
    'time_zone',
    'protected',
    'lang',
    'followers_count',
    'friends_count',
]

Below, we print each tweet we have retrieved to get an idea of what information is available. It looks like we have a lot of useful information, enough even to create a graph database to visualize relationships. \
\
Quick note: the language code for every supported language can be found here: https://developer.twitter.com/en/docs/twitter-for-websites/supported-languages.

In [None]:
# Print the author details, tweet details and primary reference of every
# search result.
# tweepy leaves response.data as None when the search matched nothing, so
# fall back to an empty list instead of failing on iteration.
for result in response.data or []:
    print("Author \n")
    print("*"*15)
    get_author_attributes(result.author_id, author_attributes)
    print("\nTweet\n")
    print("*"*15)
    get_tweet_attributes(result.id, tweet_attributes)
    # Only the first referenced tweet (retweet / quote / reply target) is
    # shown; the truthiness check also skips an empty list.
    if result.referenced_tweets:
        primary_ref = result.referenced_tweets[0]
        print(primary_ref.get("type"), primary_ref.get("id"))
    print("-" * 30)

Note that the code below is quite monolithic and is not as efficient as it could be. This is an initial, brute-force approach to creating a graph of tweet nodes along with their authors and referenced tweets. It accomplishes the goal, but it can definitely be cleaned up. For instance, there could be separate functions for creating tweet nodes (including referenced tweets), creating author nodes, creating relationships, and calculating account age.

In [None]:
from datetime import timezone  # needed for the UTC "today" in the age helper


def _account_age_days(user):
    """Return the whole number of days since *user*'s account was created."""
    # Twitter reports created_at in UTC, so compare against today's UTC date.
    return (datetime.now(timezone.utc).date() - user.created_at.date()).days


def _author_node(user):
    """Build an (unsaved) "author" node from a user object returned by the API."""
    return Node("author",
                authorID=user.id,
                username=user.name,
                time_zone=user.time_zone,
                protected=user.protected,
                language=user.lang,
                follower_count=user.followers_count,
                account_age=_account_age_days(user))


# NOTE(review): connection credentials are hard-coded here; consider loading
# them the same way as the Twitter credentials.
graph = Graph("bolt://localhost:7687", user="neo4j", password="password1")
tweets = []
authors = []
# response.data is None when the search matched nothing; treat that as "no tweets".
for i in response.data or []:
    tweet_node = Node("tweet",
                      tweetID=i.id,
                      authorID=i.author_id,
                      text=i.text,
                      ReferencedTweets=str(i.referenced_tweets))

    # Only the first referenced tweet is modelled.
    primary_ref = i.referenced_tweets[0] if i.referenced_tweets else None
    if primary_ref is not None:
        ref_id = primary_ref.get("id")
        ref_type = primary_ref.get("type")
        # Record the reference on the node *before* it is saved: py2neo's
        # create() leaves already-saved nodes unchanged, so a property set
        # afterwards would never reach the database.
        tweet_node[ref_type] = ref_id
    tweets.append(tweet_node)

    author_node = _author_node(api.get_user(user_id=i.author_id))
    authors.append(author_node)
    # Creating the relationship also creates its two (still unsaved) end nodes.
    graph.create(Relationship(author_node, "tweeted", tweet_node))

    if primary_ref is None:
        continue

    # Node for the tweet being referenced, linked from the current tweet by
    # the reference type.
    ref_status = api.get_status(str(ref_id))
    referenced = Node("tweet",
                      tweetID=ref_id,
                      authorID=ref_status.author.id,
                      text=ref_status.text)
    graph.create(Relationship(tweet_node, ref_type, referenced))

    # Author of the referenced tweet; the account age is computed from this
    # user's own creation date, not the referencing author's.
    ref_author = api.get_user(user_id=ref_status.author.id)
    graph.create(Relationship(_author_node(ref_author), "tweeted", referenced))

# Number of search results processed (kept for any later cell that reads it).
counter = len(tweets)

In [None]:
# Show the ID and type of the first result's first referenced tweet, if any.
# Guarded so that an empty search (response.data is None) or a tweet without
# references prints nothing instead of raising.
if response.data and response.data[0].referenced_tweets:
    first_ref = response.data[0].referenced_tweets[0]
    print(first_ref.get("id"))
    print(first_ref.get("type"))