# First protocol
_Code was written for and runs on Python 3.11.0. Modify environment variables and queries as needed._  
_Please use venv_

## Protocol
· Start with keywords:

- Smartchain

- Nft

- Airdrop

- Crypto

- …etc.

1. Sample up to 10k tweets containing at least one keyword from each of 100 random hours in the past year (so up to 1M tweets)

2. Determine the most engaged-with (top) users from this combined sample (100 or 1000)

3. Pull up to 1000 comments for each top user

4. Determine top users whose comments mention at least three users other than the top user

5. Expand top user sample if we don’t have at least 100 airdrop seeders

6. Time series chart plots:

7. Top user activity

8. Airdrop seeder activity

9. Negative reaction activity? (based on sentiment analysis of replies to airdrop messages)

10. External crypto value signals (from where?)



# Dependencies
Run the following commands in the terminal to install the required packages

$pip install requests  
  
$pip install pandas  
  
_`datetime` is part of the Python standard library and does not need to be installed with pip._  
  
$pip install python-dateutil
  

--------------------  
Create a file named `data.json` in the same folder as this Jupyter notebook.


# Authentication step
In the code cell below replace bearer_token with your bearer token. Run the cell, then delete your bearer token.
This creates the token as an environment variable to be used under the name TOKEN. The token can then be removed so that others do not have access to your token when code is shared via GitHub. I will change this to dotenv and a .gitignore file later; I just haven't done that yet.

In [43]:
import os

# Paste your bearer token between the quotes, run the cell, then blank the
# string out again so the token is not saved with the notebook.
os.environ['TOKEN'] = ''

In [44]:
"""This cell creates functions to be used for authentication as well as creating endpoints."""
import requests

def auth():
    """Return the bearer token stored in the ``TOKEN`` environment variable.

    Returns:
        The token string, or ``None`` when ``TOKEN`` is not set.
    """
    # Imported here so the function does not rely on some other cell having
    # imported ``os`` first.
    import os

    return os.getenv('TOKEN')

def create_headers(bearer_token):
    """Build the request headers that carry the bearer token."""
    return {"Authorization": f"Bearer {bearer_token}"}


def create_url(keyword, start_date, end_date, max_results = 100):
    """Return the search endpoint URL together with its query parameters.

    The result is a ``(search_url, query_params)`` tuple; ``query_params``
    carries the query, the time window, the page size and the requested
    expansions / tweet fields / user fields, plus an empty ``next_token``
    placeholder.
    """
    # Change to the endpoint you want to collect data from.
    endpoint = "https://api.twitter.com/2/tweets/search/all"

    # Change the field lists below to match the endpoint you are using.
    expansions = (
        "author_id",
        "in_reply_to_user_id",
        "referenced_tweets.id",
        "entities.mentions.username",
        "referenced_tweets.id.author_id",
    )
    tweet_fields = (
        "id",
        "author_id",
        "conversation_id",
        "created_at",
        "in_reply_to_user_id",
        "lang",
        "public_metrics",
        "referenced_tweets",
        "source",
        "text",
    )
    user_fields = ("id", "name", "public_metrics", "username", "verified")

    params = {}
    params['query'] = keyword
    params['start_time'] = start_date
    params['end_time'] = end_date
    params['max_results'] = max_results
    params['expansions'] = ",".join(expansions)
    params['tweet.fields'] = ",".join(tweet_fields)
    params['user.fields'] = ",".join(user_fields)
    params['next_token'] = {}
    return endpoint, params

def connect_to_endpoint(url, headers, params, next_token = None):
    """Send a GET request to the API and return the decoded JSON body.

    Args:
        url: Endpoint URL (first element of the tuple from ``create_url``).
        headers: Request headers, e.g. the dict built by ``create_headers``.
        params: Query parameters (second element of the tuple from
            ``create_url``). The dict is not modified.
        next_token: Pagination token of the page to fetch; ``None`` requests
            the first page.

    Returns:
        The response body parsed from JSON.

    Raises:
        Exception: with ``(status_code, response_text)`` as arguments when
            the response status is not 200.
    """
    # Work on a copy so the caller's dict is left untouched. requests leaves
    # keys whose value is None out of the query string, so a None token
    # simply means "no next_token parameter".
    query = dict(params)
    query['next_token'] = next_token
    response = requests.request("GET", url, headers = headers, params = query)
    # The original literal contained a stray backslash ("\E"), which is an
    # invalid escape sequence and printed "\Endpoint".
    print("\n\n\nEndpoint Response Code: " + str(response.status_code) + "\n\n\n")
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

#print(auth())

In [45]:
import datetime
import random
"""This code cell contains two functions (is_leap_year and random_date) which help generate a random one hour date range when random_date() is called"""
# Use by calling "start_time, end_time = random_date()"

def is_leap_year(year):
    """Tell whether ``year`` is a leap year (True) or not (False)."""
    # Multiples of 400 are always leap years.
    if year % 400 == 0:
        return True
    # Remaining century years are not.
    if year % 100 == 0:
        return False
    # Everything else follows the plain divisible-by-four rule.
    return year % 4 == 0

def random_date():
    """Generate a random one-hour range within the past year for the Twitter API.

    The range starts on a full hour (UTC) between 2 and 365 * 24 hours before
    the start of the current hour, so the whole range lies in the past.

    Returns:
        A ``(start_timestamp, end_timestamp)`` tuple of strings formatted as
        ``YYYY-MM-DDTHH:MM:SSZ``; the end is exactly one hour after the start.
    """
    hours_in_year = 365 * 24
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    # The trailing "Z" in the output denotes UTC, so sample in UTC rather
    # than in naive local time.
    now = datetime.datetime.now(datetime.timezone.utc)
    current_hour = now.replace(minute=0, second=0, microsecond=0)

    # Drawing an hour offset instead of separate year/month/day values can
    # never build an invalid date (e.g. 31 April) or a window in the future,
    # and every hour of the past year is equally likely. The offset starts at
    # 2 so the window ends at least one hour before the current hour.
    hours_back = random.randint(2, hours_in_year)
    start_time = current_hour - datetime.timedelta(hours=hours_back)
    end_time = start_time + datetime.timedelta(hours=1)
    return start_time.strftime(timestamp_format), end_time.strftime(timestamp_format)

#start_time1, end_time1 = random_date()
#print(f"The one hour range is from {start_time1} to {end_time1}")

In [46]:
"""WORKING EXAMPLE OF SO FAR, Above is for more function use etc."""
import json
import pandas as pd
import csv
from typing import Optional
import time

def tweets_per_range(keyword: str, start_times_list: list, end_times_list: list, results_per_range: int, next_token: Optional[str] = None) -> list:
    """Collect tweets matching ``keyword`` from each of the given time ranges.

    For every ``(start, end)`` pair the search endpoint is paged through until
    at least ``results_per_range`` tweets have been received or the API
    reports no further page.

    Args:
        keyword: Search query passed to ``create_url``.
        start_times_list: Start timestamps, one per range.
        end_times_list: End timestamps, in the same order and of the same
            length as ``start_times_list``.
        results_per_range: Number of tweets to aim for in each range. The
            last page of a range may push the count above this value.
        next_token: Optional pagination token used for the first request of
            the first range only.

    Returns:
        A flat list of tweet objects (the items of each response's ``data``
        array), capped at 10000 entries.

    Raises:
        ValueError: if the two timestamp lists differ in length.
        Exception: propagated from ``connect_to_endpoint`` when a request
            does not return status 200.
    """
    if len(start_times_list) != len(end_times_list):
        raise ValueError("start_times_list and end_times_list must have the same length")

    max_total_tweets = 10000
    max_results = 100  # page size requested from the API
    headers = create_headers(auth())
    collected = []

    for start_time, end_time in zip(start_times_list, end_times_list):
        total_count = 0
        print(f"\n\n\n\n\n\n\n\n\n\n\nSTART TIME: {start_time} \nEND TIME: {end_time}")

        while True:
            print(f"\nkeyword: {keyword}\n start: {start_time}\n end time: {end_time}\n next token: {next_token}\n")

            # Build the request and fetch one page (prints the response code).
            search_url, query_params = create_url(keyword, start_time, end_time, max_results)
            json_response = connect_to_endpoint(search_url, headers, query_params, next_token)

            # A page without matches carries no 'data' key at all.
            collected.extend(json_response.get('data', []))
            meta = json_response.get('meta', {})
            total_count += meta.get('result_count', 0)

            # Pause between requests (presumably to stay under the API rate limit).
            time.sleep(5)

            # Always continue from the most recent page's token.
            next_token = meta.get('next_token')
            if total_count >= results_per_range:
                break
            if next_token is None:
                print("Max results reached before desired amount of tweets.")
                break

        # Every range starts again from its first page.
        next_token = None

    # Keep at most max_total_tweets entries overall.
    del collected[max_total_tweets:]
    return collected




In [47]:
"""WORKING EXAMPLE OF SO FAR, Above is for more function use etc."""
import json
import pandas as pd
import csv

# Credentials and request headers for the sample call.
bearer_token = auth()
headers = create_headers(bearer_token)

# Query settings - adjust as needed. With full-access API credentials a random
# window can be used instead of the fixed one below:
#start_time, end_time = random_date()
keyword = "Smartchain OR Airdrop OR Crypto"
start_time = "2023-01-02T17:00:00Z"
end_time = "2023-01-04T20:00:00Z"
max_results = 10

# Build the request, send it, and report the type of the decoded response.
url = create_url(keyword, start_time, end_time, max_results)
search_url, query_params = url
json_response = connect_to_endpoint(search_url, headers, query_params)
print(type(json_response))




\Endpoint Response Code: 200



<class 'dict'>


In [50]:
# Pretty-print the sample response as indented JSON for inspection.
print(json.dumps(json_response, indent=15))

{
               "data": [
                              {
                                             "text": "RT @crypto_inez: $30 GIVEAWAY ~ 6 HOURS \u23f3\ufe0f\n\n- RT &amp; Follow @SparrkleCrypto",
                                             "referenced_tweets": [
                                                            {
                                                                           "type": "retweeted",
                                                                           "id": "1610562872731643905"
                                                            }
                                             ],
                                             "author_id": "1608727704194658304",
                                             "public_metrics": {
                                                            "retweet_count": 5288,
                                                            "reply_count": 0,
                                                 

In [65]:
"""This function scrapes through a JSON data file from a twitter API call and returns the most """
def analyze_retweet_counts(response=None):
    """Count how many times each tweet occurs in a search response.

    A tweet whose first ``referenced_tweets`` entry has type ``"retweeted"``
    is counted under the ID of the referenced (original) tweet; every other
    tweet is counted under its own ID.

    Args:
        response: Decoded API response holding an optional ``data`` list of
            tweet objects. When omitted, the notebook-level ``json_response``
            is used.

    Returns:
        A dict mapping tweet ID to the number of occurrences, in the order
        the IDs were first seen. Empty when the response has no ``data``.
    """
    if response is None:
        # Fall back to the response fetched by an earlier notebook cell.
        response = json_response

    tweet_metrics_dict: dict = {}
    for tweet in response.get('data', []):
        references = tweet.get('referenced_tweets') or []
        if references and references[0]['type'] == "retweeted":
            tweet_id = references[0]['id']
        else:
            tweet_id = tweet['id']
        tweet_metrics_dict[tweet_id] = tweet_metrics_dict.get(tweet_id, 0) + 1
    return tweet_metrics_dict

# Show the per-tweet counts computed from the notebook-level json_response.
analyze_retweet_counts()

{'1610562872731643905': 1,
 '1610622349107417090': 1,
 '1610727764369211408': 1,
 '1610727744039534592': 1,
 '1610727762578251776': 1,
 '1610727464296382464': 1,
 '1610637175317487616': 1,
 '1610654370726649859': 1,
 '1610714117538562053': 1}

In [62]:
# Scratch check: store a value under a key and read it back.
rand_dict = {'num': 1}
print(rand_dict['num'])

1


# Main Working Cell

The below cell is intended to be the final cell which ties together all functions into the 'solution' to the protocol

In [None]:
# Fixed one-hour sample windows. With full-access API credentials these can be
# drawn with random_date() instead:
# #start_time, end_time = random_date()
# (a third window, "2023-01-08T17:00:00Z" - "2023-01-08T18:00:00Z", is currently left out)
start_list = ["2023-01-10T17:00:00Z", "2023-01-11T17:00:00Z"]
end_list = ["2023-01-10T18:00:00Z", "2023-01-11T18:00:00Z"]

# Collect the tweets for every window.
json_final_data = tweets_per_range(
    keyword="Smartchain OR Airdrop OR Crypto OR Nft",
    start_times_list=start_list,
    end_times_list=end_list,
    results_per_range=200,
)

# Serialize the result and store it in data.json next to the notebook.
json_to_file = json.dumps(json_final_data, indent=4)
with open("data.json", "w") as outfile:
    outfile.write(json_to_file)