# First protocol
_Code written and runs in python 3.11.0. Progresses from top to botttom. Go to bottom and run all for proper use. Modify environment variables and queries as needed._  
_Markdown cells will highlight functionality of following code cell. Functionality will be derived from the list below with the numbered list being referenced in markdown cells._

## Protocol
· Start with keywords:

- Smartchain

- Nft

- Airdrop

- Crypto

- …etc.

1. Sample up to 10k tweets containing at least one term from 100 random hours from the past year (so 1M tweets)

2. Determine the most engaged (top) with users from this combined sample (100 or 1000)

3. Pull up to 1000 comments for each top user

4. Determine top users whose comments mention at least three users other than the top user

5. Expand top user sample if we don’t have at least 100 airdrop seeders

6. Time series chart plots:

7. Top user activity

8. Airdrop seeder activity

9. Negative reaction activity? (based on sentiment analysis of replies to airdrop messages)

10. External crypto value signals (from where?)



# Dependencies
Run the following commands in the terminal to install the required packages

$pip install requests  
$pip install pandas  
$pip install datetime  
$pip install python-dateutil

--------------------  
create two files <data.json> and <data.csv> in the same folder as this jupyter notebook


# Authentication step
In the code cell below replace bearer_token with your bearer token. Run the cell, then delete your bearer token.
This creates the token as an environment variable to be used under the name TOKEN. The token can then be removed so that others do not have access to your token when code is shared via GitHub. I will change this to dotenv and a .gitignore file later I just havent done that yet.

In [68]:
os.environ['TOKEN'] = ''

In [69]:
"""This cell creates functions to be used for authentication as well as creating endpoints."""
import requests

def auth():
    """Retrieves your bearer token."""
    return os.getenv('TOKEN')

def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers


def create_url(keyword, start_date, end_date, max_results = 10):
    
    #Change to the endpoint you want to collect data from
    search_url = "https://api.twitter.com/2/tweets/search/recent" 

    #change params based on the endpoint you are using
    query_params = {'query': keyword,
                    'start_time': start_date,
                    'end_time': end_date,
                    'max_results': max_results,
                    'expansions': 'author_id',
                    'tweet.fields': 'id,text,author_id,public_metrics',
                    'user.fields': 'id,name,username,created_at',
                    #'place.fields': 'country',
                    'next_token': {}}
    return (search_url, query_params)

def connect_to_endpoint(url, headers, params, next_token = None):
    params['next_token'] = next_token   #params object received from create_url function
    response = requests.request("GET", url, headers = headers, params = params)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

print(auth())




In [60]:
import datetime
import random
"""This code cell contains two functions (is_leap_year and random_date) which help generate a random one hour date range when random_date() is called"""
# Use by calling "start_time, end_time = random_date()"

def is_leap_year(year):
    """Returns True if the given year is a leap year, False otherwise."""
    if year % 4 == 0:
        if year % 100 == 0:
            if year % 400 == 0:
                return True
            else:
                return False
        else:
            return True
    else:
        return False

def random_date():
    """Generate a random one hour date range within the last year in RFC 3339 format to be used with twitter API."""
    year = random.randint(datetime.datetime.now().year - 1, datetime.datetime.now().year)
    month = random.randint(1, datetime.datetime.now().month)
    if month == datetime.datetime.now().month:
        day = random.randint(1, datetime.datetime.now().day - 1)
    elif month == 2:
        if is_leap_year(year):
            day = random.randint(1, 29)
        else:
            day = random.randint(1, 28)
    elif month in [1, 3, 5, 7, 8, 10, 12]:
        day = random.randint(1, 31)
    else:
        day = random.randint(1, 31)
    hour = random.randint(0, 23)
    start_time = datetime.datetime(year, month, day, hour)
    end_time = start_time + datetime.timedelta(hours=1)
    start_timestamp = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
    end_timestamp = end_time.strftime("%Y-%m-%dT%H:%M:%SZ")
    return start_timestamp, end_timestamp

#start_time1, end_time1 = random_date()
#print(f"The one hour range is from {start_time1} to {end_time1}")

In [61]:
"""Creates a function called write_to_csv which allows for writing a json file to a csv file."""
import json
import csv
 

def write_to_csv():
    # Opening JSON file and loading the data
    # into the variable data
    with open('data.json') as json_file:
        data = json.load(json_file)
 
    tweet_data = data['data']
 
    # now we will open a file for writing
    data_file = open('data.csv', 'w')
 
    # create the csv writer object
    csv_writer = csv.writer(data_file)
 
    # Counter variable used for writing
    # headers to the CSV file
    count = 0
 
    for head in tweet_data:
        if count == 0:
 
            # Writing headers of CSV file
            header = head.keys()
            csv_writer.writerow(header)
            count += 1
 
        # Writing data of CSV file
        csv_writer.writerow(head.values())
 
    data_file.close()

In [64]:
# Test list until i get full access endpoint and can use the random generated hour ranges.
start_list = ["2023-01-08T17:00:00Z", "2023-01-10T17:00:00Z", "2023-01-11T17:00:00Z"]
end_list = ["2023-01-08T18:00:00Z", "2023-01-10T18:00:00Z", "2023-01-11T18:00:00Z"]

In [65]:
import csv
import time


def tweets_per_hour_range(keyword: str):
    """This function returns 10,000 tweets (containing keyword) per 1 hour range from 100 random hour ranges in the past year."""
    bearer_token = auth()
    headers = create_headers(bearer_token)
    # Creates 100 random one hour ranges in the past year
    """
    start_list = list()
    end_list = list()
    for _ in range(0, 100):
        start_time, end_time = random_date()
        while start_time in start_list:
            start_time, end_time = random_date()
        start_list.append(start_time)
        end_list.append(end_time)
    """

    total_tweets: int = 0
    max_results: int = 100

    # Create file data.csv
    csvFile = open("data.csv", "a", newline="", encoding='utf-8')
    csvWriter = csv.writer(csvFile)

    #Create headers for the data you want to save, in this example, we only want save these columns in our dataset
    csvWriter.writerow(['author id', 'created_at', 'id', 'like_count', 'reply_count','retweet_count', 'source', 'tweet'])
    csvFile.close()

    for i in range(0,len(start_list)):

        # Inputs
        count: int = 0 # Counting tweets per time period
        max_count: int = 400 # Max tweets per time period
        flag: bool = True
        next_token: str = None
    
        # Check if flag is true
        while flag:
            # Check if max_count reached
            if count >= max_count:
                break
            print("-------------------")
            print("Token: ", next_token)
            url = create_url(keyword, start_list[i],end_list[i], max_results)
            json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
            result_count = json_response['meta']['result_count']

            if 'next_token' in json_response['meta']:
                # Save the token to use for next call
                next_token = json_response['meta']['next_token']
                print("Next Token: ", next_token)
                if result_count is not None and result_count > 0 and next_token is not None:
                    print("Start Date: ", start_list[i])
                    write_to_csv(json_response, "data.csv")
                    count += result_count
                    total_tweets += result_count
                    print("Total # of Tweets added: ", total_tweets)
                    print("-------------------")
                    time.sleep(5)                
            # If no next token exists
            else:
                if result_count is not None and result_count > 0:
                    print("-------------------")
                    print("Start Date: ", start_list[i])
                    write_to_csv(json_response, "data.csv")
                    count += result_count
                    total_tweets += result_count
                    print("Total # of Tweets added: ", total_tweets)
                    print("-------------------")
                    time.sleep(5)
            
                #Since this is the final request, turn flag to false to move to the next time period.
                flag = False
                next_token = None
            time.sleep(5)
    print("Total number of results: ", total_tweets)

# Main Working Cell

The below cell is intended to be the final cell which ties together all functions into the 'solution' to the protocol

In [66]:
import json
import pandas as pd
import csv

tweets_per_hour_range("Smartchain OR Airdrop OR Crypto")

-------------------
Token:  None
Endpoint Response Code: 200
Next Token:  b26v89c19zqg8o3fqk406o5tclcda1x6vzad62syn575p
Start Date:  2023-01-08T17:00:00Z


TypeError: write_to_csv() takes 0 positional arguments but 2 were given

In [45]:
"""WORKING EXAMPLE OF SO FAR, Above is for more function use etc."""
import json
import pandas as pd
import csv

bearer_token = auth()
"""input the necessary inputs below."""
headers = create_headers(bearer_token)
keyword = "Smartchain OR Airdrop OR Crypto"
# Use the commented out code below when using full access api
#start_time, end_time = random_date()
start_time = "2023-01-02T17:00:00Z"
end_time = "2023-01-04T20:00:00Z"

max_results = 100
url = create_url(keyword, start_time, end_time, max_results)
json_response = connect_to_endpoint(url[0], headers, url[1])
result_dict = json_response["data"]
#while "next_token" in json_response["meta"] and len(result_dict) < 9900:
    #print(json.dumps(json_response, indent=4, sort_keys=True))
    #json_response = connect_to_endpoint(url[0], headers, url[1], json_response["meta"]["next_token"])
    #result_dict.extend(json_response["data"])
#print(json.dumps(json_response, indent=4, sort_keys=True))
print(json.dumps(result_dict, indent=4, sort_keys=True))

#def append_to_csv(json_response, fileName):
with open('data.json', 'w') as f:
    json.dump(json_response, f)
#df = pd.read_json('data.json')
write_to_csv()
#url = create_url(keyword, start_time, end_time, max_results)
#json_response = connect_to_endpoint(url[0], headers, url[1])
#print(json.dumps(json_response, indent=4, sort_keys=True))

Endpoint Response Code: 400


Exception: (400, '{"errors":[{"parameters":{"max_results":["1000"]},"message":"The `max_results` query parameter value [1000] is not between 10 and 100"}],"title":"Invalid Request","detail":"One or more parameters to your request was invalid.","type":"https://api.twitter.com/2/problems/invalid-request"}')