## Task 2: Mastodon API

### Step 1: Create a Mastodon Application
1. Log-In to your Mastodon Server (eg. https://sciences.social/)
2. Under Preferences, click on Development
3. Click on New Application
4. Select functionalities i.e. read and write access the new application needs. For the purposes of this tutorial we will only be using 'read' functions.
5. Copy the 'Access Token' and store it securely

### Required packages
pip3 install httpx

In [25]:
import httpx
import getpass
from IPython.core import display
from dateutil import parser
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
# from mastodon import Mastodon
# ks3yuAI7JcF3VhhA8euLqN1MHvMrpyzd4l2HOs1udac

In [4]:
my_instance = "https://sciences.social/" # eg. https://mastodon.social/

In [5]:
token=getpass.getpass()

········


In [6]:
client = httpx.Client(headers=dict(Authorization=f"Bearer {token}"))

In [7]:
res = client.get("{}/api/v1/accounts/verify_credentials".format(my_instance))

In [8]:
res.raise_for_status()
result = res.json()
result["id"], result["username"]

('109334421628967389', 'anmolpanda')

In [9]:
usernames = [
    'https://mastodon.social/@RepShontelBrown'
]

In [51]:
def parse(result):
    result.raise_for_status()
    return result.json()

def get_date_from_string(date_str):
    return datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ')

def get_toots_of_user(username):
    account = parse(client.get("https://sciences.social/api/v1/accounts/lookup?acct={}".format(username)))
    id_ = account['id']
    result = client.get("https://sciences.social/api/v1/accounts/{}/statuses?limit=40".format(id_))
    all_statuses = []
    statuses = parse(result)
    all_statuses.extend(statuses)
    while len(statuses) > 0:
        print(len(all_statuses))
        result = client.get("https://sciences.social/api/v1/accounts/{}/statuses?limit=40&max_id={}".format(id_, all_statuses[-1]['id']))
        statuses = parse(result)
        all_statuses.extend(statuses)
    return all_statuses


def get_id_from_date(date):
    # Code for this function taken from: 
    # https://shkspr.mobi/blog/2022/11/building-an-on-this-day-service-for-mastodon/
    return (int(date.timestamp()) << 16)*1000
    
def get_toots_of_user_between_dates(username, start_date, end_date):
    account = parse(client.get("https://sciences.social/api/v1/accounts/lookup?acct={}".format(username)))
    id_ = account['id']
    max_id = get_id_from_date(end_date)
    min_id = get_id_from_date(start_date)
    result = client.get("https://sciences.social/api/v1/accounts/{}/statuses?limit=40&max_id={}".format(id_, max_id))
    all_statuses = []
    statuses = parse(result)
    all_statuses.extend(statuses)
    while True:
        print(len(all_statuses))
        result = client.get("https://sciences.social/api/v1/accounts/{}/statuses?limit=40&max_id={}".format(id_, all_statuses[-1]['id']))
        statuses = parse(result)
        all_statuses.extend(statuses)
        earliest_date = get_date_from_string(all_statuses[-1]['created_at'])
        if earliest_date < start_date or len(statuses) == 0:
            break
    return all_statuses
    

# https://shkspr.mobi/blog/2022/11/building-an-on-this-day-service-for-mastodon/

def get_followers_of_user(server, username):
    account = parse(client.get("https://{}/api/v1/accounts/lookup?acct={}".format(server, username)))
    id_ = account['id']
    results = client.get("https://{}/api/v1/accounts/{}/followers?limit=80".format(server, id_))
    followers = parse(results)
    i=1
    print('Request: {}, Number of followers: {}'.format(i, len(followers)))
    while True:
        link = results.headers['link']
        if not 'next' in link:
            break
        next_url = link.split('<')[1].split('>')[0]
        results = client.get(next_url)
        followers.extend(parse(results))
        i+=1
        print('Request: {}, Number of followers: {}'.format(i, len(followers)))
        if i>20:
            break
    return followers
        
def get_accounts_followed_by_user(server, username):
    account = parse(client.get("https://{}/api/v1/accounts/lookup?acct={}".format(server, username)))
    id_ = account['id']
    results = client.get("https://{}/api/v1/accounts/{}/following?limit=80".format(server, id_))
    following = parse(results)
    i=1
    print('Request: {}, Number of accounts followed: {}'.format(i, len(following)))
    while True:
        link = results.headers['link']
        if not 'next' in link:
            break
        next_url = link.split('<')[1].split('>')[0]
        results = client.get(next_url)
        following.extend(parse(results))
        i+=1
        print('Request: {}, Number of accounts followed: {}'.format(i, len(following)))
    return following

def search_hashtag(hashtag):
    results = client.get("https://sciences.social/api/v1/timelines/tag/{}?limit=100".format(hashtag))
    statuses = parse(results)
    i=1
    print('Request: {}, Number of toots: {}'.format(i, len(statuses)))
    while True:
        link = results.headers['link']
        if not 'next' in link:
            break
        next_url = link.split('<')[1].split('>')[0]
        results = client.get(next_url)
        statuses.extend(parse(results))
        i+=1
        print('Request: {}, Number of toots: {}'.format(i, len(statuses)))
        if i > 50:
            break
    return statuses

def get_top_100_trending_hashtags(server):
    results = client.get("https://sciences.social/api/v1/timelines/tag/{}?limit=100".format(hashtag))

In [37]:
username = '@cfiesler@hci.social'
# toots = get_toots_of_user(username)

In [26]:
df_all_toots = pd.DataFrame(toots)
df_all_toots

Unnamed: 0,id,created_at,in_reply_to_id,in_reply_to_account_id,sensitive,spoiler_text,visibility,language,uri,url,...,content,filtered,reblog,account,media_attachments,mentions,tags,emojis,card,poll
0,110489063647822039,2023-06-05T01:41:37.000Z,110488875560825729,109327849920776618,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110489062456206741,...,"<p><span class=""h-card""><a href=""https://hci.s...",[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109327849920776618', 'username': 'pg'...",[],[],,
1,110488764453590736,2023-06-05T00:25:28.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110488763013056688,...,<p>The very nice person who writes on cakes cl...,[],,"{'id': '109274913869213582', 'username': 'cfie...","[{'id': '110488764367663397', 'type': 'image',...",[],[],[],,
2,110464390159842304,2023-05-31T17:06:48.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110464388864587691,...,<p>found the bots in our Mastodon survey data</p>,[],,"{'id': '109274913869213582', 'username': 'cfie...","[{'id': '110464390053048894', 'type': 'image',...",[],[],[],,
3,110459966552111430,2023-05-30T22:21:47.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110459965088034460,...,<p>Your weekly reminder that I'm compiling a r...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],[],"[{'name': 'aiethicsnews', 'url': 'https://scie...",[],{'url': 'https://docs.google.com/spreadsheets/...,
4,110458292575777193,2023-05-30T15:16:05.000Z,,,False,,public,,https://hci.social/users/cfiesler/statuses/110...,,...,,[],"{'id': '110441210490958628', 'created_at': '20...","{'id': '109274913869213582', 'username': 'cfie...",[],[],[],[],,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,109257699985414271,2022-10-30T14:29:57.000Z,109251845340402435,109274913869213582,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109257699997620515,...,"<p>for more on the challenges of migration, ch...",[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109274914176958397', 'username': 'plu...",[],[],{'url': 'https://doctorow.medium.com/how-to-le...,
1299,109257244402166230,2022-10-30T12:34:05.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109257244353278456,...,<p>I saw a tiktok that played this scene from ...,[],,"{'id': '109274913869213582', 'username': 'cfie...","[{'id': '109314182725328826', 'type': 'image',...",[],"[{'name': 'TheGoodPlace', 'url': 'https://scie...",[],,
1300,109252899682302929,2022-10-29T18:09:10.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109252899659082452,...,"<p>an <a href=""https://hci.social/tags/introdu...",[],,"{'id': '109274913869213582', 'username': 'cfie...",[],[],"[{'name': 'Introduction', 'url': 'https://scie...",[],,
1301,109251845340402435,2022-10-29T13:41:02.000Z,109248654456391898,109274913869213582,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109251845321040242,...,<p>Someone unrolled my bird thread about this ...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],[],"[{'name': 'twittermigration', 'url': 'https://...",[],{'url': 'https://threadreaderapp.com/thread/15...,


In [50]:
df_all_toots.drop_duplicates('id')

Unnamed: 0,id,created_at,in_reply_to_id,in_reply_to_account_id,sensitive,spoiler_text,visibility,language,uri,url,...,content,filtered,reblog,account,media_attachments,mentions,tags,emojis,card,poll
0,110489063647822039,2023-06-05T01:41:37.000Z,110488875560825729,109327849920776618,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110489062456206741,...,"<p><span class=""h-card""><a href=""https://hci.s...",[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109327849920776618', 'username': 'pg'...",[],[],,
1,110488764453590736,2023-06-05T00:25:28.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110488763013056688,...,<p>The very nice person who writes on cakes cl...,[],,"{'id': '109274913869213582', 'username': 'cfie...","[{'id': '110488764367663397', 'type': 'image',...",[],[],[],,
2,110464390159842304,2023-05-31T17:06:48.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110464388864587691,...,<p>found the bots in our Mastodon survey data</p>,[],,"{'id': '109274913869213582', 'username': 'cfie...","[{'id': '110464390053048894', 'type': 'image',...",[],[],[],,
3,110459966552111430,2023-05-30T22:21:47.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110459965088034460,...,<p>Your weekly reminder that I'm compiling a r...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],[],"[{'name': 'aiethicsnews', 'url': 'https://scie...",[],{'url': 'https://docs.google.com/spreadsheets/...,
4,110458292575777193,2023-05-30T15:16:05.000Z,,,False,,public,,https://hci.social/users/cfiesler/statuses/110...,,...,,[],"{'id': '110441210490958628', 'created_at': '20...","{'id': '109274913869213582', 'username': 'cfie...",[],[],[],[],,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,109257699985414271,2022-10-30T14:29:57.000Z,109251845340402435,109274913869213582,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109257699997620515,...,"<p>for more on the challenges of migration, ch...",[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109274914176958397', 'username': 'plu...",[],[],{'url': 'https://doctorow.medium.com/how-to-le...,
1299,109257244402166230,2022-10-30T12:34:05.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109257244353278456,...,<p>I saw a tiktok that played this scene from ...,[],,"{'id': '109274913869213582', 'username': 'cfie...","[{'id': '109314182725328826', 'type': 'image',...",[],"[{'name': 'TheGoodPlace', 'url': 'https://scie...",[],,
1300,109252899682302929,2022-10-29T18:09:10.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109252899659082452,...,"<p>an <a href=""https://hci.social/tags/introdu...",[],,"{'id': '109274913869213582', 'username': 'cfie...",[],[],"[{'name': 'Introduction', 'url': 'https://scie...",[],,
1301,109251845340402435,2022-10-29T13:41:02.000Z,109248654456391898,109274913869213582,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109251845321040242,...,<p>Someone unrolled my bird thread about this ...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],[],"[{'name': 'twittermigration', 'url': 'https://...",[],{'url': 'https://threadreaderapp.com/thread/15...,


In [52]:
start_date = datetime(2023,1,1)
end_date = datetime(2023,1,31)
select_toots = get_toots_of_user_between_dates(username, start_date, end_date)

40
80


In [53]:
df_select_toots = pd.DataFrame(select_toots)
df_select_toots['date'] = df_select_toots.created_at.apply(get_date_from_string)
df_select_toots

Unnamed: 0,id,created_at,in_reply_to_id,in_reply_to_account_id,sensitive,spoiler_text,visibility,language,uri,url,...,filtered,reblog,account,media_attachments,mentions,tags,emojis,card,poll,date
0,109770038507233024,2023-01-29T02:04:07.000Z,109769457419150796,109280246048887299,False,,unlisted,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109770037501727567,...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109280246048887299', 'username': 'asb...",[],[],,,2023-01-29 02:04:07
1,109767294528667392,2023-01-28T14:26:17.000Z,,,False,,public,,https://hci.social/users/cfiesler/statuses/109...,,...,[],"{'id': '109740145386169954', 'created_at': '20...","{'id': '109274913869213582', 'username': 'cfie...",[],[],[],[],,,2023-01-28 14:26:17
2,109767289380133727,2023-01-28T14:25:00.000Z,109767285439907920,109410675112416670,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109767288445160507,...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109410675112416670', 'username': 'spa...",[],[],,,2023-01-28 14:25:00
3,109767275078337902,2023-01-28T14:21:22.000Z,109767254462195509,109270991407203918,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109767274140614999,...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109270991407203918', 'username': 'jia...",[],[],,,2023-01-28 14:21:22
4,109767270504194919,2023-01-28T14:20:12.000Z,109767260096965253,109472665357221096,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109767269580244595,...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109472665357221096', 'username': 'kil...",[],[],,,2023-01-28 14:20:12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,109592748118269341,2022-12-28T18:36:31.000Z,,,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109592745813118112,...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],[],[],[],{'url': 'https://www.washingtonpost.com/dc-md-...,,2022-12-28 18:36:31
116,109591446449195947,2022-12-28T13:05:59.000Z,109589611839070291,109286119114006837,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109591446103451722,...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109286119114006837', 'username': 'hyp...",[],[],,,2022-12-28 13:05:59
117,109588898669387959,2022-12-28T02:18:04.000Z,109588885964559566,109304693533446697,False,,unlisted,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109588898439808139,...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109304693533446697', 'username': 'rya...",[],[],,,2022-12-28 02:18:04
118,109588872872861646,2022-12-28T02:11:32.000Z,109588814391130555,109304693533446697,False,,unlisted,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109588872717748128,...,[],,"{'id': '109274913869213582', 'username': 'cfie...",[],"[{'id': '109304693533446697', 'username': 'rya...",[],[],,,2022-12-28 02:11:32


In [55]:
df_all_toots['date'] = df_all_toots['created_at'].apply(get_date_from_string)

In [56]:
df_all_toots[(df_all_toots.date >start_date)&(df_all_toots.date < end_date)].date.describe()

  df_all_toots[(df_all_toots.date >start_date)&(df_all_toots.date < end_date)].date.describe()


count                      97
unique                     97
top       2023-01-29 02:04:07
freq                        1
first     2023-01-01 13:35:48
last      2023-01-29 02:04:07
Name: date, dtype: object

In [54]:
df_select_toots.date.describe()

  df_select_toots.date.describe()


count                     120
unique                    120
top       2023-01-29 02:04:07
freq                        1
first     2022-12-28 01:53:23
last      2023-01-29 02:04:07
Name: date, dtype: object

datetime.datetime(2023, 1, 10, 17, 4, 20)

In [None]:
username = '@QasimRashid@mastodon.social'
server = 'hci.social'
followers = get_followers_of_user(server, username)
len(followers)

In [None]:
df_followers = pd.DataFrame(followers)
df_followers.username.nunique()

In [None]:
df_following = pd.DataFrame(get_accounts_followed_by_user(server, username))
df_following

In [15]:
end_date = datetime(2023,1,1)
end_date.timestamp()

1672524000.0

In [17]:
bit_shifted = 110310925158294068 >> 16
bit_shifted

1683211138279

In [18]:
datetime.fromtimestamp(1683211138279/1000)
# datetime.datetime(2022, 11, 11, 18, 16, 4, 630000)

datetime.datetime(2023, 5, 4, 17, 38, 58, 279000)

In [23]:
(int(end_date.timestamp()) << 16)*1000

109610532864000000

In [21]:
(int( end_date.timestamp()) << 16)*1000

109610532864000000

In [None]:
result = parse(client.get("https://sciences.social/api/v1/accounts/109334421628967389"))
result["username"]

In [None]:
statuses = search_hashtag('democrats')

In [None]:
len(statuses)

In [None]:
df_statuses = pd.DataFrame(statuses)
df_statuses.columns