In [1]:
%pip install Mastodon.py --upgrade

Note: you may need to restart the kernel to use updated packages.


## Mastodon API access

- Create an app to access Mastodon data
- Collect the followers of a given user
- Collect profile information (including the Twitter handle, if available) for a given user


In [1]:
import os
import glob
import parse
import numpy as np
import pandas as pd
import datetime

today = datetime.date.today()

https://github.com/mastodon/mastodon/

https://mastodonpy.readthedocs.io/en/stable/02_return_values.html#user-account-dicts


In [2]:
from mastodon import Mastodon

mastodon_app = "acmbrito_app_python_secret"

# The secrets file is expected to hold four whitespace-separated tokens, in
# this order: client id, client secret, access token, API base URL.
# Reading it inside a context manager closes the handle immediately instead
# of leaving that to the garbage collector.
with open(mastodon_app) as secret_file:
    client_id, client_secret, access_token, api_base_url = secret_file.read().split()

# Authenticated client used by the lookup helpers defined in later cells.
api1 = Mastodon(
    client_id=client_id,
    client_secret=client_secret,
    access_token=access_token,
    api_base_url=api_base_url
)

tutorial: https://gist.github.com/aparrish/661fca5ce7b4882a8c6823db12d42d26

In [5]:
def get_userid(user_name, user_server):
    """Resolve a ``user_name@user_server`` handle to its account id.

    The lookup goes through the shared ``api1`` client. Any failure (for
    example an account that does not exist) is printed and turned into
    ``None`` so that callers can simply skip that handle.

    Parameters
    ----------
    user_name : str
        Local part of the handle (without the leading ``@``).
    user_server : str
        Domain of the instance hosting the account.

    Returns
    -------
    The ``"id"`` entry of the looked-up account, or ``None`` on any error.
    """
    try:
        account = api1.account_lookup(f"{user_name}@{user_server}")
        return account["id"]
    except Exception as lookup_error:
        # Best effort: report the problem and let the caller carry on.
        print(lookup_error)
        return None


def get_userprofile(user_name, user_server):
    """Fetch the public profile of ``user_name@user_server`` as a flat tuple.

    Unlike ``get_userid`` this helper does not catch lookup errors; whatever
    ``api1.account_lookup`` raises propagates to the caller.

    Returns
    -------
    tuple
        ``(user_name, user_server, display_name, tt_user, followers,
        following, created, bio)`` where ``tt_user`` is the value of the
        profile field named exactly ``"Twitter"`` (empty string when there
        is none) and ``bio`` is the profile note with tab characters
        replaced by spaces.
    """
    profile = api1.account_lookup(f"{user_name}@{user_server}")

    # If several fields are labelled "Twitter", the last one wins.
    twitter_values = [
        entry["value"] for entry in profile["fields"] if entry["name"] == "Twitter"
    ]
    tt_user = twitter_values[-1] if twitter_values else ""

    # Tabs are replaced with spaces in the note, presumably so the tuple can
    # be written to a tab-separated file -- TODO confirm with the consumer.
    bio = profile["note"].replace("\t", " ")

    return (
        user_name,
        user_server,
        profile["display_name"],
        tt_user,
        profile["followers_count"],
        profile["following_count"],
        profile["created_at"],
        bio,
    )


In [6]:
def get_userfollowers(user_id, max_pages=5):
    """Return the ids of the followers of ``user_id``, reading at most ``max_pages`` result pages.

    Parameters
    ----------
    user_id
        Account id as returned by ``get_userid``.
    max_pages : int or None, optional
        Upper bound on the number of result pages to consume. Defaults to 5.
        ``None`` removes the cap and pages until the API returns nothing
        more; a value <= 0 returns an empty list without contacting the API.

    Returns
    -------
    list
        The ``"id"`` of every follower found on the pages that were read,
        in the order the API returned them.
    """
    if max_pages is not None and max_pages <= 0:
        return []

    followers_id = []
    pages_read = 0
    page = api1.account_followers(user_id)
    # The loop ends when the API hands back an empty/None page.
    while page:
        followers_id.extend(follower["id"] for follower in page)
        pages_read += 1
        # Check the cap *before* asking for the next page, so no request is
        # spent on a page that would be thrown away.
        if max_pages is not None and pages_read >= max_pages:
            break
        page = api1.fetch_next(page)

    return followers_id


In [7]:

handle = ["mastodon", "account"]

def get_scholars_handles(files, str_format, keywords=tuple(handle)):
    """Read scholar lists from CSV files and pull out their handle column.

    For every file, the first column whose (lower-cased) name contains one
    of ``keywords`` is taken as the list of handles.

    Parameters
    ----------
    files : iterable of str
        Paths of the CSV files to read.
    str_format : str
        ``parse`` template with one ``{}`` placeholder that captures the
        list name from a file path, e.g. ``"csv/others/{}.csv"``.
    keywords : sequence of str, optional
        Substrings that identify the handle column. The default is a
        snapshot of the module-level ``handle`` list taken when the function
        is defined, so later rebinding of the name ``handle`` (for instance
        by a loop variable) cannot change the matching.

    Returns
    -------
    dict
        Maps the captured list name to the array of values of the matching
        column. Files whose path does not match ``str_format`` or that have
        no matching column contribute no entry.
    """
    mastodon_users = dict()
    for file in files:
        print(file)
        # Resolve the key first: a path that does not fit the template could
        # never be stored, and indexing a failed (None) parse would raise an
        # unhelpful TypeError.
        parsed = parse.parse(str_format, file)
        if parsed is None:
            print("skipping {}: path does not match {!r}".format(file, str_format))
            continue

        df = pd.read_csv(file, on_bad_lines="warn")
        for col in df.columns:
            # str() guards against non-string column labels.
            col_lower = str(col).lower()
            if any(keyword in col_lower for keyword in keywords):
                mastodon_users[parsed[0]] = df[col].values
                break

    return mastodon_users

# Alternative input set (scholar lists stored directly under csv/):
# files = glob.glob(os.path.join("csv", "*.csv"))
# mastodon_users = get_scholars_handles(files, os.path.join("csv", "scholars_{}.csv"))

# Build the glob pattern and the parse template from the same path
# components: the separator then matches on every OS, and the template no
# longer relies on the invalid escape sequences "\o" and "\{".
others_dir = os.path.join("csv", "others")
files = glob.glob(os.path.join(others_dir, "*.csv"))
mastodon_users = get_scholars_handles(files, os.path.join(others_dir, "{}.csv"))

csv\others\Astrophysicists.csv
csv\others\Biophysicists.csv
csv\others\Cheminformaticians and Computational Chemists.csv
csv\others\Communication, Media Studies, and Adjacent Fields.csv
csv\others\Geographers.csv
csv\others\GLAMS.csv
csv\others\Health Psychologists.csv
csv\others\International Image Interoperability Framework.csv
csv\others\Palaeogenomicists.csv
csv\others\Planetary Science.csv
csv\others\Social Workers on Mastodon.csv
csv\others\Tech Policy.csv


Skipping line 487: expected 2 fields, saw 3
Skipping line 503: expected 2 fields, saw 3
Skipping line 506: expected 2 fields, saw 3
Skipping line 511: expected 2 fields, saw 3
Skipping line 515: expected 2 fields, saw 3
Skipping line 518: expected 2 fields, saw 3
Skipping line 520: expected 2 fields, saw 3
Skipping line 523: expected 2 fields, saw 3
Skipping line 525: expected 2 fields, saw 3
Skipping line 533: expected 2 fields, saw 3
Skipping line 537: expected 2 fields, saw 3
Skipping line 538: expected 2 fields, saw 3
Skipping line 541: expected 2 fields, saw 3
Skipping line 547: expected 2 fields, saw 3
Skipping line 548: expected 2 fields, saw 3
Skipping line 550: expected 2 fields, saw 3
Skipping line 551: expected 2 fields, saw 3
Skipping line 552: expected 2 fields, saw 3
Skipping line 553: expected 2 fields, saw 3
Skipping line 560: expected 2 fields, saw 3
Skipping line 562: expected 2 fields, saw 3
Skipping line 564: expected 2 fields, saw 3



In [15]:
# Show the list names for which a handle column was found.
for field in mastodon_users:
    print(field)

Astrophysicists
Biophysicists
Cheminformaticians and Computational Chemists
Communication, Media Studies, and Adjacent Fields
Geographers
GLAMS
Health Psychologists
International Image Interoperability Framework
Palaeogenomicists
Planetary Science
Social Workers on Mastodon
Tech Policy


In [8]:

# Fixed date stamp used in the output file names; presumably pinned so that a
# resumed run finds the files written on that day and skips finished fields.
today = '2023-05-02'

# One tab-separated file per field: user name, server, follower ids, field.
for field, scholars in mastodon_users.items():
    output_filename = "../data/users_followers_{}_{}.csv".format(field, today)
    if os.path.exists(output_filename):
        print('already done {}'.format(field))
        continue

    output = []
    # The loop variable is deliberately not called `handle`, which is the
    # module-level list of column keywords.
    for scholar_handle in scholars:
        # Missing cells arrive as float NaN; anything that is not text cannot
        # be a handle, so skip it instead of failing on .startswith().
        if not isinstance(scholar_handle, str):
            continue

        # Stray surrounding whitespace would otherwise end up in the lookup.
        account = scholar_handle.strip()
        if account.startswith("@"):
            account = account[1:]

        parsed = parse.parse("{}@{}", account)
        if not parsed:
            continue
        user_name, user_server = parsed[0], parsed[1]

        user_id = get_userid(user_name, user_server)
        if user_id:
            followers = get_userfollowers(user_id)
            output.append((user_name, user_server, followers, field))

    print("finished {}".format(field))
    # The context manager closes the file even if a write fails.
    with open(output_filename, "w") as outfile:
        for line in output:
            outfile.write("{}\t{}\t{}\t{}\n".format(*line))


already done Astrophysicists
already done Biophysicists
already done Cheminformaticians and Computational Chemists
already done Communication, Media Studies, and Adjacent Fields
already done Geographers
already done GLAMS
already done Health Psychologists
already done International Image Interoperability Framework
already done Palaeogenomicists
already done Planetary Science
already done Social Workers on Mastodon
('Mastodon API returned error', 404, 'Not Found', 'Record not found')
('Mastodon API returned error', 404, 'Not Found', 'Record not found')
('Mastodon API returned error', 404, 'Not Found', 'Record not found')
('Mastodon API returned error', 404, 'Not Found', 'Record not found')
('Mastodon API returned error', 404, 'Not Found', 'Record not found')
('Mastodon API returned error', 404, 'Not Found', 'Record not found')
('Mastodon API returned error', 404, 'Not Found', 'Record not found')
('Mastodon API returned error', 404, 'Not Found', 'Record not found')
('Mastodon API returne

Alternative approach: select Mastodon users by starting from seed accounts and running a depth-limited DFS over their followers.

In [None]:
# Seed accounts from which the follower crawl starts.
seeds = [
    '@Nature@mstdn.social',
    '@PNASNews@fediscience.org',
    '@sciencemagazine@sciencemastodon.com',
]

# Resolve each seed handle to an account id. The result stays index-aligned
# with `seeds`; a failed lookup leaves None at that position.
seed_ids = [
    get_userid(match[0], match[1])
    for match in (parse.parse('@{}@{}', seed) for seed in seeds)
]


def get_users(user_id, deep, _seen=None):
    """Collect the accounts reachable from ``user_id`` through follower links.

    Performs a depth-limited, depth-first walk: the followers of
    ``user_id`` are hop 1, their followers hop 2, and so on up to ``deep``
    hops.

    Parameters
    ----------
    user_id
        Account id to start from.
    deep : int
        Maximum number of hops. Zero or a negative value yields an empty set.
    _seen : dict, optional
        Internal memo shared across the recursion; callers should leave it
        unset. Maps an account id to ``(largest depth explored, followers)``.

    Returns
    -------
    set
        Ids of every account found within ``deep`` hops. The starting
        account itself is included only if it turns up as somebody's
        follower along the way.
    """
    if _seen is None:
        _seen = {}
    if deep <= 0:
        return set()

    explored_depth, followers = _seen.get(user_id, (0, None))
    if followers is None:
        # First visit: this is the only place that hits the API, so every
        # account's follower list is downloaded at most once per crawl.
        followers = set(get_userfollowers(user_id) or ())
    elif explored_depth >= deep:
        # Already expanded at least this deep (or currently being expanded
        # further up the stack): its reachable set is, or will be, merged by
        # that call, so there is nothing new to discover here.
        return set(followers)

    # Record before recursing so that cycles short-circuit above.
    _seen[user_id] = (deep, followers)

    reached = set(followers)
    for follower in followers:
        reached |= get_users(follower, deep - 1, _seen)
    return reached
    
# Crawl two hops out from the first seed account.
test = get_users(seed_ids[0], deep=2)

In [None]:
# Write the collected account ids, one per line. The context manager
# guarantees the file is flushed and closed even if a write fails.
with open('mastodon_users.txt', 'w') as out:
    out.writelines("{}\n".format(row) for row in test)

In [None]:
# TODO
# - Twitter / Mastodon scholars: fields of study?
# - Update plot: Twitter mentioning Mastodon
# - Collect profile data
# - Comments about finding followers