In [119]:
import time
import json
import os.path
import requests
import pandas as pd
from  datetime import date

In [46]:
# Run date; it is used to stamp the name of the TSV file written by the collection loop.
today = date.today()

In [47]:
from mastodon import Mastodon

# The secrets file holds four whitespace-separated values, in this order:
# client id, client secret, access token and the instance base URL.
mastodon_app = "acmbrito_app_python_secret"

# Read through a context manager so the file handle is closed immediately
# (a bare open(...).read() leaves closing to the garbage collector).
with open(mastodon_app) as secret_file:
    client_id, client_secret, access_token, api_base_url = secret_file.read().split()

# Mastodon.py client authenticated with the credentials loaded above.
api = Mastodon(
    client_id=client_id,
    client_secret=client_secret,
    access_token=access_token,
    api_base_url=api_base_url
)

In [48]:
# Load the per-user follower table. The file is tab-separated and is read
# without a header row, so the column names are supplied explicitly.
users = pd.read_csv(
    '../data/users_followers_African Studies_2023-05-02.csv',
    sep='\t',
    names=['user', 'instance', 'followers', 'field'],
)
print(users.head())

               user            instance   
0           rheinze        assemblag.es  \
1          BrianJAP      mastodon.world   
2       joelglasman     fediscience.org   
3  brenton_peterson     sciences.social   
4            keremd  social.anoxinon.de   

                                           followers            field  
0  [109417147220906349, 109243458234695495, 10924...  African Studies  
1  [1302832, 110023631275744313, 1099309840163259...  African Studies  
2                                                 []  African Studies  
3  [107192220324249637, 109354941450990518, 10951...  African Studies  
4  [1302832, 109212672053987101, 1092459114371518...  African Studies  


In [159]:
def get_userid(user_name, user_server, token_access, timeout=30):
    """Resolve a username to the account id assigned by its home instance.

    Queries the account lookup endpoint of ``user_server`` for
    ``user_name@user_server``.

    Parameters
    ----------
    user_name : str
        Local part of the handle (e.g. ``'keirol'``).
    user_server : str
        Host name of the user's instance (e.g. ``'mastodon.social'``).
    token_access : str or falsy
        Bearer token for ``user_server``. When it is falsy (for example the
        ``False`` returned by ``get_instance_token`` when no credentials are
        available) the request is sent without an ``Authorization`` header
        rather than with a bogus ``Bearer False`` one.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    str or None
        The account id, or ``None`` when the request fails, the server answers
        with an HTTP error, or the payload carries no ``id``. The reason is
        printed in that case.
    """
    url = 'https://{}/api/v1/accounts/lookup/'.format(user_server)

    headers = {}
    if token_access:
        headers['Authorization'] = 'Bearer {}'.format(token_access)

    params = {'acct': '{}@{}'.format(user_name, user_server)}

    try:
        r = requests.get(url, headers=headers, params=params, timeout=timeout)
        # Turn 4xx/5xx answers into an exception instead of parsing an error body.
        r.raise_for_status()
        return r.json()["id"]
    except (requests.RequestException, ValueError, KeyError, TypeError) as error:
        # Network/HTTP failure, non-JSON body, or a payload without an "id".
        print('get_userid({}@{}) failed: {!r}'.format(user_name, user_server, error))
        return None

In [160]:
# The access token is read from the environment instead of being pasted into the
# notebook: a literal token in a cell is exposed to everyone who can read the file.
# NOTE(review): the token that used to be written here should be revoked.
get_userid('keirol', 'mastodon.social', os.environ['MASTODON_SOCIAL_TOKEN'])

'110278049747310692'

In [50]:
# Load the app credentials (client id, client secret, access token, base URL)
# from the whitespace-separated secrets file. This repeats the load done in the
# Mastodon client cell; the context manager makes sure the handle is closed.
mastodon_app = "acmbrito_app_python_secret"
with open(mastodon_app) as secret_file:
    client_id, client_secret, access_token, api_base_url = secret_file.read().split()

In [51]:
# Cutoff date: user_toots() stops paging through a timeline once it reaches
# toots created before this day.
date_limit = date(year=2022, month=9, day=1)

In [161]:
def get_instance_token(instance, timeout=30):
    """Return an OAuth access token for ``instance``, or ``False`` if none can be had.

    Lookup order:

    1. ``token_<instance>.secret`` - a cached token response (JSON) from an
       earlier call. It is reused when it contains an ``access_token``.
    2. ``mastodon_app_key_<instance>.secret`` - a file whose first line is the
       client id and whose second line is the client secret. They are exchanged
       for a token with the ``client_credentials`` grant and the raw response
       is cached in ``token_<instance>.secret``.

    Parameters
    ----------
    instance : str
        Host name of the Mastodon instance.
    timeout : float, optional
        Seconds to wait for the token endpoint.

    Returns
    -------
    str or False
        The access token, or ``False`` when there is no credentials file, the
        file is incomplete, or the token request fails. Failed responses are
        never cached, so a later call can retry.
    """
    credentials_path = "mastodon_app_key_{}.secret".format(instance)
    token_path = 'token_{}.secret'.format(instance)

    # 1. Reuse the cached response when it really holds a token. An unusable
    #    cache file is ignored and a fresh token is requested below.
    if os.path.exists(token_path):
        try:
            with open(token_path) as cached_file:
                cached_token = json.load(cached_file).get('access_token')
        except (ValueError, AttributeError):
            cached_token = None
        if cached_token:
            return cached_token

    # 2. Without app credentials there is nothing to exchange for a token.
    if not os.path.exists(credentials_path):
        return False

    with open(credentials_path) as credentials_file:
        lines = credentials_file.read().splitlines()
    if len(lines) < 2:
        print('{}: expected client id and client secret on two lines'.format(credentials_path))
        return False
    client_id = lines[0].strip()
    client_secret = lines[1].strip()

    url = 'https://{}/oauth/token'.format(instance)
    payload = {
        'client_id' : client_id,
        'client_secret' : client_secret,
        'redirect_uri' : 'urn:ietf:wg:oauth:2.0:oob',
        'grant_type' : 'client_credentials'
    }

    try:
        # Send the credentials as form data: in the query string the client
        # secret would end up in URLs and therefore in server/proxy logs.
        r = requests.post(url, data=payload, timeout=timeout)
        r.raise_for_status()
        token = json.loads(r.text)['access_token']
    except (requests.RequestException, ValueError, KeyError, TypeError) as error:
        print('get_instance_token({}) failed: {!r}'.format(instance, error))
        return False

    # Cache only responses that were verified to contain a token.
    with open(token_path, 'w') as token_file:
        token_file.write(r.text)

    return token

In [164]:
# papers

# Background material:
#   - collecting user toots with Python and pandas:
#     https://jrashford.com/2023/02/13/how-to-scrape-mastodon-timelines-using-python-and-pandas/
#   - Mastodon.py timeline API code:
#     https://mastodonpy.readthedocs.io/en/1.8.1/_modules/mastodon/timeline.html?highlight=users%20in%20a%20given%20list

# Keep only the accounts hosted on a single instance.
instance_name = 'mastodon.social'
users_selected = users.loc[users['instance'] == instance_name]
# Values of the 'user' column for the selected accounts.
ids = users_selected['user'].values

# Mastodon search API (v1): https://docs.joinmastodon.org/methods/search/#v1

def user_toots(instance, user_id, token_access=None, since=None, delay=2, timeout=30):
    """Collect an account's statuses, paging backwards until a cutoff date.

    Pages through ``/api/v1/accounts/<user_id>/statuses`` on ``instance`` using
    ``max_id`` and stops when a page is empty or its last status is older than
    the cutoff.

    Parameters
    ----------
    instance : str
        Host name of the instance that owns the account.
    user_id : str or None
        Account id on that instance. ``None`` (a failed lookup) yields ``[]``.
    token_access : str, optional
        Bearer token to authenticate with. When omitted, the module-level
        ``access_token`` is used. A falsy token yields ``[]``.
    since : datetime.date, optional
        Statuses created before this day are dropped. Defaults to the
        module-level ``date_limit``.
    delay : float, optional
        Seconds to sleep before every request (rate limiting).
    timeout : float, optional
        Seconds to wait for each response.

    Returns
    -------
    list of dict
        Status objects as returned by the API. If a request fails part-way,
        the reason is printed and the statuses gathered so far are returned.
    """
    if user_id is None:
        # Nothing to query; avoids requesting ".../accounts/None/statuses".
        return []

    token = access_token if token_access is None else token_access
    if not token:
        return []
    cutoff = date_limit if since is None else since

    url = 'https://{}/api/v1/accounts/{}/statuses'.format(instance, user_id)
    headers = {'Authorization' : 'Bearer {}'.format(token)}

    collected = []
    params = {}
    while True:
        time.sleep(delay)
        try:
            r = requests.get(url, headers=headers, params=params, timeout=timeout)
            r.raise_for_status()
            page = r.json()
        except (requests.RequestException, ValueError) as error:
            print('user_toots({}, {}) stopped: {!r}'.format(instance, user_id, error))
            break

        # An error payload is a JSON object, not a list of statuses; extending
        # the result with it would silently add its keys as "toots".
        if not isinstance(page, list) or len(page) == 0:
            break

        page_dates = [
            date.fromisoformat(toot['created_at'].split('T')[0]) for toot in page
        ]
        # Keep only statuses on or after the cutoff day.
        collected.extend(
            toot for toot, created in zip(page, page_dates) if created >= cutoff
        )

        # The last status of a page decides whether to keep paging.
        if page_dates[-1] < cutoff:
            break
        params = {'max_id' : page[-1]['id']}

    return collected

In [128]:
# Columns of the normalized toot table that the collection loop selects.
valid_fields = [
    'id',
    'created_at',
    'in_reply_to_account_id',
    'uri',
    'replies_count',
    'reblogs_count',
    'favourites_count',
    'content',
]

In [165]:
# Collect the toots of the first 10 users in the table and save them as one TSV.
outputs = []
for _, user in users[:10].iterrows():
    instance_token = get_instance_token(user['instance'])
    user_id = get_userid(user['user'], user['instance'], instance_token)
    if user_id is None:
        # The lookup failed (get_userid printed the reason): no timeline to fetch.
        continue

    toots = user_toots(user['instance'], user_id)
    if len(toots) > 0:
        toots_pd = (
            pd.json_normalize(toots)
            # Actually keep only the wanted columns (the previous bare
            # `.loc[:, valid_fields]` expression discarded its result).
            # reindex also tolerates a column missing from a response.
            .reindex(columns=valid_fields)
            .assign(user_id=user_id)
        )
        outputs.append(toots_pd)

# pd.concat raises on an empty list, so fall back to an empty frame with the
# same columns. ignore_index gives the combined table one running index
# instead of repeating 0..n for every user.
data_toots = (
    pd.concat(outputs, ignore_index=True)
    if outputs
    else pd.DataFrame(columns=valid_fields + ['user_id'])
)
data_toots.to_csv('users_toots_{}.tsv'.format(today), sep='\t')

https://assemblag.es/api/v1/accounts/109287555889598271/statuses
https://assemblag.es/api/v1/accounts/109287555889598271/statuses?max_id=110145798822275289
https://assemblag.es/api/v1/accounts/109287555889598271/statuses?max_id=109703480628741398
https://assemblag.es/api/v1/accounts/109287555889598271/statuses?max_id=109642806958080131
https://assemblag.es/api/v1/accounts/109287555889598271/statuses?max_id=109477430489483115
https://assemblag.es/api/v1/accounts/109287555889598271/statuses?max_id=109439089280093853
https://assemblag.es/api/v1/accounts/109287555889598271/statuses?max_id=109387913676157868
https://assemblag.es/api/v1/accounts/109287555889598271/statuses?max_id=109377406918110694
https://assemblag.es/api/v1/accounts/109287555889598271/statuses?max_id=109359271807242046
https://assemblag.es/api/v1/accounts/109287555889598271/statuses?max_id=109330610262418625
https://assemblag.es/api/v1/accounts/109287555889598271/statuses?max_id=109310127819729768
https://assemblag.es/api/

KeyboardInterrupt: 

Unnamed: 0,id,created_at,in_reply_to_account_id,uri,replies_count,reblogs_count,favourites_count,content
0,110367559731957070,2023-05-14T14:41:53.294Z,,https://mastodon.social/users/AnkeFK/statuses/...,0,0,0,
1,110328621940497784,2023-05-07T17:39:29.749Z,,https://mastodon.social/users/AnkeFK/statuses/...,0,0,0,
2,110317671915039394,2023-05-05T19:14:45.617Z,,https://mastodon.social/users/AnkeFK/statuses/...,0,0,0,<p>Thunderstorm coming in... 🌩</p>
3,110250048489623430,2023-04-23T20:37:14.056Z,,https://mastodon.social/users/AnkeFK/statuses/...,0,0,0,
4,110208029833724061,2023-04-16T10:31:20.230Z,,https://mastodon.social/users/AnkeFK/statuses/...,0,0,0,
...,...,...,...,...,...,...,...,...
253,109247219971230416,2022-10-28T18:04:45.217Z,,https://mastodon.social/users/AnkeFK/statuses/...,0,0,2,"<p>Juhu, Ihr kommt alle!</p>"
254,109247132632389830,2022-10-28T17:42:32.522Z,,https://mastodon.social/users/AnkeFK/statuses/...,0,0,0,
255,109247125094972240,2022-10-28T17:40:37.526Z,109246120984435842,https://mastodon.social/users/AnkeFK/statuses/...,0,0,0,"<p><span class=""h-card""><a href=""https://masto..."
256,109244812098809832,2022-10-28T07:52:24.003Z,,https://mastodon.social/users/AnkeFK/statuses/...,0,0,3,"<p>Und wie heißen jetzt hier die <a href=""http..."
