# Tutorial Outline

### Twitter Data Collection
1. twarc2 for data collection
2. Scraping Twitter with Selenium

### Reddit Data Collection
1. Reddit Data Using Selenium
2. Reddit Data Using PRAW 
- Is PRAW access changing?
3. Other options for Reddit (Pushshift files for past data)

### Mastodon Data Collection
1. Mastodon Data using httpx + Mastodon API
2. Mastodon Data using tweepy-mastodon wrapper for Mastodon API

### Twitter Data Collection using twarc2

#### What you'll need? 
1. Twitter API credentials (at least the Bearer token)
2. Academic API credentials (only for full archive access)

#### Setp 1: Setting up twarc: https://twarc-project.readthedocs.io/en/latest/twarc2_en_us/#configure

#### Step 2: Define functions to use twarc2 with custom parameters for tweet, user, and search queries

In [10]:
import os

In [6]:
def search(par_directory, query, counts=True, archive=True):
    start_date = '2019-01-01'
    end_date = '2020-07-01'
    out_file = par_directory + query + '.jsonl'
    print(query)
    if counts:
        out_file = '../counts/{}_counts.jsonl'.format(query)
        if archive:
            !twarc2 counts --archive --start-time {start_date} --end-time {end_date} {query} {out_file}
        else:
            !twarc2 counts --start-time {start_date} --end-time {end_date} {query} {out_file}
    else:
        if archive:
            !twarc2 search --archive --start-time {start_date} --end-time {end_date} --user-fields "created_at,description,id,location,protected,public_metrics,url,username,verified" {query} {out_file}
        else:
            !twarc2 search --start-time {start_date} --end-time {end_date} --user-fields "created_at,description,id,location,protected,public_metrics,url,username,verified" {query} {out_file}

def get_tweets(par_directory, screen_name, counts=True):
    start_date = '2019-01-01'
    end_date = '2020-07-01'
    out_file = par_directory + screen_name + '.jsonl'
    query = 'from:'+screen_name
    print(query)
    if counts:
        out_file = '../counts/{}_counts.jsonl'.format(screen_name)
        !twarc2 counts --archive --start-time {start_date} --end-time {end_date} {query} {out_file}
    else:
        !twarc2 search --archive --start-time {start_date} --end-time {end_date} --user-fields "created_at,description,id,location,protected,public_metrics,url,username,verified" {query} {out_file}
    
def get_tweets_from_userid(par_directory, user_id):
    start_date = '2018-01-01'
    end_date = '2022-11-10'
    out_file = par_directory + user_id + '.jsonl'
    query = 'from:'+user_id
#     print(query)
    !twarc2 search --archive --start-time {start_date} --end-time {end_date} --user-fields "created_at,description,id,location,protected,public_metrics,url,username,verified" {query} {out_file}
    
    
def get_tweets_that_mention_user(par_directory, screen_name, counts=True):
    start_date = '2023-02-21'
    end_date = '2023-02-28'
    out_file = par_directory + screen_name + '.jsonl'
    query = """'@{} -from:{}'""".format(screen_name, screen_name)
    if counts:
        out_file = '../counts/{}_counts.jsonl'.format(screen_name)
        !twarc2 counts --archive --start-time {start_date} --end-time {end_date} {query} {out_file}
    else:
        !twarc2 search --archive --start-time {start_date} --end-time {end_date} --user-fields "created_at,description,id,location,protected,public_metrics,url,username,verified" {query} {out_file}
    
def get_conversation(par_directory, screen_name, conversation_id):
    out_dir = par_directory + screen_name + '/'
    if not screen_name in os.listdir(par_directory):
        os.mkdir(out_dir)
    out_file = out_dir + conversation_id + '.jsonl'
    if conversation_id + '.jsonl' in os.listdir(out_dir):
        return
    !twarc2 conversation --archive {conversation_id} {out_file}    
    
def get_retweeters_of_tweet(par_directory, tweet_id):
    out_file = par_directory + tweet_id + '.jsonl'
    !twarc2 retweeted-by --archive {tweet_id} {out_file}
    
def get_likers_of_tweet(par_directory, tweet_id):
    out_file = par_directory + tweet_id + '.jsonl'
    !twarc2 liking-users --archive {tweet_id} {out_file}
    
def get_likes_by_user(par_directory, userid):
    out_file = par_directory + userid + '.jsonl'
    !twarc2 liked-tweets --archive {userid} {out_file}
    
def get_friends_of_user(par_directory, screen_name):
    out_file = par_directory + screen_name + '.jsonl'
    !twarc2 following {screen_name} {out_file}

def get_followers_of_user(par_directory, screen_name):
    out_file = par_directory + screen_name + '.jsonl'
    !twarc2 followers {screen_name} {out_file}
    
def get_followersf_user(par_directory, tweet_id):
    out_file = par_directory + tweet_id + '.jsonl'
    !twarc2 quotes {tweet_id} {out_file}

In [12]:
par_dir = '../counts/'
os.mkdir(par_dir)
get_tweets(par_dir, 'RahulGandhi')

from:RahulGandhi
100%|█| Processed 1 year, 5 months/1 year, 5 months [00:24<00:00, 654 tweets tot


In [8]:
query = 'from:RahulGandhi'
out_file = '../results/RahulGandhi.json'
!twarc2 search --user-fields "created_at,description,id,location,protected,public_metrics,url,username,verified" {query} {out_file}

  0%|                     | Processed a moment/6 days [00:00<?, 0 tweets total ]

KeyboardInterrupt: 

#### Step 3: Scraping  Subreddits

In [34]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
opts = Options()
opts.headless = True
import time
assert opts.headless  # Operating in headless mode

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd

In [3]:
browser = Firefox(options=opts)
browser.maximize_window()
# browser.implicitly_wait(30)
# wait = WebDriverWait(browser, 50)
URL = 'https://www.reddit.com/r/republicans/top/'

In [4]:
browser = Firefox(options=opts)
browser.maximize_window()
# browser.implicitly_wait(30)
# wait = WebDriverWait(browser, 50)
URL = 'https://www.reddit.com/r/republicans/new/'

browser.get(URL)
browser.maximize_window()
print(browser.title)
results = browser.find_elements(By.CSS_SELECTOR,'[data-testid=\"post-container\"]')
posts = []
for r in results:
    #     total_height += r.size['height']
    #     print(r.size)
#     time.sleep(3)
    try:
#         reddit_post = r.get_attribute('textContent')
        reddit_post = r.text
        posts.append(reddit_post)
        location = r.location
        size = r.size
    except Exception as e:
        print(e)

for p in posts:
    print(p)

Republicans - RNC - GOP: Grand Old Party
3
Posted by
u/Signal_Lemon_8926
6 hours ago
Trump May Not Commit to Supporting the Eventual 2024 Republican Nominee
patriot911news.com/2023/0...
1 Comment
Share
Save
3.0k
u/__BlueStacks__
•Promoted
67
49
57
9
& 2 More
Android gaming on PC, latest Android games, product updates, technical queries, and more. Happy gaming, happy redditing.
www.reddit.com
Learn More
2.3k Comments
Share
Save
8
Crossposted by
u/SequoiaBoi
12 hours ago
I’m sure her trips are funded by Michigan taxpayers
r/DeSantis
•
Posted by
u/SequoiaBoi
12 hours ago
I’m sure her trips are funded by Michigan taxpayers
DESANTIS 2024
31 points2 comments
2 Comments
Share
Save
8
Posted by
u/Signal_Lemon_8926
15 hours ago
WATCH: Trump Spotted in NYC on Memorial Day Weekend, Draws Loud Cheering from Supporters
trendingpoliticsnews.com/watch-...
1 Comment
Share
Save
3
Posted by
u/Snackasm
18 hours ago
But at least we got ice cream
4 Comments
Share
Save
9
Posted by
u/Signal_Lemon_8926
20 hour

### Task 2B: Collecting Reddit data using PRAW

In [5]:
import praw

In [6]:
reddit = praw.Reddit(client_id='ux5n5EbN6WewPyBw85bkuA', client_secret='gb88EulTvN_Cdd8xHRtWLx1JcBrbqw', user_agent='Test101')


In [7]:
hot_posts = reddit.subreddit('MachineLearning').hot(limit=10)
for post in hot_posts:
    print(post.title)

[D] Simple Questions Thread
Reminder: Use the report button and read the rules!
[N] Hinton, Bengio, and other AI experts sign collective statement on AI risk
[Discussion] Guidance to stay somewhat up-to date in AI
[R] LaVIN: Large Vision-Language Instructed Model
[D] Graph neural network on multiple graphs
[N] Researchers from MIT and McMaster University leveraged a machine learning AI algorithm to discover a new antibiotic for drug-resistant infections caused by Acinetobacter baumannii
[R] 1m+ High Res. vehicle images
[N] Free Machine Learning virtual conference tomorrow
[D] KPIs for Machine Learning Teams in an Industry Setting


In [8]:
posts = []
ml_subreddit = reddit.subreddit('JoeBiden')
for post in ml_subreddit.hot(limit=1000):
    posts.append([post.title, post.score, post.id, post.subreddit, post.url, post.num_comments, post.selftext, post.created])
posts = pd.DataFrame(posts,columns=['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created'])
# print(posts)

In [9]:
posts

Unnamed: 0,title,score,id,subreddit,url,num_comments,body,created
0,Good speech Joe,159,13uysuq,JoeBiden,https://i.redd.it/el4ijwp84u2b1.jpg,3,Good one for this memorial day. Ole Joe is the...,1.685376e+09
1,"Biden, Erdogan discuss F-16s and Sweden in pho...",40,13v7i3h,JoeBiden,https://www.gossipslife.com/2023/05/biden-erdo...,4,,1.685397e+09
2,"President Biden, please remember when you said...",77,13uq8jh,JoeBiden,https://ukrainetoday.org/2023/05/28/the-wind-w...,5,,1.685354e+09
3,Biden Administration Announces Indo-Pacific De...,48,13uhanw,JoeBiden,https://www.nytimes.com/2023/05/27/business/ec...,1,,1.685324e+09
4,Biden announces Indo-Pacific economic pact to ...,164,13u7eph,JoeBiden,https://www.nbcnews.com/politics/politics-news...,6,,1.685298e+09
...,...,...,...,...,...,...,...,...
721,Running Dems in “Christian” Red States - VIDEO,17,10fhu4o,JoeBiden,https://www.reddit.com/r/JoeBiden/comments/10f...,0,Video - [https://youtu.be/oQPkNIxeQlM](https:/...,1.674074e+09
722,San Marino and Finland Latest European Nations...,21,10fds3h,JoeBiden,https://liberalwisconsin.blogspot.com/2023/01/...,0,,1.674064e+09
723,"Biden Calls for Police Officer Retraining, Que...",325,10ervcp,JoeBiden,https://www.businessinsider.com/biden-calls-fo...,18,,1.674000e+09
724,Steph Curry thanks Biden for bringing Griner home,148,10eu4dr,JoeBiden,https://www.bbc.com/news/av/64313215,0,,1.674005e+09


### Task 3: Mastodon API

In [19]:
import httpx
import getpass
from IPython.core import display
from dateutil import parser
from datetime import datetime, timedelta
from mastodon import Mastodon
# import zoneinfo
# ks3yuAI7JcF3VhhA8euLqN1MHvMrpyzd4l2HOs1udac

In [14]:
token=getpass.getpass()

········


In [15]:
client = httpx.Client(headers=dict(Authorization=f"Bearer {token}"))

In [16]:
res = client.get("https://sciences.social/api/v1/accounts/verify_credentials")


In [17]:
res.raise_for_status()
result = res.json()
result["id"], result["username"]

('109334421628967389', 'anmolpanda')

In [None]:
usernames = [
    'https://mastodon.social/@RepShontelBrown'
]

In [142]:
def parse(result):
    result.raise_for_status()
    return result.json()

def get_toots_of_user(username):
    account = parse(client.get("https://sciences.social/api/v1/accounts/lookup?acct={}".format(username)))
    id_ = account['id']
    result = client.get("https://sciences.social/api/v1/accounts/{}/statuses?limit=40".format(id_))
    all_statuses = []
    statuses = parse(result)
    all_statuses.extend(statuses)
    while len(statuses) > 0:
        print(len(all_statuses))
        result = client.get("https://sciences.social/api/v1/accounts/{}/statuses?limit=40&max_id={}".format(id_, all_statuses[-1]['id']))
        statuses = parse(result)
        all_statuses.extend(statuses)
    return all_statuses

def get_toots_of_user_between_dates(username):
    pass

# https://shkspr.mobi/blog/2022/11/building-an-on-this-day-service-for-mastodon/

def get_followers_of_user(server, username):
    account = parse(client.get("https://{}/api/v1/accounts/lookup?acct={}".format(server, username)))
    id_ = account['id']
    results = client.get("https://{}/api/v1/accounts/{}/followers?limit=80".format(server, id_))
    followers = parse(results)
    i=1
    print('Request: {}, Number of followers: {}'.format(i, len(followers)))
    while True:
        link = results.headers['link']
        if not 'next' in link:
            break
        next_url = link.split('<')[1].split('>')[0]
        results = client.get(next_url)
        followers.extend(parse(results))
        i+=1
        print('Request: {}, Number of followers: {}'.format(i, len(followers)))
    return followers
        
def get_accounts_followed_by_user(server, username):
    account = parse(client.get("https://{}/api/v1/accounts/lookup?acct={}".format(server, username)))
    id_ = account['id']
    results = client.get("https://{}/api/v1/accounts/{}/following?limit=80".format(server, id_))
    following = parse(results)
    i=1
    print('Request: {}, Number of accounts followed: {}'.format(i, len(following)))
    while True:
        link = results.headers['link']
        if not 'next' in link:
            break
        next_url = link.split('<')[1].split('>')[0]
        results = client.get(next_url)
        following.extend(parse(results))
        i+=1
        print('Request: {}, Number of accounts followed: {}'.format(i, len(following)))
    return following
# TRY WITH mastodon.search() and mastodon.timeline()
def get_toots_of_user_between_dates_w_mastodon(username, start_date, end_date):
    #  Set up access
    instance = "https://sciences.social"
    mastodon = Mastodon( api_base_url=instance, access_token=token)

    #  Bitwise shift the integer representation and convert to milliseconds
#     min_id = ( int( start_date.timestamp() ) << 16 ) * 1000
    max_id = ( int( end_date.timestamp() ) << 16 ) * 1000
    min_id = 110307552862407741
    #  Get User's Mastodon ID
    account = parse(client.get("https://sciences.social/api/v1/accounts/lookup?acct={}".format(username)))
    id_ = account['id']
    #  Call the API
    statuses = mastodon.account_statuses(id = id_, 
#                                          max_id=max_id, 
                                         min_id=min_id, 
                                         limit="40", 
                                         exclude_reblogs=False)
    #  Fetch further statuses if there are any
    all_statuses = mastodon.fetch_remaining(statuses)
    #  Print the date and URl
    return all_statuses

In [34]:
toots = get_toots_of_user('@cfiesler@hci.social')

40
80
120
160
200
240
280
320
360
400
440
480
520
560
600
640
680
720
760
800
840
880
920
960
1000
1040
1080
1120
1160
1200
1240
1280
1289


In [136]:
username = '@cfiesler@hci.social'
server = 'hci.social'
followers = get_followers_of_user(server, username)
len(followers)

Request: 1, Number of followers: 80
Request: 2, Number of followers: 160
Request: 3, Number of followers: 240
Request: 4, Number of followers: 320
Request: 5, Number of followers: 400
Request: 6, Number of followers: 480
Request: 7, Number of followers: 560
Request: 8, Number of followers: 640
Request: 9, Number of followers: 720
Request: 10, Number of followers: 800
Request: 11, Number of followers: 880
Request: 12, Number of followers: 960
Request: 13, Number of followers: 1040
Request: 14, Number of followers: 1120
Request: 15, Number of followers: 1200
Request: 16, Number of followers: 1280
Request: 17, Number of followers: 1360
Request: 18, Number of followers: 1440
Request: 19, Number of followers: 1520
Request: 20, Number of followers: 1600
Request: 21, Number of followers: 1680
Request: 22, Number of followers: 1760
Request: 23, Number of followers: 1840
Request: 24, Number of followers: 1920
Request: 25, Number of followers: 2000
Request: 26, Number of followers: 2080
Request:

10630

In [137]:
df_followers = pd.DataFrame(followers)
df_followers.username.nunique()

9851

In [143]:
df_following = pd.DataFrame(get_accounts_followed_by_user(server, username))
df_following

Request: 1, Number of accounts followed: 80
Request: 2, Number of accounts followed: 160
Request: 3, Number of accounts followed: 240
Request: 4, Number of accounts followed: 320
Request: 5, Number of accounts followed: 400
Request: 6, Number of accounts followed: 406


Unnamed: 0,id,username,acct,display_name,locked,bot,discoverable,group,created_at,note,...,header_static,followers_count,following_count,statuses_count,last_status_at,emojis,fields,noindex,roles,moved
0,109248648687911826,jgarforth,jgarforth@mastodon.social,James Garforth,False,False,True,False,2022-10-28T00:00:00.000Z,<p>Teaches ethics / social responsibility to c...,...,https://storage.googleapis.com/hci-social-stor...,21,26,1,,[],[],,,
1,110336571001040048,schock,schock@mas.to,Sasha Costanza-Chock,False,False,True,False,2023-01-02T00:00:00.000Z,"<p>Scholar, troublemaker, author of _Design Ju...",...,https://storage.googleapis.com/hci-social-stor...,31,13,8,2023-05-29,[],"[{'name': 'Pronouns', 'value': 'She/They/Elle'...",,,
2,110312064429057924,tarleton,tarleton@aoir.social,tarleton,False,False,False,False,2023-05-04T00:00:00.000Z,"<p>I'm an independent-minded academic, critica...",...,https://storage.googleapis.com/hci-social-stor...,1849,7,1,2023-05-04,[],[],,,
3,108324261538023333,dsalo,dsalo@digipres.club,"Dorothea ""LibSkrat"" Salo",False,False,True,False,2019-01-26T00:00:00.000Z,"<p>Mutinous librarian, iSchool educator, jack-...",...,https://storage.googleapis.com/hci-social-stor...,1254,857,3804,2023-06-01,[],"[{'name': 'Website', 'value': '<a href=""https:...",,,
4,109362963049802357,ben,ben@learningscienc.es,Ben Shapiro,False,False,True,False,2023-04-26T00:00:00.000Z,<p>Not that one.<br>Learning Sciences research...,...,https://hci.social/headers/original/missing.png,147,281,95,2023-06-01,[],[],,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401,108326060209624978,pg,pg,Philip Guo,False,False,False,False,2022-05-19T00:00:00.000Z,,...,https://storage.googleapis.com/hci-social-stor...,400,37,392,2023-06-01,[],"[{'name': 'url', 'value': '<a href=""https://pg...",False,[],
402,108280517522446278,jbigham,jbigham,Jeffrey P. Bigham 🔥🔥,False,False,True,False,2022-05-11T00:00:00.000Z,<p>Professor of HCII and LTI at CMU SCS. Manag...,...,https://storage.googleapis.com/hci-social-stor...,2722,1117,2917,2023-06-02,[],"[{'name': 'www', 'value': '<a href=""https://ww...",False,[],
403,108233111945670878,andresmh,andresmh,Andrés Monroy-Hernández,False,False,True,False,2022-05-02T00:00:00.000Z,<p>Director of Princeton HCI. Faculty at Princ...,...,https://storage.googleapis.com/hci-social-stor...,2392,786,4533,2023-06-02,[],"[{'name': '🌎 Website', 'value': '<a href=""http...",False,"[{'id': '3', 'name': 'Owner', 'color': ''}]",
404,108236223653826827,axz,axz,Amy Zhang,False,False,True,False,2022-05-03T00:00:00.000Z,"<p>UW CSE professor 👩🏻‍🏫 PI <span class=""h-car...",...,https://storage.googleapis.com/hci-social-stor...,2276,1177,703,2023-06-01,[],"[{'name': 'Homepage', 'value': '<a href=""https...",False,"[{'id': '3', 'name': 'Owner', 'color': ''}]",


In [122]:
username = '@cfiesler@hci.social'
account = parse(client.get("https://hci.social/api/v1/accounts/lookup?acct={}".format(username)))
id_ = account['id']
results = client.get("https://hci.social/api/v1/accounts/{}/followers?limit=80".format(id_))
results

<Response [200 OK]>

In [123]:
followers = parse(results)
len(followers), id_

(80, '109248525508716712')

In [124]:
results.headers

Headers([('date', 'Fri, 02 Jun 2023 09:06:48 GMT'), ('content-type', 'application/json; charset=utf-8'), ('transfer-encoding', 'chunked'), ('connection', 'keep-alive'), ('vary', 'Accept-Encoding'), ('server', 'Mastodon'), ('x-frame-options', 'DENY'), ('x-content-type-options', 'nosniff'), ('x-xss-protection', '0'), ('permissions-policy', 'interest-cohort=()'), ('referrer-policy', 'same-origin'), ('x-ratelimit-limit', '300'), ('x-ratelimit-remaining', '296'), ('x-ratelimit-reset', '2023-06-02T09:10:00.897672Z'), ('cache-control', 'private, no-store'), ('vary', 'Accept, Origin'), ('link', '<https://hci.social/api/v1/accounts/109248525508716712/followers?limit=80&max_id=204507>; rel="next", <https://hci.social/api/v1/accounts/109248525508716712/followers?limit=80&since_id=205246>; rel="prev"'), ('etag', 'W/"9c7d75e5408ce18cabccec2b30fe6908"'), ('content-security-policy', "default-src 'none'; frame-ancestors 'none'; form-action 'none'"), ('x-request-id', 'f0881614-b115-4f99-994c-59e9c98b36

In [125]:
next_url = results.headers['link'].split('<')[1].split('>')[0]
next_url

'https://hci.social/api/v1/accounts/109248525508716712/followers?limit=80&max_id=204507'

In [126]:
page_2 = client.get(next_url)
page_2

<Response [200 OK]>

In [127]:
page_2_followers = parse(page_2)
len(page_2_followers)

80

In [117]:
page_2_followers

[{'id': '109368104477705782',
  'username': 'kelly',
  'acct': 'kelly',
  'display_name': 'kelly quinn',
  'locked': False,
  'bot': False,
  'discoverable': True,
  'group': False,
  'created_at': '2022-11-19T00:00:00.000Z',
  'note': '<p>contemplates all things new media, comm geek, thinks about <a href="https://sciences.social/tags/privacy" class="mention hashtag" rel="tag">#<span>privacy</span></a>, loves mountains</p>',
  'url': 'https://sciences.social/@kelly',
  'avatar': 'https://cdn.masto.host/sciencessocial/accounts/avatars/109/368/104/477/705/782/original/d9cc5b35058f8dba.jpeg',
  'avatar_static': 'https://cdn.masto.host/sciencessocial/accounts/avatars/109/368/104/477/705/782/original/d9cc5b35058f8dba.jpeg',
  'header': 'https://sciences.social/headers/original/missing.png',
  'header_static': 'https://sciences.social/headers/original/missing.png',
  'followers_count': 11,
  'following_count': 22,
  'statuses_count': 3,
  'last_status_at': '2023-01-04',
  'noindex': False,
 

In [63]:
instance = "https://sciences.social"
mastodon = Mastodon( api_base_url=instance, access_token=token)

In [66]:
mastodon.account_lookup('@cfiesler@hci.social')

{'id': 109274913869213582,
 'username': 'cfiesler',
 'acct': 'cfiesler@hci.social',
 'display_name': 'Dr. Casey Fiesler',
 'locked': False,
 'bot': False,
 'discoverable': True,
 'group': False,
 'created_at': datetime.datetime(2022, 10, 28, 0, 0, tzinfo=tzutc()),
 'note': '<p>information science prof at university of colorado boulder, social computing / tech ethics researcher, exceptionally minuscule tiktok star, fangirl / geek, she/her</p>',
 'url': 'https://hci.social/@cfiesler',
 'avatar': 'https://cdn.masto.host/sciencessocial/cache/accounts/avatars/109/274/913/869/213/582/original/a565172c9eb85a78.jpeg',
 'avatar_static': 'https://cdn.masto.host/sciencessocial/cache/accounts/avatars/109/274/913/869/213/582/original/a565172c9eb85a78.jpeg',
 'header': 'https://cdn.masto.host/sciencessocial/cache/accounts/headers/109/274/913/869/213/582/original/ea18f1315f81f8fb.gif',
 'header_static': 'https://cdn.masto.host/sciencessocial/cache/accounts/headers/109/274/913/869/213/582/static/ea18f

In [81]:
followers = mastodon.account_followers('109274913869213582')
len(followers)

40

In [82]:
followers

[{'id': 109334421628967389,
  'username': 'anmolpanda',
  'acct': 'anmolpanda',
  'display_name': '',
  'locked': False,
  'bot': False,
  'discoverable': None,
  'group': False,
  'created_at': datetime.datetime(2022, 11, 13, 0, 0, tzinfo=tzutc()),
  'note': '',
  'url': 'https://sciences.social/@anmolpanda',
  'avatar': 'https://sciences.social/avatars/original/missing.png',
  'avatar_static': 'https://sciences.social/avatars/original/missing.png',
  'header': 'https://sciences.social/headers/original/missing.png',
  'header_static': 'https://sciences.social/headers/original/missing.png',
  'followers_count': 0,
  'following_count': 4,
  'statuses_count': 22,
  'last_status_at': datetime.datetime(2023, 5, 31, 0, 0),
  'noindex': False,
  'emojis': [],
  'roles': [],
  'fields': []},
 {'id': 110383805124278721,
  'username': 'jcschwieter',
  'acct': 'jcschwieter',
  'display_name': 'Christian Schwieter',
  'locked': False,
  'bot': False,
  'discoverable': True,
  'group': False,
  'c

In [78]:
followers_page_2 = mastodon.account_followers(id='109274913869213582', min_id=followers[-1]['id'])

In [75]:
len(followers_page_2)

40

In [79]:
followers[-1]

{'id': 109398305181061564,
 'username': 'ReneCNielsen',
 'acct': 'ReneCNielsen',
 'display_name': 'René Clausen Nielsen',
 'locked': False,
 'bot': False,
 'discoverable': True,
 'group': False,
 'created_at': datetime.datetime(2022, 11, 24, 0, 0, tzinfo=tzutc()),
 'note': '<p>Researching, designing, and prototyping how we can reach the <a href="https://sciences.social/tags/SDGs" class="mention hashtag" rel="tag">#<span>SDGs</span></a> and prevent and respond to crises through <a href="https://sciences.social/tags/DataScience" class="mention hashtag" rel="tag">#<span>DataScience</span></a> and <a href="https://sciences.social/tags/ArtificialIntelligence" class="mention hashtag" rel="tag">#<span>ArtificialIntelligence</span></a> and their intersection with <a href="https://sciences.social/tags/SocialInnovation" class="mention hashtag" rel="tag">#<span>SocialInnovation</span></a>, <a href="https://sciences.social/tags/SocialScience" class="mention hashtag" rel="tag">#<span>SocialScience<

In [80]:
followers_page_2[-1]

{'id': 109398305181061564,
 'username': 'ReneCNielsen',
 'acct': 'ReneCNielsen',
 'display_name': 'René Clausen Nielsen',
 'locked': False,
 'bot': False,
 'discoverable': True,
 'group': False,
 'created_at': datetime.datetime(2022, 11, 24, 0, 0, tzinfo=tzutc()),
 'note': '<p>Researching, designing, and prototyping how we can reach the <a href="https://sciences.social/tags/SDGs" class="mention hashtag" rel="tag">#<span>SDGs</span></a> and prevent and respond to crises through <a href="https://sciences.social/tags/DataScience" class="mention hashtag" rel="tag">#<span>DataScience</span></a> and <a href="https://sciences.social/tags/ArtificialIntelligence" class="mention hashtag" rel="tag">#<span>ArtificialIntelligence</span></a> and their intersection with <a href="https://sciences.social/tags/SocialInnovation" class="mention hashtag" rel="tag">#<span>SocialInnovation</span></a>, <a href="https://sciences.social/tags/SocialScience" class="mention hashtag" rel="tag">#<span>SocialScience<

In [62]:
start_date = datetime(2023,5,1)
end_date = datetime(2023,5,31)
username = '@cfiesler@hci.social'
toots_bw_dates = get_toots_of_user_between_dates_w_mastodon(username, start_date, end_date)

TypeError: got an unexpected keyword argument 'id'

In [60]:
len(toots_bw_dates)

1229

In [51]:
df_toots_bw_dates = pd.DataFrame(toots_bw_dates)
df_toots_bw_dates

Unnamed: 0,id,created_at,in_reply_to_id,in_reply_to_account_id,sensitive,spoiler_text,visibility,language,uri,url,...,content,filtered,reblog,account,media_attachments,mentions,tags,emojis,card,poll
0,110310727158934216,2023-05-04 13:47:35+00:00,1.103106e+17,1.092644e+17,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110310723092690475,...,"<p><span class=""h-card""><a href=""https://hci.s...",[],,"{'id': 109274913869213582, 'username': 'cfiesl...",[],"[{'id': 109264446373403895, 'username': 'jbigh...",[],[],,
1,110310435274511771,2023-05-04 12:34:07+00:00,1.103104e+17,1.101739e+17,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110310434238747237,...,"<p><span class=""h-card""><a href=""https://mas.t...",[],,"{'id': 109274913869213582, 'username': 'cfiesl...",[],"[{'id': 110173943220150153, 'username': 'edros...",[],[],,
2,110310386627468464,2023-05-04 12:21:45+00:00,1.103104e+17,1.092749e+17,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110310385594698939,...,<p>casey.prof was actually gifted to me by Goo...,[],,"{'id': 109274913869213582, 'username': 'cfiesl...",[],[],[],[],{'url': 'https://www.blog.google/products/regi...,
3,110310371903844024,2023-05-04 12:17:49+00:00,,,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110310370173540166,...,<p>I have a new social media landing page! <a ...,[],,"{'id': 109274913869213582, 'username': 'cfiesl...",[],[],[],[],"{'url': 'https://casey.prof', 'title': 'Profes...",
4,110307552862407741,2023-05-04 00:21:18+00:00,1.103061e+17,1.094162e+17,False,,public,en,https://hci.social/users/cfiesler/statuses/110...,https://hci.social/@cfiesler/110307552719077735,...,"<p><span class=""h-card""><a href=""https://wande...",[],,"{'id': 109274913869213582, 'username': 'cfiesl...",[],"[{'id': 109416203379935459, 'username': 'jdnic...",[],[],,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1188,109257699985414271,2022-10-30 14:29:57+00:00,1.092518e+17,1.092749e+17,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109257699997620515,...,"<p>for more on the challenges of migration, ch...",[],,"{'id': 109274913869213582, 'username': 'cfiesl...",[],"[{'id': 109274914176958397, 'username': 'plura...",[],[],{'url': 'https://doctorow.medium.com/how-to-le...,
1189,109257244402166230,2022-10-30 12:34:05+00:00,,,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109257244353278456,...,<p>I saw a tiktok that played this scene from ...,[],,"{'id': 109274913869213582, 'username': 'cfiesl...","[{'id': 109314182725328826, 'type': 'image', '...",[],"[{'name': 'TheGoodPlace', 'url': 'https://scie...",[],,
1190,109252899682302929,2022-10-29 18:09:10+00:00,,,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109252899659082452,...,"<p>an <a href=""https://hci.social/tags/introdu...",[],,"{'id': 109274913869213582, 'username': 'cfiesl...",[],[],"[{'name': 'Introduction', 'url': 'https://scie...",[],,
1191,109251845340402435,2022-10-29 13:41:02+00:00,1.092487e+17,1.092749e+17,False,,public,en,https://hci.social/users/cfiesler/statuses/109...,https://hci.social/@cfiesler/109251845321040242,...,<p>Someone unrolled my bird thread about this ...,[],,"{'id': 109274913869213582, 'username': 'cfiesl...",[],[],"[{'name': 'twittermigration', 'url': 'https://...",[],{'url': 'https://threadreaderapp.com/thread/15...,


In [52]:
df_toots_bw_dates.created_at.describe()

  df_toots_bw_dates.created_at.describe()


count                          1193
unique                         1193
top       2023-05-04 13:47:35+00:00
freq                              1
first     2022-10-29 00:09:33+00:00
last      2023-05-04 13:47:35+00:00
Name: created_at, dtype: object

In [44]:
test_id = df_toots_bw_dates.id.tolist()[0]
test_id

110310925158294068

In [45]:
bit_shifted = 110310925158294068 >> 16
bit_shifted

1683211138279

In [46]:
datetime.fromtimestamp(1683211138279/1000)
# datetime.datetime(2022, 11, 11, 18, 16, 4, 630000)

datetime.datetime(2023, 5, 4, 20, 8, 58, 279000)

In [54]:
(int( end_date.timestamp()) << 16)*1000

110459053670400000

In [24]:
result = parse(client.get("https://sciences.social/api/v1/accounts/109334421628967389"))
result["username"]

'anmolpanda'

In [25]:
statuses = parse(client.get("https://sciences.social/api/v1/timelines/tag/democrats?limit=100"))
len(statuses)

40

In [26]:
display.HTML(statuses[5]['content'])