# Minimal reproducible example for Reddit API

Copied from https://towardsdatascience.com/how-to-use-the-reddit-api-in-python-5e05ddfd1e5c

Sign up [here](https://www.reddit.com/prefs/apps), create an app, and place a `credentials.json` file in the `data` folder with the following structure:

```
{
    "personal_use_script": "APP_CODE",
    "secret": "APP_SECRET",
    "username": "YOUR_USERNAME",
    "password": "YOUR_PASSWORD"
}
```

In [1]:
import json
import requests
import pandas as pd
from datetime import datetime

In [2]:
# Load the app/user credentials from data/credentials.json
with open("data/credentials.json", encoding="utf-8") as f:
    credentials = json.load(f)

# HTTP basic auth built from the app's "personal_use_script" value and its secret
client_auth = requests.auth.HTTPBasicAuth(credentials["personal_use_script"], credentials["secret"])

# Form payload for the OAuth2 "password" grant.
# NOTE: `credentials` is deliberately rebound here (as in the original cell), so
# after this cell the name refers to the token-request payload, not the file contents.
credentials = {
    'grant_type': 'password',
    'username': credentials["username"],
    'password': credentials["password"]
}
headers = {'User-Agent': 'myBot/0.0.1'}

# send authentication request for OAuth token
# (timeout so a stalled connection cannot hang the notebook indefinitely)
res = requests.post('https://www.reddit.com/api/v1/access_token',
                    auth=client_auth, data=credentials, headers=headers,
                    timeout=30)
# fail loudly on HTTP errors instead of with a KeyError further down
res.raise_for_status()

token_data = res.json()
if 'access_token' not in token_data:
    # the endpoint may answer with an error payload instead of a token
    raise RuntimeError(f"Reddit authentication failed: {token_data}")

# extract token from response and format correctly
token = f"bearer {token_data['access_token']}"
# update API headers with authorization (bearer token)
headers = {**headers, 'Authorization': token}

# default query parameters shared by the requests below (page size of 100)
params = {'limit': 100}

## First request example (uninteresting)

In [None]:
# we use this function to convert responses to dataframes
def df_from_response(res):
    """Convert a listing response into a DataFrame with one row per post.

    Parameters
    ----------
    res : object with a ``json()`` method
        Response whose JSON body has the shape
        ``{'data': {'children': [{'kind': ..., 'data': {...}}, ...]}}``.

    Returns
    -------
    pandas.DataFrame
        One row per child with the columns ``subreddit``, ``title``,
        ``selftext``, ``upvote_ratio``, ``ups``, ``downs``, ``score``,
        ``link_flair_css_class``, ``created_utc`` (ISO-8601 string in UTC),
        ``id`` and ``kind``. Empty when the listing has no children.

    Raises
    ------
    KeyError
        If the body or one of the children lacks an expected key.
    """
    # local import: the file only imports `datetime` from the datetime module
    from datetime import timezone

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for post in res.json()['data']['children']:
        post_data = post['data']
        # Convert explicitly in UTC; the naive fromtimestamp() used the
        # machine's local timezone although the string is labelled 'Z'.
        created = datetime.fromtimestamp(post_data['created_utc'], tz=timezone.utc)
        rows.append({
            'subreddit': post_data['subreddit'],
            'title': post_data['title'],
            'selftext': post_data['selftext'],
            'upvote_ratio': post_data['upvote_ratio'],
            'ups': post_data['ups'],
            'downs': post_data['downs'],
            'score': post_data['score'],
            'link_flair_css_class': post_data['link_flair_css_class'],
            'created_utc': created.strftime('%Y-%m-%dT%H:%M:%SZ'),
            'id': post_data['id'],
            'kind': post['kind'],
        })

    return pd.DataFrame(rows)

# collect one dataframe per request; they are concatenated once after the loop
frames = []

# number of listing pages to request: 1 here (with limit=100, range(10) would
# return up to 1K posts)
for i in range(1):
    # make request
    res = requests.get("https://oauth.reddit.com/r/python/new",
                       headers=headers,
                       params=params)

    # get dataframe from response
    new_df = df_from_response(res)
    # an empty page has no last row to paginate from, so stop here
    if new_df.empty:
        break
    # take the final row (oldest entry)
    row = new_df.iloc[-1]
    # create fullname
    fullname = row['kind'] + '_' + row['id']
    # add/update fullname in params so the next request continues after it
    params['after'] = fullname

    frames.append(new_df)

# DataFrame.append was removed in pandas 2.0; concatenate the pages once instead
data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# show the raw JSON of the last response as the cell output
res.json()

## Getting user data
Unfortunately, the full account name is needed here, not the anonymized version returned by the API.

In [26]:
# References:
# https://github.com/reddit-archive/reddit/wiki/JSON
# https://www.reddit.com/dev/api/#GET_user_{username}_{where}

# Candidate account names; only the last assignment takes effect
#user_example = "t2_aj1ll"
user_example = "fk_uni_account"
user_example = "Forsaken_Citron9931"

# Request the "about" document for the selected account and display its JSON
about_url = f"https://oauth.reddit.com/user/{user_example}/about.json"
res = requests.get(about_url, headers=headers, params=params)
res.json()

{'kind': 't2',
 'data': {'is_employee': False,
  'is_friend': False,
  'subreddit': {'default_set': True,
   'user_is_contributor': False,
   'banner_img': '',
   'allowed_media_in_comments': [],
   'user_is_banned': False,
   'free_form_reports': True,
   'community_icon': None,
   'show_media': True,
   'icon_color': '',
   'user_is_muted': None,
   'display_name': 'u_Forsaken_Citron9931',
   'header_img': None,
   'title': 'Praveen',
   'previous_names': [],
   'over_18': False,
   'icon_size': [256, 256],
   'primary_color': '',
   'icon_img': 'https://styles.redditmedia.com/t5_4bwkmf/styles/profileIcon_snoo62bb8599-05b2-47e8-aa3e-f209732744e2-headshot.png?width=256&amp;height=256&amp;crop=256:256,smart&amp;v=enabled&amp;s=aee0d74dcdff894380bac44033b08b65097af581',
   'description': '',
   'submit_link_label': '',
   'header_size': None,
   'restrict_posting': True,
   'restrict_commenting': False,
   'subscribers': 0,
   'submit_text_label': '',
   'is_default_icon': False,
   'li

## Getting post data

The response includes many different (mainly uninteresting) features for the requested post AND its comments (with unique but anonymized author IDs).

In [9]:
# Sample posts
# https://old.reddit.com/r/DemocraticSocialism/comments/x2smmw/cities_without_water_is_our_future_the_leaders/
# https://old.reddit.com/r/EduWriters/comments/x2slk5/how_to_start_a_climate_change_essay/ # Banned
# https://old.reddit.com/r/NoStupidQuestions/comments/x2slxy/sharks_and_climate_change/

# Checking posts of same user
# https://www.reddit.com/r/ATBGE/comments/12jiij7/vietnamese_brewery_vinaken_makes_a_lager_vinaken/
# https://www.reddit.com/r/mildlyinfuriating/comments/12rp9o7/my_genuine_leather_calvin_klein_belt_got_home/
# https://www.reddit.com/r/AskSF/comments/o7fujj/best_alcatraz_tour_for_a_first_timer/h30fph1/?context=3
# anonymized username stays identical!

# Request one post by its subreddit and post id, then display the JSON
post_url = "https://oauth.reddit.com/r/mildlyinfuriating/comments/12rp9o7"
res = requests.get(post_url, headers=headers, params=params)
res.json()

[{'kind': 'Listing',
  'data': {'after': None,
   'dist': 1,
   'modhash': None,
   'geo_filter': '',
   'children': [{'kind': 't3',
     'data': {'approved_at_utc': None,
      'subreddit': 'mildlyinfuriating',
      'selftext': '',
      'user_reports': [],
      'saved': False,
      'mod_reason_title': None,
      'gilded': 0,
      'clicked': False,
      'title': 'My "genuine leather" Calvin Klein belt. Got home, tore off the tag which peeled back the plastic coating revealing the rubber core.',
      'link_flair_richtext': [],
      'subreddit_name_prefixed': 'r/mildlyinfuriating',
      'hidden': False,
      'pwls': 6,
      'link_flair_css_class': None,
      'downs': 0,
      'thumbnail_height': 140,
      'top_awarded_type': None,
      'parent_whitelist_status': 'all_ads',
      'hide_score': False,
      'name': 't3_12rp9o7',
      'quarantine': False,
      'link_flair_text_color': 'dark',
      'upvote_ratio': 0.89,
      'author_flair_background_color': None,
      'su

In [49]:
# Check the HTTP response headers for access (rate) limits
# https://github.com/reddit-archive/reddit/wiki/API
# NOTE: the recorded output below does contain the rate-limit headers, but under
# lower-case keys ('x-ratelimit-remaining', 'x-ratelimit-used', 'x-ratelimit-reset'),
# so indexing this plain dict with "X-Ratelimit-Remaining" raises KeyError.
# NOTE(review): res.headers itself is presumably case-insensitive (requests), so
# res.headers["X-Ratelimit-Remaining"] should work without dict() - confirm.
dict(res.headers)

{'Connection': 'keep-alive',
 'Content-Length': '2918',
 'x-ua-compatible': 'IE=edge',
 'content-type': 'application/json; charset=UTF-8',
 'expires': '-1',
 'cache-control': 'private, s-maxage=0, max-age=0, must-revalidate, no-store',
 'x-ratelimit-remaining': '599.0',
 'x-ratelimit-used': '1',
 'x-ratelimit-reset': '487',
 'content-encoding': 'gzip',
 'x-moose': 'majestic',
 'Accept-Ranges': 'bytes',
 'Date': 'Sun, 16 Apr 2023 18:41:53 GMT',
 'Via': '1.1 varnish',
 'Vary': 'accept-encoding',
 'Strict-Transport-Security': 'max-age=31536000; includeSubdomains',
 'X-Content-Type-Options': 'nosniff',
 'X-Frame-Options': 'SAMEORIGIN',
 'X-XSS-Protection': '1; mode=block',
 'Set-Cookie': 'loid=000000000800jg427h.2.1679942404000.Z0FBQUFBQmtQRUZ4TUwwVlRDcGhHVkFwZXd2LTZaX0NXa1pZX2w5akhXbDBZZXV0bG9odVNra3JTNXdRbXoydzJUUVNScFYzbVRyaDI5alczdmdOa1pYeFdfa09ESHAwNjBQbGJxaDBwdXNuUG0xc3E2aVBKdlZkTkNkcEpZRVJ6dUpJaE9vVVFvQ1Y; Domain=reddit.com; Max-Age=63071999; Path=/; expires=Tue, 15-Apr-2025 18:41:5