In [1]:
import os
import time
import re
import praw
import json
import numpy as np
import pandas as pd
import pickle

In [2]:
# Set up the PRAW (Python Reddit API Wrapper) client. The credentials are read
# from environment variables so that no secrets are stored in the notebook;
# os.getenv yields None for any variable that is not set.
PRAW_CLIENT_ID, PRAW_CLIENT_SECRET, PRAW_USER_AGENT = map(
    os.getenv,
    ('PRAW_CLIENT_ID', 'PRAW_CLIENT_SECRET', 'PRAW_USER_AGENT'),
)

reddit = praw.Reddit(
    client_id=PRAW_CLIENT_ID,
    client_secret=PRAW_CLIENT_SECRET,
    user_agent=PRAW_USER_AGENT,
)

In [3]:
# Lookup table of the 50 US states plus Washington DC, one row per entry.
# Each row is [state name, two-letter abbreviation, numeric state code]; the
# lookup loop below unpacks the third element as `state_fip`.
# Rows are ordered by ascending numeric code, and the codes are not contiguous.
states_list = [
    ['Alabama', 'AL', 1],
    ['Alaska', 'AK', 2],
    ['Arizona', 'AZ', 4],
    ['Arkansas', 'AR', 5],
    ['California', 'CA', 6],
    ['Colorado', 'CO', 8],
    ['Connecticut', 'CT', 9],
    ['Delaware', 'DE', 10],
    ['Washington DC', 'DC', 11],
    ['Florida', 'FL', 12],
    ['Georgia', 'GA', 13],
    ['Hawaii', 'HI', 15],
    ['Idaho', 'ID', 16],
    ['Illinois', 'IL', 17],
    ['Indiana', 'IN', 18],
    ['Iowa', 'IA', 19],
    ['Kansas', 'KS', 20],
    ['Kentucky', 'KY', 21],
    ['Louisiana', 'LA', 22],
    ['Maine', 'ME', 23],
    ['Maryland', 'MD', 24],
    ['Massachusetts', 'MA', 25],
    ['Michigan', 'MI', 26],
    ['Minnesota', 'MN', 27],
    ['Mississippi', 'MS', 28],
    ['Missouri', 'MO', 29],
    ['Montana', 'MT', 30],
    ['Nebraska', 'NE', 31],
    ['Nevada', 'NV', 32],
    ['New Hampshire', 'NH', 33],
    ['New Jersey', 'NJ', 34],
    ['New Mexico', 'NM', 35],
    ['New York', 'NY', 36],
    ['North Carolina', 'NC', 37],
    ['North Dakota', 'ND', 38],
    ['Ohio', 'OH', 39],
    ['Oklahoma', 'OK', 40],
    ['Oregon', 'OR', 41],
    ['Pennsylvania', 'PA', 42],
    ['Rhode Island', 'RI', 44],
    ['South Carolina', 'SC', 45],
    ['South Dakota', 'SD', 46],
    ['Tennessee', 'TN', 47],
    ['Texas', 'TX', 48],
    ['Utah', 'UT', 49],
    ['Vermont', 'VT', 50],
    ['Virginia', 'VA', 51],
    ['Washington', 'WA', 53],
    ['West Virginia', 'WV', 54],
    ['Wisconsin', 'WI', 55],
    ['Wyoming', 'WY', 56],
]

In [4]:
# Look up a candidate subreddit for every state via Reddit's subreddit search
# endpoint: https://www.reddit.com/dev/api/#POST_api_search_subreddits
# Results are collected in a dict keyed by the state's numeric code.
sub_state_dict = {}
for state, state_abbr, state_fip in states_list:
    response = reddit.post('api/search_subreddits', data={'query': state})
    matches = response['subreddits']

    # No search results for this state name: report it and move on.
    if len(matches) == 0:
        print(f'Subreddit not found for: {state}')
        continue

    # Only the first search result is kept; it is not validated here.
    top_hit = matches[0]
    sub_state_dict[state_fip] = [
        state,
        state_abbr,
        state_fip,
        top_hit['name'].strip(),
        top_hit['subscriber_count'],
    ]

In [5]:
# Convert the lookup results (one list per state, keyed by numeric state code)
# into a table. The dict keys become the index, which is then discarded in
# favour of a plain 0..n-1 index; the code itself is still available as the
# `state_fip` column. The positional columns 0-4 are given descriptive names.
df_state_subs = (
    pd.DataFrame.from_dict(sub_state_dict, orient='index')
    .reset_index(drop=True)
    .rename(
        columns={
            0: 'state_name',
            1: 'state_abbr',
            2: 'state_fip',
            3: 'state_sub',
            4: 'sub_cnt',
        }
    )
)

Note: the retrieved subreddits may not be the correct ones for each state, so they need to be checked manually later.

In [8]:
# Persist the raw (not yet manually verified) state-to-subreddit table.
# The file is opened in a `with` block so the handle is flushed and closed as
# soon as the dump finishes, instead of being left open until garbage
# collection (the original passed an anonymous open() handle to pickle.dump).
with open('../data/df_state_subs_raw.pkl', 'wb') as pkl_file:
    pickle.dump(df_state_subs, pkl_file)