In [1]:
%load_ext dotenv
%dotenv

In [4]:
import praw # reddit API wrapper
import re
import numpy as np
import pandas as pd
import os

In [5]:
reddit = praw.Reddit(client_id=os.environ.get('REDDIT_CLIENT_ID'),
                     client_secret=os.environ.get('REDDIT_CLIENT_SECRET'),
                     user_agent=os.environ.get('REDDIT_USER_AGENT'))

## List of Spotify User IDs

In [4]:
def get_spotify_user_ids(sub):
    '''
    Searches a subreddit to obtain spotify user IDs from each spotify playlist embedding
    
    Parameters:
    sub is the name of the subreddit to search through for spotify playlist posts
    
    Returns:
    a list of spotify user IDs. One ID for each playlist post that is found
    
    Notes:
    The reddit API only allows viewing of about 1000 posts. This function uses each of the subreddit post sorting
    methods in order to partially get around this but I have not yet confirmed that at least a few unique results
    are being obtained from each additional sorting method.
    '''
    
    sorters = ['controversial', 'gilded', 'hot', 'new', 'rising', 'top']
    sub_instance = reddit.subreddit(sub)
    users = []
    for sorter in sorters:
        gen = getattr(sub_instance, sorter)()
        for post in gen:
            try:
                curr = post.media
                if curr == None:
                    continue
                if curr['type'] != 'open.spotify.com':
                    continue
                x = re.findall("user%2F(.*?)%2F", post.media['oembed']['html'])
                users.append(x[0])
            except Exception as e:
                print(e)
    return users

In [5]:
def add_new_users(filename, potential_new_users, old_users_filename):
    '''
    Adds new users to a list of users if the new users are not already in the old list.
    This function also prints the number of new users that were actually added.
    
    Parameters:
    filename is the name of the json that will be saved after combining users (str)
    potential_new_users is a list of the new users to be considered for adding (list)
    old_users_filename is a json containing all of the old users (str)
    
    Returns:
    Nothing. The combined list is saved as a json.
    '''
    
    old = pd.read_json(old_users_filename)[0]
    combined = list(set(potential_new_users)|set(old))
    pd.DataFrame(combined).to_json(filename)
    print(f'{len(combined) - len(old)} users added')

In [6]:
def confirm_and_add_users(filename, old_users_filename, subreddit):
    '''
    Searches a subreddit to obtain spotify user IDs from each spotify playlist embedding. 
    The function then combines existing user IDs with the new IDs, keeping only unique IDs.
    This function also prints the number of new users that were actually added.
    
    Parameters:
    filename is the name of the json that will be saved after combining users (str)
    old_users_filename is a json containing all of the old users (str)
    subreddit is the name of the subreddit to search through for spotify playlist posts (str)
    
    Returns:
    Nothing. The combined list is saved as a json.
    '''
    potential_new_users = get_spotify_user_ids(subreddit)
    add_new_users(filename, potential_new_users, old_users_filename)

In [14]:
confirm_and_add_users('users_9_15_19.json', 'sunday_users', 'spotifyplaylists')

list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
'Comment' object has no attribute 'media'
'Comment' object has no attribute 'media'
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list