In [1]:
import requests
from requests.api import request
import time
import json
import re
import random
import os
import pickle
import trio
from enum import Enum

PREVIOUS_REQUESTS_BACKUP_PATH = "./previousRequests/"

def get_valid_filename(name):
    """Turn ``name`` into a string usable as a single file name.

    Surrounding whitespace is trimmed, remaining spaces become underscores and
    every character other than word characters, ``-`` and ``.`` is dropped.
    When nothing usable is left (empty string, ``.`` or ``..``) a random
    ``ErrorFileName<N>`` placeholder is returned instead.
    """
    underscored = str(name).strip().replace(" ", "_")
    cleaned = re.sub(r"(?u)[^-\w.]", "", underscored)
    if cleaned not in ("", ".", ".."):
        return cleaned
    # Nothing survived sanitising: fall back to a random placeholder name.
    return "ErrorFileName" + str(random.randrange(100000))

def get_valid_path(path):
    """Sanitise ``path`` the same way as a file name, but keep ``/`` separators.

    Whitespace is trimmed, spaces become underscores, and anything that is not
    a word character, ``-``, ``.`` or ``/`` is removed.  An empty, ``.`` or
    ``..`` result is replaced by a random ``ErrorFileName<N>`` placeholder.
    """
    underscored = str(path).strip().replace(" ", "_")
    cleaned = re.sub(r"(?u)[^-\w./]", "", underscored)
    if cleaned not in ("", ".", ".."):
        return cleaned
    # Nothing survived sanitising: fall back to a random placeholder name.
    return "ErrorFileName" + str(random.randrange(100000))

def dumpAPIRequest(fileName, data, path=PREVIOUS_REQUESTS_BACKUP_PATH):
    """Pickle ``data`` to ``<path><sanitised fileName>.bin`` and return that path.

    The target directory is created first if it does not exist yet.
    """
    targetFile = "".join((get_valid_path(path), get_valid_filename(fileName), ".bin"))
    os.makedirs(os.path.dirname(targetFile), exist_ok=True)
    with open(targetFile, "wb") as handle:
        pickle.dump(data, handle)
    return targetFile

def loadAPIRequest(fileName):
    """Return the object pickled for ``fileName`` in the request cache.

    An empty dict is returned when no cache file exists for that name.
    """
    cachePath = PREVIOUS_REQUESTS_BACKUP_PATH + get_valid_filename(fileName) + ".bin"
    if os.path.exists(cachePath):
        # NOTE: pickle.load executes whatever the file encodes; only use this
        # on cache files written by this notebook.
        with open(cachePath, "rb") as handle:
            return pickle.load(handle)
    return {}

def checkForAPIRequest(url):
    """Tell whether a cache file already exists for ``url``."""
    cachePath = "".join((PREVIOUS_REQUESTS_BACKUP_PATH, get_valid_filename(url), ".bin"))
    return os.path.exists(cachePath)

def dumpJSON(fileName, data, path="./"):
    """Write ``data`` as indented JSON to ``<path><sanitised fileName>.json``.

    Missing directories are created.  Returns the path that was written.
    """
    targetFile = "".join((get_valid_path(path), get_valid_filename(fileName), ".json"))
    os.makedirs(os.path.dirname(targetFile), exist_ok=True)
    with open(targetFile, "w") as handle:
        json.dump(data, handle, indent=6)
    return targetFile

def loadJSON(fullFileName):
    """Parse the JSON file at ``fullFileName``; return ``{}`` if it is missing."""
    if os.path.exists(fullFileName):
        with open(fullFileName, "r") as handle:
            return json.load(handle)
    return {}

def checkForJSON(fileName, path):
    """Load ``<path><fileName>.json`` (both sanitised) if present, else ``{}``."""
    candidate = "".join((get_valid_path(path), get_valid_filename(fileName), ".json"))
    if not os.path.exists(candidate):
        return {}
    return loadJSON(candidate)

def nurseryReturn(nursery, assignee, location, func, *args):
    """Schedule ``func(*args)`` on ``nursery`` and keep its result.

    Once the awaited call finishes, its return value is stored at
    ``assignee[location]``.  Nothing is returned here; the call is only
    handed to ``nursery.start_soon``.
    """
    async def storeResult():
        result = await func(*args)
        assignee[location] = result

    nursery.start_soon(storeResult)

In [2]:
# Base URL from which the project/studio/user endpoint prefixes are built.
SCRATCH_API = "https://api.scratch.mit.edu"
# Spacing in seconds (it is compared against time.time()) that apiRequest
# uses when working out how long to sleep before a request.
WAIT_TIME = 0.15
# time.time() value that apiRequest updates whenever it schedules a request.
lastRequestTime = 0
# Held by apiRequest while it reads and updates lastRequestTime.
timerLock = trio.Lock()
# Held by apiRequest while it checks the cache and writes the marker file.
markLock = trio.Lock()
# Query-string fragment the authenticated helpers put in front of the token.
AUTH_ADDITION = "?x-token="
# Placeholder (a one-element set) that apiRequest pickles into a cache file
# before fetching; readers that load it keep polling until it is replaced.
MARK_DATA = {"mark"}

async def apiRequest(url):
    """Fetch ``url`` as JSON, backed by the on-disk pickle cache.

    The first task to ask for a URL writes a ``MARK_DATA`` placeholder into
    the cache and performs the HTTP request; every other task asking for the
    same URL polls the cache file until the placeholder has been replaced.

    Returns the decoded JSON body, or ``{}`` when the response status is not
    OK.  That result (including ``{}``) is cached for later calls.

    If the fetch fails or the task is cancelled, the placeholder is removed
    before the exception propagates, so waiting tasks stop polling (they then
    see a missing file and get ``{}``) and a later run can retry.

    NOTE(review): the cache file name is derived from the full URL, so URLs
    built with ``AUTH_ADDITION`` put the token into a file name on disk.
    """
    global lastRequestTime

    # Decide atomically whether this task owns the request for `url`.
    async with markLock:
        check = checkForAPIRequest(url)
        if not check:
            markerPath = dumpAPIRequest(url, MARK_DATA)
    if check:
        data = loadAPIRequest(url)
        while data == MARK_DATA:
            # Another task is still fetching this URL; poll until it is done.
            await trio.sleep(0.1)
            data = loadAPIRequest(url)
        return data

    try:
        async with timerLock:
            now = time.time()
            # Reserve the next free slot.  Storing the *scheduled* time (not
            # the current time) is what keeps queued tasks WAIT_TIME apart;
            # otherwise they would all sleep the same amount and fire together.
            scheduledTime = max(lastRequestTime + WAIT_TIME, now)
            lastRequestTime = scheduledTime
        await trio.sleep(scheduledTime - now)

        # NOTE(review): requests.get is synchronous and has no timeout here,
        # so it blocks the trio event loop for the duration of the request.
        response = requests.get(url)
        data = response.json() if response.ok else {}

        dumpAPIRequest(url, data)
    except BaseException:
        # BaseException so that trio cancellation is covered as well: never
        # leave a stale marker (or half-written file) behind.
        if os.path.exists(markerPath):
            os.remove(markerPath)
        raise

    return data

async def getAllResults(url):
    """Collect every page of a paginated endpoint through ``apiRequest``.

    Pages of 40 entries are requested with ``limit``/``offset`` parameters
    until a page comes back shorter than the limit.  The pagination
    parameters are joined with ``&`` when ``url`` already has a query string
    (for example one built with ``AUTH_ADDITION``) and with ``?`` otherwise.

    Returns whatever the first request returned, extended in place with the
    entries of the following pages.  A failed first request therefore yields
    the ``{}`` that ``apiRequest`` returns.
    """
    limit = 40
    # A URL may only contain one "?"; later parameters must be joined with "&".
    separator = "&" if "?" in url else "?"
    pageUrl = url + separator + "limit=" + str(limit) + "&offset="
    offset = 0
    singleList = await apiRequest(pageUrl + str(offset))
    results = singleList
    while len(singleList) >= limit:
        offset += limit
        singleList = await apiRequest(pageUrl + str(offset))
        if len(singleList) > 0:
            results += singleList
    return results

async def getAllResultsDateBased(url):
    """Collect a feed that is paged by date instead of by offset.

    The first page is requested with ``limit=40``.  While a page is full, the
    ``datetime_created`` value of the last collected entry is sent as
    ``dateLimit`` and the entries after the overlapping one are appended.

    Entries are read as dicts (``entry["datetime_created"]``), matching the
    JSON objects ``apiRequest`` returns.  Paging stops when a page is not
    full or adds nothing new, so an unchanged response cannot loop forever.

    Returns the first response, extended in place with the later entries.
    """
    limit = 40
    # A URL may only contain one "?"; later parameters must be joined with "&".
    separator = "&" if "?" in url else "?"
    url = url + separator + "limit=" + str(limit)
    singleList = await apiRequest(url)
    results = singleList
    while len(singleList) >= limit:
        lastItem = results[-1]
        dateLimit = lastItem["datetime_created"]
        singleList = await apiRequest(url + "&dateLimit=" + str(dateLimit))
        if lastItem in singleList:
            # Drop everything up to and including the entry we already hold.
            newItems = singleList[singleList.index(lastItem) + 1:]
        else:
            newItems = singleList
        if not newItems:
            break
        results += newItems
    return results

async def getCommentsWithReplies(url):
    """Fetch all comments at ``url`` and attach each comment's replies.

    For a comment with a positive ``reply_count`` the replies are fetched
    concurrently from ``<url>/<comment id>/replies`` and stored under
    ``"replies"``.  Every other comment gets ``"replies": {}`` and
    ``"reply_count": 0``.  Returns the (mutated) comment collection.
    """
    comments = await getAllResults(url)
    async with trio.open_nursery() as nursery:
        for comment in comments:
            hasReplies = "reply_count" in comment.keys() and comment["reply_count"] > 0
            if not hasReplies:
                comment["replies"] = {}
                comment["reply_count"] = 0
                continue
            repliesUrl = url + "/" + str(comment["id"]) + "/replies"
            nurseryReturn(nursery, comment, "replies", getAllResults, repliesUrl)
    return comments


In [3]:
# project calls

projectsInfoAPIAddition = "/projects/"
projectInfoAPI = SCRATCH_API + projectsInfoAPIAddition

async def getProjectInfo(projectID):
    """Request ``<projectInfoAPI><projectID>`` and return apiRequest's result."""
    infoUrl = f"{projectInfoAPI}{projectID}"
    return await apiRequest(infoUrl)

projectRemixesAPIAddition = "/remixes"
async def getProjectRemixes(projectID):
    """Collect the project's ``/remixes`` listing through getAllResults."""
    remixesUrl = f"{projectInfoAPI}{projectID}{projectRemixesAPIAddition}"
    return await getAllResults(remixesUrl)


# bonus helper function
# Cache of author id -> username, filled by getProjectUserName.
userIDs = {}
async def getProjectUserName(projectID, userID=None):
    """Return the username of the author of ``projectID``.

    When ``userID`` is given and already cached in ``userIDs`` the cached name
    is returned without any request.  Otherwise the project info is fetched;
    ``""`` is returned when it is unavailable (empty).  The looked-up name is
    cached only when a ``userID`` was supplied, so no ``None`` key is created.
    """
    if userID is not None and userID in userIDs:
        return userIDs[userID]

    projectInfo = await getProjectInfo(projectID)
    if projectInfo == {}:
        return ""

    userName = projectInfo["author"]["username"]
    if userID is not None:
        userIDs[userID] = userName
    return userName


In [4]:
# studio calls

studioInfoAPIAddition = "/studios/"
studioInfoAPI = SCRATCH_API + studioInfoAPIAddition
async def getStudioInfo(studioID):
    """Request ``<studioInfoAPI><studioID>`` and return apiRequest's result."""
    infoUrl = f"{studioInfoAPI}{studioID}"
    return await apiRequest(infoUrl)

studioActivityAPIAddition = "/activity"
async def getStudioActivity(studioID):
    """Collect the studio's ``/activity`` listing through getAllResults."""
    activityUrl = f"{studioInfoAPI}{studioID}{studioActivityAPIAddition}"
    return await getAllResults(activityUrl)

studioCommentsAPIAddition = "/comments"
async def getStudioComments(studioID):
    """Collect the studio's ``/comments`` together with their replies."""
    commentsUrl = f"{studioInfoAPI}{studioID}{studioCommentsAPIAddition}"
    return await getCommentsWithReplies(commentsUrl)

studioCuratorsAPIAddition = "/curators"
async def getStudioCurators(studioID):
    """Collect the studio's ``/curators`` listing through getAllResults."""
    curatorsUrl = f"{studioInfoAPI}{studioID}{studioCuratorsAPIAddition}"
    return await getAllResults(curatorsUrl)

studioManagersAPIAddition = "/managers"
async def getStudioManagers(studioID):
    """Collect the studio's ``/managers`` listing through getAllResults."""
    managersUrl = f"{studioInfoAPI}{studioID}{studioManagersAPIAddition}"
    return await getAllResults(managersUrl)

studioProjectsAPIAddition = "/projects"
async def getStudioProjects(studioID):
    """Collect the studio's ``/projects`` listing, expanded to full project info.

    Every entry of the listing is replaced, concurrently, by the result of
    ``getProjectInfo`` for that entry's ``id``.
    """
    listing = await getAllResults(studioInfoAPI + str(studioID) + studioProjectsAPIAddition)
    async with trio.open_nursery() as nursery:
        for position in range(len(listing)):
            listedID = listing[position]["id"]
            nurseryReturn(nursery, listing, position, getProjectInfo, listedID)
    return listing

studioUserRoleAPIAddition = "/users/"
async def getStudioUserRole(studioID, userName, authToken):
    """Query ``<studio>/users/<userName>`` with the auth token appended."""
    roleUrl = "".join((
        studioInfoAPI,
        str(studioID),
        studioUserRoleAPIAddition,
        userName,
        AUTH_ADDITION,
        authToken,
    ))
    return await getAllResults(roleUrl)

In [5]:
# user calls

userInfoAPIAddition = "/users/"
userInfoAPI = SCRATCH_API + userInfoAPIAddition
async def getUserInfo(userName):
    """Request ``<userInfoAPI><userName>`` and return apiRequest's result."""
    infoUrl = f"{userInfoAPI}{userName}"
    return await apiRequest(infoUrl)

userFavoritesAPIAddition = "/favorites"
async def getUserFavorites(userName):
    """Collect the user's ``/favorites`` and fill in each author's username.

    The username is looked up concurrently with ``getProjectUserName`` and
    written to ``favorite["author"]["username"]``.
    """
    favorites = await getAllResults(f"{userInfoAPI}{userName}{userFavoritesAPIAddition}")
    async with trio.open_nursery() as nursery:
        for favorite in favorites:
            author = favorite["author"]
            nurseryReturn(nursery, author, "username", getProjectUserName, favorite["id"], author["id"])
    return favorites

userFollowersAPIAddition = "/followers"
async def getUserFollowers(userName):
    """Collect the user's ``/followers`` listing through getAllResults."""
    followersUrl = f"{userInfoAPI}{userName}{userFollowersAPIAddition}"
    return await getAllResults(followersUrl)

userFollowingAPIAddition = "/following"
async def getUserFollowing(userName):
    """Collect the user's ``/following`` listing through getAllResults."""
    followingUrl = f"{userInfoAPI}{userName}{userFollowingAPIAddition}"
    return await getAllResults(followingUrl)

userFollowingStudiosAPIAddition = userFollowingAPIAddition + "/studios/projects"
async def getUserFollowingStudios(userName, authToken):
    """Collect ``<user>/following/studios/projects`` with the auth token appended."""
    feedUrl = "".join((
        userInfoAPI,
        str(userName),
        userFollowingStudiosAPIAddition,
        AUTH_ADDITION,
        authToken,
    ))
    return await getAllResults(feedUrl)

userFollowingUsersActivityAPIAddition = userFollowingAPIAddition + "/users/activity"
async def getUserFollowingUsersActivity(userName, authToken):
    """Collect ``<user>/following/users/activity`` with the auth token appended."""
    feedUrl = "".join((
        userInfoAPI,
        str(userName),
        userFollowingUsersActivityAPIAddition,
        AUTH_ADDITION,
        authToken,
    ))
    return await getAllResults(feedUrl)

userFollowingUsersLovesAPIAddition = userFollowingAPIAddition + "/users/loves"
async def getUserFollowingUsersLoves(userName, authToken):
    """Collect ``<user>/following/users/loves`` with the auth token appended."""
    feedUrl = "".join((
        userInfoAPI,
        str(userName),
        userFollowingUsersLovesAPIAddition,
        AUTH_ADDITION,
        authToken,
    ))
    return await getAllResults(feedUrl)

userFollowingUsersProjectsAPIAddition = userFollowingAPIAddition + "/users/projects"
async def getUserFollowingUsersProjects(userName, authToken):
    """Collect ``<user>/following/users/projects`` with the auth token appended."""
    feedUrl = "".join((
        userInfoAPI,
        str(userName),
        userFollowingUsersProjectsAPIAddition,
        AUTH_ADDITION,
        authToken,
    ))
    return await getAllResults(feedUrl)

userInvitesAPIAddition = "/invites"
async def getUserInvites(userName, authToken):
    """Collect ``<user>/invites`` with the auth token appended."""
    invitesUrl = "".join((
        userInfoAPI,
        str(userName),
        userInvitesAPIAddition,
        AUTH_ADDITION,
        authToken,
    ))
    return await getAllResults(invitesUrl)

userMessagesAPIAddition = "/messages"
async def getUserMessages(userName, authToken):
    """Collect ``<user>/messages`` with the auth token appended."""
    messagesUrl = "".join((
        userInfoAPI,
        str(userName),
        userMessagesAPIAddition,
        AUTH_ADDITION,
        authToken,
    ))
    return await getAllResults(messagesUrl)

userAlertsAPIAddition = "/messages/admin"
async def getUserAlerts(userName, authToken):
    """Collect ``<user>/messages/admin`` with the auth token appended."""
    alertsUrl = "".join((
        userInfoAPI,
        str(userName),
        userAlertsAPIAddition,
        AUTH_ADDITION,
        authToken,
    ))
    return await getAllResults(alertsUrl)

userUnreadMessagesCountAPIAddition = "/messages/count"
async def getUserUnreadMessagesCount(userName, authToken):
    """Query ``<user>/messages/count`` (via getAllResults) with the auth token appended."""
    countUrl = "".join((
        userInfoAPI,
        str(userName),
        userUnreadMessagesCountAPIAddition,
        AUTH_ADDITION,
        authToken,
    ))
    return await getAllResults(countUrl)

userProjectsAPIAddition = "/projects"
async def getUserProjects(userName):
    """Collect the user's ``/projects`` and stamp each with the owner's name.

    ``userName`` is written to ``entry["author"]["username"]`` for every
    entry of the listing before it is returned.
    """
    ownProjects = await getAllResults(f"{userInfoAPI}{userName}{userProjectsAPIAddition}")
    for entry in ownProjects:
        entry["author"]["username"] = userName
    return ownProjects

userRecentlyViewedAPIAddition = "/projects/recentlyviewed"
async def getUserRecentlyViewed(userName, authToken):
    """Collect ``<user>/projects/recentlyviewed`` with the auth token appended."""
    recentUrl = "".join((
        userInfoAPI,
        str(userName),
        userRecentlyViewedAPIAddition,
        AUTH_ADDITION,
        authToken,
    ))
    return await getAllResults(recentUrl)

userProjectCommentsAPIAddition = "/comments"
userProjectsAPIAdditionMiddle = "/projects/"
async def getUserProjectComments(userName, projectID):
    """Collect ``<user>/projects/<projectID>/comments`` together with replies."""
    commentsUrl = (
        f"{userInfoAPI}{userName}{userProjectsAPIAdditionMiddle}"
        f"{projectID}{userProjectCommentsAPIAddition}"
    )
    return await getCommentsWithReplies(commentsUrl)

userProjectStudiosAPIAddition = "/studios"
async def getUserProjectStudiosAPI(userName, projectID):
    """Collect ``<user>/projects/<projectID>/studios`` through getAllResults."""
    studiosUrl = (
        f"{userInfoAPI}{userName}{userProjectsAPIAdditionMiddle}"
        f"{projectID}{userProjectStudiosAPIAddition}"
    )
    return await getAllResults(studiosUrl)

userUnsharedProjectAPIAddition = "/visibility"
async def getUserUnsharedProject(userName, projectID):
    """Request ``<user>/projects/<projectID>/visibility`` and return the result.

    This is a plain ``apiRequest``.  The comment helper it used before calls
    ``.keys()`` on every element of the response, which fails as soon as the
    response is a non-empty JSON object rather than a list of comments.

    NOTE(review): presumably this endpoint returns one object and needs an
    auth token, which this signature does not accept; confirm against the API.
    """
    visibilityUrl = (
        userInfoAPI
        + str(userName)
        + userProjectsAPIAdditionMiddle
        + str(projectID)
        + userUnsharedProjectAPIAddition
    )
    return await apiRequest(visibilityUrl)

userStudiosAPIAddition = "/studios/curate"
async def getUserStudios(userName):
    """Collect the user's ``/studios/curate`` listing through getAllResults."""
    studiosUrl = f"{userInfoAPI}{userName}{userStudiosAPIAddition}"
    return await getAllResults(studiosUrl)

In [6]:
async def getProjectData(userName, projectID, level=0, outputPath="./", authUserName=None, authToken=None):
    """Gather a project's info, remixes, studios and comments and dump them as JSON.

    The dump goes to ``<outputPath><sanitised title>/<title>.json``.  While
    ``level`` is positive, the remixes, containing studios and comment authors
    are crawled recursively with ``level - 1`` into ``remixes/``, ``studios/``
    and ``commentUsers/`` below that folder, and each crawl's return value is
    stored on the corresponding entry.

    Returns ``""`` when the project info is empty, the previously dumped JSON
    content when a dump already exists, and otherwise the path of the file
    just written.
    """
    projectInfo = await getProjectInfo(projectID)
    project = {"projectInfo": projectInfo}
    if projectInfo == {}:
        return ""

    fileName = projectInfo["title"]
    outputPath += get_valid_filename(fileName) + "/"
    print(outputPath)

    previousDump = checkForJSON(fileName, outputPath)
    if previousDump != {}:
        return previousDump

    # First wave: the three listings that belong to the project itself.
    firstWave = (
        ("projectRemixes", getProjectRemixes, (projectID,)),
        ("projectStudios", getUserProjectStudiosAPI, (userName, projectID)),
        ("projectComments", getUserProjectComments, (userName, projectID)),
    )
    async with trio.open_nursery() as nursery:
        for key, fetch, fetchArgs in firstWave:
            nurseryReturn(nursery, project, key, fetch, *fetchArgs)

    if level > 0:
        childLevel = level - 1
        async with trio.open_nursery() as nursery:

            def crawl(target, key, fetch, *leading, folder):
                # Every recursive crawl shares the level, base folder and credentials.
                nurseryReturn(nursery, target, key, fetch, *leading,
                              childLevel, outputPath + folder, authUserName, authToken)

            for remix in project["projectRemixes"]:
                crawl(remix, "projectData", getProjectData,
                      remix["author"]["username"], remix["id"], folder="remixes/")
            for containingStudio in project["projectStudios"]:
                crawl(containingStudio, "studioData", getStudioData,
                      containingStudio["id"], folder="studios/")
            for comment in project["projectComments"]:
                crawl(comment, "userData", getUserData,
                      comment["author"]["username"], folder="commentUsers/")

    return dumpJSON(fileName, project, outputPath)

async def getStudioData(studioID, level=0, outputPath="./", authUserName=None, authToken=None):
    """Gather a studio's info and listings and dump them as JSON.

    The dump goes to ``<outputPath><sanitised title>/<title>.json``.  Activity,
    comments, curators, managers and projects are fetched concurrently, plus
    the authenticated user's role when ``authUserName`` is given.  While
    ``level`` is positive, curators, managers and projects are crawled
    recursively with ``level - 1`` into ``curators/``, ``managers/`` and
    ``projects/`` below that folder.

    Returns ``""`` when the studio info is empty (same convention as
    ``getProjectData``), the previously dumped JSON content when a dump
    already exists, and otherwise the path of the file just written.
    """
    studio = {}
    studio["studioInfo"] = await getStudioInfo(studioID)
    # A failed lookup yields {}; without this guard ["title"] raises KeyError.
    if studio["studioInfo"] == {}:
        return ""

    outputPath += get_valid_filename(studio["studioInfo"]["title"]) + "/"
    print(outputPath)

    fileName = studio["studioInfo"]["title"]
    jsonCheck = checkForJSON(fileName, outputPath)
    if jsonCheck != {}:
        return jsonCheck

    async with trio.open_nursery() as nursery:
        nurseryReturn(nursery, studio, "studioActivity", getStudioActivity, studioID)
        nurseryReturn(nursery, studio, "studioComments", getStudioComments, studioID)
        nurseryReturn(nursery, studio, "studioCurators", getStudioCurators, studioID)
        nurseryReturn(nursery, studio, "studioManagers", getStudioManagers, studioID)
        nurseryReturn(nursery, studio, "studioProjects", getStudioProjects, studioID)
        if authUserName is not None:
            nurseryReturn(nursery, studio, "studioUserRole", getStudioUserRole, studioID, authUserName, authToken)

    if level > 0:
        level -= 1
        async with trio.open_nursery() as nursery:
            for studioCurator in studio["studioCurators"]:
                nurseryReturn(nursery, studioCurator, "userData", getUserData, studioCurator["username"],
                              level,
                              outputPath + "curators/",
                              authUserName,
                              authToken)
            for studioManager in studio["studioManagers"]:
                nurseryReturn(nursery, studioManager, "userData", getUserData, studioManager["username"],
                              level,
                              outputPath + "managers/",
                              authUserName,
                              authToken)
            for studioProject in studio["studioProjects"]:
                # getStudioProjects stores {} for a project whose info could
                # not be fetched; such entries have no author to crawl.
                authorName = studioProject.get("author", {}).get("username")
                if authorName is None:
                    continue
                nurseryReturn(nursery, studioProject, "projectData", getProjectData, authorName,
                              studioProject["id"],
                              level,
                              outputPath + "projects/",
                              authUserName,
                              authToken)

    outputFileName = dumpJSON(fileName, studio, outputPath)
    return outputFileName

async def getUserData(userName, level=0, outputPath="./", authUserName=None, authToken=None):
    """Gather a user's info and listings and dump them as JSON.

    The dump goes to ``<outputPath><sanitised userName>/<userName>.json``.
    Favorites, followers, following, projects and studios are fetched
    concurrently.  When ``userName`` equals ``authUserName`` the front-page
    feeds, notifications and recently viewed projects are fetched as well,
    using ``authToken``.  While ``level`` is positive, every listed project,
    user and studio is crawled recursively with ``level - 1``.

    Returns the previously dumped JSON content when a dump already exists,
    otherwise the path of the file just written.
    """
    user = {}
    user["userInfo"] = await getUserInfo(userName)

    fileName = userName
    outputPath += get_valid_filename(userName) + "/"
    print(outputPath)
    previousDump = checkForJSON(fileName, outputPath)
    if previousDump != {}:
        return previousDump

    publicFetchers = (
        ("userFavorites", getUserFavorites),
        ("userFollowers", getUserFollowers),
        ("userFollowing", getUserFollowing),
        ("userProjects", getUserProjects),
        ("userStudios", getUserStudios),
    )
    async with trio.open_nursery() as nursery:
        for key, fetch in publicFetchers:
            nurseryReturn(nursery, user, key, fetch, userName)

        if userName == authUserName:
            # Data only visible to the authenticated account itself.
            frontPage = user["userCurrentFrontPage"] = {}
            frontPageFetchers = (
                ("Projects in Studios I'm Following", getUserFollowingStudios),
                ("What's Happening?", getUserFollowingUsersActivity),
                ("Projects Loved by Scratchers I'm Following", getUserFollowingUsersLoves),
                ("Projects by Scratchers I'm Following", getUserFollowingUsersProjects),
            )
            for key, fetch in frontPageFetchers:
                nurseryReturn(nursery, frontPage, key, fetch, authUserName, authToken)

            notifications = user["userNotifications"] = {}
            notificationFetchers = (
                ("invites", getUserInvites),
                ("messages", getUserMessages),
                ("alerts", getUserAlerts),
                ("unreadMessagesCount", getUserUnreadMessagesCount),
            )
            for key, fetch in notificationFetchers:
                nurseryReturn(nursery, notifications, key, fetch, authUserName, authToken)

            nurseryReturn(nursery, user, "userRecentlyViewed", getUserRecentlyViewed, authUserName, authToken)

    if level > 0:
        childLevel = level - 1
        async with trio.open_nursery() as nursery:

            def crawl(target, key, fetch, *leading, folder):
                # Every recursive crawl shares the level, base folder and credentials.
                nurseryReturn(nursery, target, key, fetch, *leading,
                              childLevel, outputPath + folder, authUserName, authToken)

            for favorite in user["userFavorites"]:
                crawl(favorite, "projectData", getProjectData,
                      favorite["author"]["username"], favorite["id"], folder="favorites/")
            for follower in user["userFollowers"]:
                crawl(follower, "userData", getUserData,
                      follower["username"], folder="followers/")
            for followed in user["userFollowing"]:
                crawl(followed, "userData", getUserData,
                      followed["username"], folder="following/")
            for ownProject in user["userProjects"]:
                crawl(ownProject, "projectData", getProjectData,
                      ownProject["author"]["username"], ownProject["id"], folder="projects/")
            for curatedStudio in user["userStudios"]:
                crawl(curatedStudio, "studioData", getStudioData,
                      curatedStudio["id"], folder="studios/")

    return dumpJSON(fileName, user, outputPath)

In [11]:
# Remove cache entries that are unusable: leftover "request in flight" markers
# from an interrupted run (which would make apiRequest wait forever) and
# truncated cache files that can no longer be unpickled.
# On a fresh checkout the cache directory does not exist yet; nothing to do.
if os.path.isdir(PREVIOUS_REQUESTS_BACKUP_PATH):
    for fileName in os.listdir(PREVIOUS_REQUESTS_BACKUP_PATH):
        fileDirectory = os.path.join(PREVIOUS_REQUESTS_BACKUP_PATH, fileName)
        # Only cache files written by dumpAPIRequest (*.bin) are examined;
        # sub-directories and unrelated files are left untouched.
        if not (fileName.endswith(".bin") and os.path.isfile(fileDirectory)):
            continue
        delete = False
        try:
            with open(fileDirectory, "rb") as inputFile:
                delete = pickle.load(inputFile) == MARK_DATA
        except (EOFError, pickle.UnpicklingError):
            # Empty or corrupt cache file: drop it so the URL is fetched again.
            delete = True
        if delete:
            os.remove(fileDirectory)

In [12]:
# Read the account name and token from a local JSON file rather than
# hard-coding them.  loadJSON returns {} when the file is absent, in which
# case the key lookups below raise KeyError.
authData = loadJSON("scratchAuthenticationToken.json")
# Start the crawl at the authenticated user: level 4, output rooted at "./",
# with the same user name and its token passed as the credentials.
trio.run(getUserData, authData["username"], 4, "./", authData["username"], authData["x-token"])


./fespadea/
./fespadea/favorites/mario_vs_luigi_part_2/
./fespadea/favorites/SuperKart/
./fespadea/favorites/3D_Perspective_Drawing_Tool/
./fespadea/favorites/Extreme_Animating/
./fespadea/favorites/Mr._Ski/
./fespadea/favorites/Jetpack_Racer/
./fespadea/favorites/Bit.Byte/
./fespadea/favorites/Invisible/
./fespadea/favorites/Pixel_Battle/
./fespadea/favorites/You-Topia/
./fespadea/favorites/Mr._Ski_2/
./fespadea/favorites/The_Man/
./fespadea/favorites/Hideout_Tycoon_-_Broken_with_Scratch_3/
./fespadea/favorites/Explosion_the_Racer_v2.8/
./fespadea/favorites/Youre_Driving_me_Crazy_AMV/
./fespadea/favorites/Minecraft_3D_v9/
./fespadea/favorites/Switchy/
./fespadea/favorites/How_to_Lip_Sync/
./fespadea/favorites/Learning_Simulator_with_Michael_Bolton/
./fespadea/favorites/Interesting_Facts/
./fespadea/favorites/Basketball/
./fespadea/favorites/Pixel_Village_v1.0.0/
./fespadea/favorites/Flappy_Bird_Scratch_Edition_remix/
./fespadea/favorites/Mario_Party_Remix_Beta_IDontCareImReleasingThis