In [12]:
import requests
from requests.api import request
import time
import json
import re
import random
import os
import pickle
import trio

# Directory holding the pickled API responses: the default target of
# dumpAPIRequest and the lookup location of loadAPIRequest / checkForAPIRequest.
previousRequestsBackupPath = "./previousRequests/"

def get_valid_filename(name):
    """Turn an arbitrary value into a string that is safe to use as a file name.

    The value is stringified, trimmed, spaces become underscores, and every
    character that is not a word character, ``-`` or ``.`` is dropped.

    Returns:
        The cleaned name, or ``"ErrorFileName<random number>"`` when cleaning
        leaves nothing usable (empty string, ``"."`` or ``".."``).
    """
    underscored = str(name).strip().replace(" ", "_")
    cleaned = re.sub(r"(?u)[^-\w.]", "", underscored)
    if cleaned not in {"", ".", ".."}:
        return cleaned
    # Nothing usable survived: fall back to a randomised placeholder name.
    return "ErrorFileName" + str(random.randrange(100000))

def get_valid_path(path):
    """Sanitise a directory path the same way as a file name, but keep ``/``.

    The value is stringified, trimmed, spaces become underscores, and every
    character that is not a word character, ``-``, ``.`` or ``/`` is dropped.

    Returns:
        The cleaned path, or ``"ErrorFileName<random number>"`` when cleaning
        leaves an empty string, ``"."`` or ``".."``.
    """
    underscored = str(path).strip().replace(" ", "_")
    cleaned = re.sub(r"(?u)[^-\w./]", "", underscored)
    if cleaned not in {"", ".", ".."}:
        return cleaned
    # Nothing usable survived: fall back to a randomised placeholder name.
    return "ErrorFileName" + str(random.randrange(100000))

def dumpAPIRequest(fileName, data, path=previousRequestsBackupPath):
    """Pickle ``data`` into ``<path><fileName>.bin`` and return that file path.

    Both ``path`` and ``fileName`` are sanitised first, and the parent
    directory is created when it does not exist yet. ``path`` is concatenated
    as-is, so it is expected to end with a ``/``.
    """
    target = "".join((get_valid_path(path), get_valid_filename(fileName), ".bin"))
    parentDir = os.path.dirname(target)
    os.makedirs(parentDir, exist_ok=True)
    with open(target, "wb") as sink:
        pickle.dump(data, sink)
    return target

def loadAPIRequest(fileName):
    """Load a previously pickled response from the request cache directory.

    Returns:
        The unpickled object, or ``{}`` when no cache file exists for
        ``fileName``.
    """
    cachePath = "".join((previousRequestsBackupPath, get_valid_filename(fileName), ".bin"))
    if os.path.exists(cachePath):
        # Only files under the cache directory are unpickled; never point this
        # at data from an untrusted source.
        with open(cachePath, "rb") as source:
            return pickle.load(source)
    return {}

def checkForAPIRequest(url):
    """Return True when a cached ``.bin`` file exists for ``url``."""
    cachePath = "".join((previousRequestsBackupPath, get_valid_filename(url), ".bin"))
    return os.path.exists(cachePath)

def dumpJSON(fileName, data, path="./"):
    """Write ``data`` as indented JSON to ``<path><fileName>.json``.

    Both ``path`` and ``fileName`` are sanitised first, and the parent
    directory is created when it does not exist yet. ``path`` is concatenated
    as-is, so it is expected to end with a ``/``.

    Returns:
        The path of the file that was written.
    """
    target = "".join((get_valid_path(path), get_valid_filename(fileName), ".json"))
    parentDir = os.path.dirname(target)
    os.makedirs(parentDir, exist_ok=True)
    with open(target, "w") as sink:
        json.dump(data, sink, indent=6)
    return target

def loadJSON(fullFileName):
    """Parse the JSON file at ``fullFileName``.

    Returns:
        The decoded content, or ``{}`` when the file does not exist.
    """
    if os.path.exists(fullFileName):
        with open(fullFileName, "r") as source:
            return json.load(source)
    return {}

def checkForJSON(fileName, path):
    """Return the content of ``<path><fileName>.json``, or ``{}`` if it is absent.

    ``path`` and ``fileName`` are sanitised the same way ``dumpJSON`` does, so
    a file written by ``dumpJSON`` with the same arguments is found again.
    """
    candidate = "".join((get_valid_path(path), get_valid_filename(fileName), ".json"))
    if not os.path.exists(candidate):
        return {}
    return loadJSON(candidate)


In [14]:
# Base URL that every endpoint path in this cell is appended to.
scratchAPI = "https://api.scratch.mit.edu"
# time.time()-based timestamp that apiRequest reads and updates to space out
# requests; 0 means nothing has been requested yet.
lastRequestTime = 0
# Spacing, in seconds, that apiRequest adds to lastRequestTime when throttling.
WAIT_TIME = 0.15
# Guards the read/update of lastRequestTime inside apiRequest.
timerLock = trio.Lock()

async def apiRequest(url):
    """GET ``url`` with on-disk caching and a simple request throttle.

    A response already stored by ``dumpAPIRequest`` is returned straight from
    the cache. Otherwise the call reserves the next free send slot (at least
    ``WAIT_TIME`` seconds after the previously reserved one), sleeps until that
    slot, performs the request and caches the result.

    Returns:
        The decoded JSON body, or ``{}`` when the response status is not OK.
        Note that the ``{}`` of a failed request is cached as well.
    """
    # Without this declaration the assignment below makes the name local and
    # the first read raises UnboundLocalError.
    global lastRequestTime

    if checkForAPIRequest(url):
        return loadAPIRequest(url)

    async with timerLock:
        now = time.time()
        # Reserve a send slot and record *that* slot. Recording "now" instead
        # would let every task that arrives while another one is still
        # sleeping fire at the same moment.
        sendTime = max(lastRequestTime + WAIT_TIME, now)
        lastRequestTime = sendTime
    await trio.sleep(sendTime - now)

    # NOTE(review): requests.get is a blocking call, so other Trio tasks are
    # stalled while it runs; consider trio.to_thread.run_sync and a timeout.
    response = requests.get(url)
    data = response.json() if response.ok else {}

    dumpAPIRequest(url, data)

    return data

async def getAllResults(url):
    """Collect every page of an offset-paginated listing.

    Pages of 40 entries are requested through ``apiRequest`` with
    ``?limit=40&offset=<n>`` appended to ``url`` until a page comes back with
    fewer than 40 entries.

    Returns:
        The first page object, extended in place with all following pages.
    """
    limit = 40
    pagedURL = url + "?limit=" + str(limit) + "&offset="
    offset = 0
    collected = await apiRequest(pagedURL + str(offset))
    lastPageSize = len(collected)
    # A full page means there may be more entries behind it.
    while lastPageSize >= limit:
        offset += limit
        page = await apiRequest(pagedURL + str(offset))
        lastPageSize = len(page)
        if lastPageSize > 0:
            collected += page
    return collected

async def getAllResultsDateBased(url):
    """Collect every page of a listing that is paginated by date.

    The first page is requested with ``?limit=40``; each following page is
    requested with the ``datetime_created`` value of the last collected entry
    passed as ``dateLimit``. Paging stops when a page holds fewer than 40
    entries or contributes nothing new.

    Returns:
        The first page object, extended in place with the new entries of all
        following pages.
    """
    limit = 40
    url = url + "?limit=" + str(limit)
    singleList = await apiRequest(url)
    results = singleList
    while len(singleList) >= limit:
        lastItem = results[-1]
        # Entries are decoded JSON objects (dicts), so the field is read by
        # key rather than as an attribute.
        dateLimit = lastItem["datetime_created"]
        # The URL already carries "?limit=", so further parameters use "&".
        singleList = await apiRequest(url + "&dateLimit=" + str(dateLimit))
        if lastItem in singleList:
            # The page overlaps what we already have: keep only what follows.
            newItems = singleList[singleList.index(lastItem) + 1:]
        else:
            newItems = singleList
        if not newItems:
            # No progress; asking again with the same dateLimit would loop forever.
            break
        results += newItems
    return results

async def getCommentsWithReplies(url):
    """Fetch all comments under ``url`` and attach each comment's replies.

    Comments with a positive ``reply_count`` get their ``replies`` key filled
    from ``<url>/<comment id>/replies``; the lookups run concurrently in a
    nursery. All other comments get ``replies = {}`` and ``reply_count = 0``.

    Returns:
        The comment list returned by ``getAllResults``, modified in place.
    """
    comments = await getAllResults(url)

    async def attachReplies(comment):
        comment["replies"] = await getAllResults(url + "/" + str(comment["id"]) + "/replies")

    async with trio.open_nursery() as nursery:
        for comment in comments:
            if "reply_count" in comment and comment["reply_count"] > 0:
                # Pass the comment as an argument: the tasks only start once
                # the loop has finished, so a closure over the loop variable
                # would make every task work on the last comment.
                nursery.start_soon(attachReplies, comment)
            else:
                comment["replies"] = {}
                comment["reply_count"] = 0
    return comments


projectInfoAPI = "/projects/"
async def getProjectInfo(projectID):
    """Request ``/projects/<projectID>`` through ``apiRequest`` and return the result."""
    projectURL = "".join((scratchAPI, projectInfoAPI, str(projectID)))
    return await apiRequest(projectURL)

async def getProjectUserName(projectID):
    """Return ``["author"]["username"]`` from the project info of ``projectID``.

    Raises:
        KeyError: when the project info lacks those keys (``apiRequest``
            returns ``{}`` for a failed request).
    """
    # Await first, then subscript: ``await f(x)[...]`` would subscript the
    # coroutine object itself and raise TypeError.
    projectInfo = await getProjectInfo(projectID)
    return projectInfo["author"]["username"]


userInfoAPI = "/users/"
async def getUserInfo(userName):
    """Request ``/users/<userName>`` through ``apiRequest`` and return the result."""
    userURL = f"{scratchAPI}{userInfoAPI}{userName!s}"
    return await apiRequest(userURL)

userFavoritesAPI = "/favorites"
async def getUserFavorites(userName):
    """Fetch the favorites listing of ``userName`` and fill in author usernames.

    Every entry of ``/users/<userName>/favorites`` gets
    ``["author"]["username"]`` set from that project's own info record; the
    lookups run concurrently in a nursery.

    Returns:
        The favorites list returned by ``getAllResults``, modified in place.
    """
    # Query the favorites endpoint; the previous code asked for the user's own
    # projects (userProjectsAPI) and left userFavoritesAPI unused.
    projects = await getAllResults(scratchAPI + userInfoAPI + str(userName) + userFavoritesAPI)

    async def attachUserName(project):
        project["author"]["username"] = await getProjectUserName(project["id"])

    async with trio.open_nursery() as nursery:
        for project in projects:
            # Pass the project as an argument: the tasks only start once the
            # loop has finished, so a closure over the loop variable would
            # make every task update the last project.
            nursery.start_soon(attachUserName, project)
    return projects

userFollowersAPI = "/followers"
async def getUserFollowers(userName):
    """Return every page of ``/users/<userName>/followers``."""
    followersURL = f"{scratchAPI}{userInfoAPI}{userName!s}{userFollowersAPI}"
    return await getAllResults(followersURL)

userFollowingAPI = "/following"
async def getUserFollowing(userName):
    """Return every page of ``/users/<userName>/following``."""
    followingURL = "".join((scratchAPI, userInfoAPI, str(userName), userFollowingAPI))
    return await getAllResults(followingURL)

userProjectsAPI = "/projects"
async def getUserProjects(userName):
    """Return every page of ``/users/<userName>/projects``.

    Each entry's ``["author"]["username"]`` is set to ``userName`` before the
    list is returned.
    """
    listingURL = "".join((scratchAPI, userInfoAPI, str(userName), userProjectsAPI))
    ownedProjects = await getAllResults(listingURL)
    for entry in ownedProjects:
        entry["author"]["username"] = userName
    return ownedProjects

userStudiosAPI = "/studios/curate"
async def getUserStudios(userName):
    """Return every page of ``/users/<userName>/studios/curate``."""
    studiosURL = f"{scratchAPI}{userInfoAPI}{userName!s}{userStudiosAPI}"
    return await getAllResults(studiosURL)


projectRemixesAPI = "/remixes"
async def getProjectRemixes(projectID):
    """Return every page of ``/projects/<projectID>/remixes``."""
    remixesURL = "".join((scratchAPI, projectInfoAPI, str(projectID), projectRemixesAPI))
    return await getAllResults(remixesURL)

userProjectStudiosAPI = "/studios"
async def getUserProjectStudiosAPI(userName, projectID):
    """Return every page of ``/users/<userName>/projects/<projectID>/studios``."""
    ownerPart = scratchAPI + userInfoAPI + str(userName)
    projectPart = projectInfoAPI + str(projectID)
    return await getAllResults(ownerPart + projectPart + userProjectStudiosAPI)

userProjectCommentsAPI = "/comments"
async def getUserProjectComments(userName, projectID):
    """Return the comments of ``/users/<userName>/projects/<projectID>/comments``.

    Replies are attached by ``getCommentsWithReplies``.
    """
    ownerPart = scratchAPI + userInfoAPI + str(userName)
    projectPart = projectInfoAPI + str(projectID)
    return await getCommentsWithReplies(ownerPart + projectPart + userProjectCommentsAPI)


studioInfoAPI = "/studios/"
async def getStudioInfo(studioID):
    """Request ``/studios/<studioID>`` through ``apiRequest`` and return the result."""
    studioURL = f"{scratchAPI}{studioInfoAPI}{studioID!s}"
    return await apiRequest(studioURL)

studioProjectsAPI = "/projects"
async def getStudioProjects(studioID):
    """Return every page of ``/studios/<studioID>/projects``."""
    projectsURL = "".join((scratchAPI, studioInfoAPI, str(studioID), studioProjectsAPI))
    return await getAllResults(projectsURL)

studioManagersAPI = "/managers"
async def getStudioManagers(studioID):
    """Return every page of ``/studios/<studioID>/managers``."""
    managersURL = f"{scratchAPI}{studioInfoAPI}{studioID!s}{studioManagersAPI}"
    return await getAllResults(managersURL)

studioCuratorsAPI = "/curators"
async def getStudioCurators(studioID):
    """Return every page of ``/studios/<studioID>/curators``."""
    curatorsURL = "".join((scratchAPI, studioInfoAPI, str(studioID), studioCuratorsAPI))
    return await getAllResults(curatorsURL)

studioCommentsAPI = "/comments"
async def getStudioComments(studioID):
    """Return the comments of ``/studios/<studioID>/comments``.

    Replies are attached by ``getCommentsWithReplies``.
    """
    commentsURL = f"{scratchAPI}{studioInfoAPI}{studioID!s}{studioCommentsAPI}"
    return await getCommentsWithReplies(commentsURL)


In [6]:
# Directory prefix for the JSON output. The *Shallow / *Deep functions below
# extend this global while they run and print it as a progress trace.
outputPath = "./"

def getUserDataBasic(userName):
    """Collect the directly available data of one user (no recursion).

    The getters are coroutine functions, so they are awaited inside a Trio run
    started here; calling them without awaiting would only store coroutine
    objects. Because this starts its own ``trio.run``, it must be called from
    synchronous code, not from inside another Trio run.

    Returns:
        A dict with the keys ``userInfo``, ``userProjects``, ``userFavorites``,
        ``userFollowers``, ``userFollowing`` and ``userStudios``.
    """
    print(outputPath)

    async def collect():
        user = {}
        user["userInfo"] = await getUserInfo(userName)
        user["userProjects"] = await getUserProjects(userName)
        user["userFavorites"] = await getUserFavorites(userName)
        user["userFollowers"] = await getUserFollowers(userName)
        user["userFollowing"] = await getUserFollowing(userName)
        user["userStudios"] = await getUserStudios(userName)
        return user

    return trio.run(collect)

def getProjectDataBasic(userName, projectID):
    """Collect the directly available data of one project (no recursion).

    The getters are coroutine functions, so they are awaited inside a Trio run
    started here; calling them without awaiting would only store coroutine
    objects. Because this starts its own ``trio.run``, it must be called from
    synchronous code, not from inside another Trio run.

    Returns:
        A dict with the keys ``projectInfo``, ``projectRemixes``,
        ``projectStudios`` and ``projectComments``.
    """
    print(outputPath)

    async def collect():
        project = {}
        project["projectInfo"] = await getProjectInfo(projectID)
        project["projectRemixes"] = await getProjectRemixes(projectID)
        project["projectStudios"] = await getUserProjectStudiosAPI(userName, projectID)
        project["projectComments"] = await getUserProjectComments(userName, projectID)
        return project

    return trio.run(collect)

def getStudioDataBasic(studioID):
    """Collect the directly available data of one studio (no recursion).

    The getters are coroutine functions, so they are awaited inside a Trio run
    started here; calling them without awaiting would only store coroutine
    objects. Because this starts its own ``trio.run``, it must be called from
    synchronous code, not from inside another Trio run.

    Returns:
        A dict with the keys ``studioInfo``, ``studioProjects``,
        ``studioManagers``, ``studioCurators`` and ``studioComments``.
    """
    print(outputPath)

    async def collect():
        studio = {}
        studio["studioInfo"] = await getStudioInfo(studioID)
        studio["studioProjects"] = await getStudioProjects(studioID)
        studio["studioManagers"] = await getStudioManagers(studioID)
        studio["studioCurators"] = await getStudioCurators(studioID)
        studio["studioComments"] = await getStudioComments(studioID)
        return studio

    return trio.run(collect)

def getStudioDataShallow(studioID):
    """Dump a studio plus the basic data of its projects, managers and curators.

    The output goes to ``<outputPath><studio title>/<studio title>.json``.
    The global ``outputPath`` is extended while the function runs and is
    always restored on exit, including the cached early return and errors.

    Returns:
        The previously dumped JSON content when the output file already
        exists, otherwise the path of the file that was written.
    """
    global outputPath
    studio = getStudioDataBasic(studioID)
    fileName = studio["studioInfo"]["title"]
    outputPathBackup = outputPath
    try:
        outputPath += get_valid_filename(fileName) + "/"
        print(outputPath)
        jsonCheck = checkForJSON(fileName, outputPath)
        if jsonCheck != {}:
            return jsonCheck
        studioPath = outputPath
        for studioProject in studio["studioProjects"]:
            outputPath = studioPath + "projects/"
            studioProject["projectData"] = getProjectDataBasic(studioProject["username"], studioProject["id"])
        for studioManager in studio["studioManagers"]:
            outputPath = studioPath + "managers/"
            # NOTE(review): the user getters take a user name but receive the
            # "id" field here; confirm whether "username" was intended.
            studioManager["userData"] = getUserDataBasic(studioManager["id"])
        for studioCurator in studio["studioCurators"]:
            # Curators belong under "curators/" (as in getStudioDataDeep), not "studios/".
            outputPath = studioPath + "curators/"
            studioCurator["userData"] = getUserDataBasic(studioCurator["id"])
        outputPath = studioPath
        return dumpJSON(fileName, studio, outputPath)
    finally:
        # Undo the path change on every exit so later calls start from the
        # caller's directory.
        outputPath = outputPathBackup

def getProjectDataShallow(userName, projectID):
    """Dump a project plus the basic data of its commenters, remixes and studios.

    The output goes to ``<outputPath><project title>/<project title>.json``.
    The global ``outputPath`` is extended while the function runs and is
    always restored on exit, including the cached early return and errors.

    Returns:
        The previously dumped JSON content when the output file already
        exists, otherwise the path of the file that was written.
    """
    global outputPath
    project = getProjectDataBasic(userName, projectID)
    fileName = project["projectInfo"]["title"]
    outputPathBackup = outputPath
    try:
        outputPath += get_valid_filename(fileName) + "/"
        print(outputPath)
        jsonCheck = checkForJSON(fileName, outputPath)
        if jsonCheck != {}:
            return jsonCheck
        projectPath = outputPath
        for projectComment in project["projectComments"]:
            outputPath = projectPath + "commentUsers/"
            projectComment["userData"] = getUserDataBasic(projectComment["author"]["username"])
        for projectRemix in project["projectRemixes"]:
            outputPath = projectPath + "remixes/"
            projectRemix["projectData"] = getProjectDataBasic(projectRemix["author"]["username"], projectRemix["id"])
        for projectStudio in project["projectStudios"]:
            outputPath = projectPath + "studios/"
            projectStudio["studioData"] = getStudioDataBasic(projectStudio["id"])
        outputPath = projectPath
        return dumpJSON(fileName, project, outputPath)
    finally:
        # Undo the path change on every exit so later calls start from the
        # caller's directory.
        outputPath = outputPathBackup

def getUserDataShallow(userName):
    """Dump a user plus the basic data of everything directly linked to them.

    Linked items are the user's projects, favorites, followers, followed users
    and curated studios. The output goes to
    ``<outputPath><userName>/<username from userInfo>.json``. The global
    ``outputPath`` is extended while the function runs and is always restored
    on exit, including the cached early return and errors.

    Returns:
        The previously dumped JSON content when the output file already
        exists, otherwise the path of the file that was written.
    """
    global outputPath
    user = getUserDataBasic(userName)
    outputPathBackup = outputPath
    try:
        outputPath += get_valid_filename(userName) + "/"
        print(outputPath)
        fileName = user["userInfo"]["username"]
        jsonCheck = checkForJSON(fileName, outputPath)
        if jsonCheck != {}:
            return jsonCheck
        userPath = outputPath
        for userProject in user["userProjects"]:
            outputPath = userPath + "projects/"
            userProject["projectData"] = getProjectDataBasic(userProject["author"]["username"], userProject["id"])
        for userFavorite in user["userFavorites"]:
            outputPath = userPath + "favorites/"
            userFavorite["projectData"] = getProjectDataBasic(userFavorite["author"]["username"], userFavorite["id"])
        for userFollower in user["userFollowers"]:
            outputPath = userPath + "followers/"
            # NOTE(review): the user getters take a user name but receive the
            # "id" field here and below; confirm whether "username" was intended.
            userFollower["userData"] = getUserDataBasic(userFollower["id"])
        for userFollow in user["userFollowing"]:
            outputPath = userPath + "following/"
            userFollow["userData"] = getUserDataBasic(userFollow["id"])
        for userStudio in user["userStudios"]:
            outputPath = userPath + "studios/"
            userStudio["studioData"] = getStudioDataBasic(userStudio["id"])
        outputPath = userPath
        return dumpJSON(fileName, user, outputPath)
    finally:
        # Undo the path change on every exit so later calls start from the
        # caller's directory.
        outputPath = outputPathBackup

def getStudioDataDeep(studioID):
    """Dump a studio and run the shallow crawl on its projects, managers and curators.

    The output goes to ``<outputPath><studio title>/<studio title>.json``;
    the nested shallow crawls write below that directory. The global
    ``outputPath`` is extended while the function runs and is always restored
    on exit, including the cached early return and errors.

    Returns:
        The previously dumped JSON content when the output file already
        exists, otherwise the path of the file that was written.
    """
    global outputPath
    studio = getStudioDataBasic(studioID)
    fileName = studio["studioInfo"]["title"]
    outputPathBackup = outputPath
    try:
        outputPath += get_valid_filename(fileName) + "/"
        print(outputPath)
        jsonCheck = checkForJSON(fileName, outputPath)
        if jsonCheck != {}:
            return jsonCheck
        studioPath = outputPath
        for studioProject in studio["studioProjects"]:
            outputPath = studioPath + "projects/"
            studioProject["projectData"] = getProjectDataShallow(studioProject["username"], studioProject["id"])
        for studioManager in studio["studioManagers"]:
            outputPath = studioPath + "managers/"
            # NOTE(review): the user getters take a user name but receive the
            # "id" field here and below; confirm whether "username" was intended.
            studioManager["userData"] = getUserDataShallow(studioManager["id"])
        for studioCurator in studio["studioCurators"]:
            outputPath = studioPath + "curators/"
            studioCurator["userData"] = getUserDataShallow(studioCurator["id"])
        outputPath = studioPath
        return dumpJSON(fileName, studio, outputPath)
    finally:
        # Undo the path change on every exit so later calls start from the
        # caller's directory.
        outputPath = outputPathBackup

def getProjectDataDeep(userName, projectID):
    """Dump a project and run the shallow crawl on its commenters, remixes and studios.

    The output goes to ``<outputPath><project title>/<project title>.json``;
    the nested shallow crawls write below that directory. The global
    ``outputPath`` is extended while the function runs and is always restored
    on exit, including the cached early return and errors.

    Returns:
        The previously dumped JSON content when the output file already
        exists, otherwise the path of the file that was written.
    """
    global outputPath
    project = getProjectDataBasic(userName, projectID)
    fileName = project["projectInfo"]["title"]
    outputPathBackup = outputPath
    try:
        outputPath += get_valid_filename(fileName) + "/"
        print(outputPath)
        jsonCheck = checkForJSON(fileName, outputPath)
        if jsonCheck != {}:
            return jsonCheck
        projectPath = outputPath
        for projectComment in project["projectComments"]:
            outputPath = projectPath + "commentUsers/"
            projectComment["userData"] = getUserDataShallow(projectComment["author"]["username"])
        for projectRemix in project["projectRemixes"]:
            outputPath = projectPath + "remixes/"
            projectRemix["projectData"] = getProjectDataShallow(projectRemix["author"]["username"], projectRemix["id"])
        for projectStudio in project["projectStudios"]:
            outputPath = projectPath + "studios/"
            projectStudio["studioData"] = getStudioDataShallow(projectStudio["id"])
        outputPath = projectPath
        return dumpJSON(fileName, project, outputPath)
    finally:
        # Undo the path change on every exit so later calls start from the
        # caller's directory.
        outputPath = outputPathBackup

def getUserDataDeep(userName):
    """Dump a user and crawl everything directly linked to them one level deeper.

    Projects, favorites and studios are crawled with the deep variants;
    followers and followed users with the shallow variant. The output goes to
    ``<outputPath><userName>/<username from userInfo>.json``; nested crawls
    write below that directory. The global ``outputPath`` is extended while
    the function runs and is always restored on exit, including the cached
    early return and errors (for example an interrupted crawl).

    Returns:
        The previously dumped JSON content when the output file already
        exists, otherwise the path of the file that was written.
    """
    global outputPath
    user = getUserDataBasic(userName)
    outputPathBackup = outputPath
    try:
        outputPath += get_valid_filename(userName) + "/"
        print(outputPath)
        fileName = user["userInfo"]["username"]
        jsonCheck = checkForJSON(fileName, outputPath)
        if jsonCheck != {}:
            return jsonCheck
        userPath = outputPath
        for userProject in user["userProjects"]:
            outputPath = userPath + "projects/"
            userProject["projectData"] = getProjectDataDeep(userProject["author"]["username"], userProject["id"])
        for userFavorite in user["userFavorites"]:
            outputPath = userPath + "favorites/"
            userFavorite["projectData"] = getProjectDataDeep(userFavorite["author"]["username"], userFavorite["id"])
        for userFollower in user["userFollowers"]:
            outputPath = userPath + "followers/"
            # NOTE(review): the user getters take a user name but receive the
            # "id" field here and below; confirm whether "username" was intended.
            userFollower["userData"] = getUserDataShallow(userFollower["id"])
        for userFollow in user["userFollowing"]:
            outputPath = userPath + "following/"
            userFollow["userData"] = getUserDataShallow(userFollow["id"])
        for userStudio in user["userStudios"]:
            outputPath = userPath + "studios/"
            userStudio["studioData"] = getStudioDataDeep(userStudio["id"])
        outputPath = userPath
        return dumpJSON(fileName, user, outputPath)
    finally:
        # Undo the path change on every exit so later calls start from the
        # caller's directory.
        outputPath = outputPathBackup


In [7]:
# Run the deepest crawl for the "fespadea" account; the bare name on the last
# line makes the notebook display whatever getUserDataDeep returned.
fespadeaUserData = getUserDataDeep("fespadea")
fespadeaUserData


./


KeyboardInterrupt: 

In [None]:
# Run the deepest crawl for the "tailsdroid819" account and display the result.
tailsdroid819UserData = getUserDataDeep("tailsdroid819")
tailsdroid819UserData


In [None]:
# Run the deepest crawl and display the result.
# NOTE(review): the user name literal equals the variable name, suffix
# included; presumably "fespadea_tester" was intended. Confirm before running.
fespadea_testerUserData = getUserDataDeep("fespadea_testerUserData")
fespadea_testerUserData
