In [19]:
import requests
from requests.adapters import HTTPAdapter, Retry
import time
import json
import re
import random
import os
import pickle
import trio
import subprocess
import shutil
from enum import Enum
from functools import partial
# import win32file
# win32file._setmaxstdio(8192)

PREVIOUS_REQUESTS_BACKUP_PATH = "./previousRequests/"

requestSession = requests.Session()
retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[ 500, 502, 503, 504 ])
requestSession.mount("", HTTPAdapter(max_retries=retries))


def get_valid_filename(name):
    s = str(name).strip().replace(" ", "_")
    s = re.sub(r"(?u)[^-\w.]", "", s)
    if s in {"", ".", ".."}:
        return "ErrorFileName" + str(random.randrange(100000))
    return s


def get_valid_path(path):
    s = str(path).strip().replace(" ", "_")
    s = re.sub(r"(?u)[^-\w./]", "", s)
    if s in {"", ".", ".."}:
        return "ErrorFileName" + str(random.randrange(100000))
    if s[-1] == ".":
        s += "_"
    return s


async def dumpAPIRequest(fileName, data, path=PREVIOUS_REQUESTS_BACKUP_PATH):
    fullFileName = get_valid_path(path) + get_valid_filename(fileName) + ".pkl"
    os.makedirs(os.path.dirname(fullFileName), exist_ok=True)
    with open(fullFileName, "wb") as outputFile:
        pickle.dump(data, outputFile)
    return fullFileName


async def loadAPIRequest(fileName, path=PREVIOUS_REQUESTS_BACKUP_PATH):
    fullFileName = path + get_valid_filename(fileName) + ".pkl"
    if not os.path.exists(fullFileName):
        return {}
    with open(fullFileName, "rb") as inputFile:
        data = pickle.load(inputFile)
    return data


async def checkForAPIRequest(url, path=PREVIOUS_REQUESTS_BACKUP_PATH):
    return os.path.exists(
        path + get_valid_filename(url) + ".pkl"
    )


async def dumpJSON(fileName, data, path="./"):
    fullFileName = get_valid_path(path) + get_valid_filename(fileName) + ".json"
    os.makedirs(os.path.dirname(fullFileName), exist_ok=True)
    with open(fullFileName, "w") as outputFile:
        json.dump(data, outputFile, indent=6)
    return fullFileName
    


async def loadJSON(fullFileName):
    if not os.path.exists(fullFileName):
        return {}
    with open(fullFileName, "r") as inputFile:
        data = json.load(inputFile)
    return data


async def checkForJSON(fileName, path):
    fullFileName = get_valid_path(path) + get_valid_filename(fileName) + ".json"
    try:
        return loadJSON(fullFileName) if os.path.exists(fullFileName) else {}, fullFileName
    except Exception as e:
        print(fullFileName)
        raise e
    # return fullFileName if os.path.exists(fullFileName) else {}


async def dumpGeneric(fileName, list, type="txt", path="./"):
    fullFileName = get_valid_path(path) + get_valid_filename(fileName) + "." + type
    os.makedirs(os.path.dirname(fullFileName), exist_ok=True)
    with open(fullFileName, "w") as outputFile:
        for item in list:
            outputFile.write(str(item) + "\n")
    return fullFileName


async def nurseryReturn(nursery, assignee, location, func, *args, **kwargs):
    async def getReturn():
        assignee[location] = await func(*args, **kwargs)

    nursery.start_soon(getReturn)

In [20]:
SCRATCH_API = "https://api.scratch.mit.edu"
# WAIT_TIME = 0.1
# lastRequestTime = 0
markedRequests = set({})
markedImages = set({})
FAILED_IMAGE = b'FAILED_IMAGE'
AUTH_ADDITION = "?x-token="
progressChecker = 0
lastCheckerTime = time.time()



async def apiRequest(url):
    if checkForAPIRequest(url):
        return await loadAPIRequest(url)
    
    if url in markedRequests:
        while url in markedRequests:
            await trio.sleep(10)
        if checkForAPIRequest(url):
            return await loadAPIRequest(url)
    else:
        markedRequests.add(url)
    
    # global lastRequestTime
    # sleepTime = max(lastRequestTime + WAIT_TIME - time.time(), 0)
    # await trio.sleep(sleepTime)
    # lastRequestTime = time.time()

    # response = requestSession.get(url)
    notDone = True
    while notDone:
        try:
            response = await requestSession.get(url)
            if not response.ok and response.status_code != 404:
                raise Exception
        except Exception:
            print("-----------------------------------ConnectionError with url: " + url)
            await trio.sleep(10)
        else:
            notDone = False
    data = response.json()# if response.ok else {}

    await dumpAPIRequest(url, data)
    markedRequests.remove(url)

    global progressChecker
    progressChecker += 1
    if progressChecker % 100 == 0:
        currentTime = time.time()
        global lastCheckerTime
        print(str(progressChecker) + " requests in " + str(currentTime - lastCheckerTime) + " seconds since last check.")
        lastCheckerTime = currentTime

    return data

async def imageGet(url, path="./", backupPath = PREVIOUS_REQUESTS_BACKUP_PATH):
    imageName = re.search("[^/]*.png", url).group()
    imageBackupPath = backupPath + imageName

    if url in markedImages:
        while url in markedImages:
            await trio.sleep(10)
    elif not os.path.exists(imageBackupPath):
        markedImages.add(url)
        
        notDone = True
        async def requestImage():
            return requestSession.get(url, stream=True)
        while notDone:
            try:
                imageRequest = await requestImage()
                if not imageRequest.ok and imageRequest.status_code != 668 and imageRequest.status_code != 500:
                    raise Exception
            except Exception as e:
                print("-----------------------------------ConnectionError with url: " + url)
                await trio.sleep(10)
            else:
                notDone = False

        os.makedirs(backupPath, exist_ok=True)
        with open(imageBackupPath, 'wb') as imageFile:
            if imageRequest.ok or imageRequest.status_code == 668:
                imageRequest.raw.decode_content = True
                shutil.copyfileobj(imageRequest.raw, imageFile)
                markedImages.remove(url)
            else:
                imageFile.write(FAILED_IMAGE)
                markedImages.remove(url)
                print("Image not found: " + url)
                return
        
    with open(imageBackupPath, 'rb') as imageFile:
        if imageFile.read() == FAILED_IMAGE:
            return
    imagePath = path + imageName
    os.makedirs(path, exist_ok=True)
    shutil.copy(imageBackupPath, imagePath)


async def getAllResults(url):
    limit = 40
    url = url + "?limit=" + str(limit) + "&offset="
    offset = 0
    singleList = await apiRequest(url + str(offset))
    all = singleList
    while len(singleList) >= limit:
        offset += limit
        singleList = await apiRequest(url + str(offset))
        if len(singleList) > 0:
            all += singleList
    return all


async def getAllResultsDateBased(url):
    limit = 40
    url = url + "?limit=" + str(limit)
    singleList = await apiRequest(url)
    all = singleList
    while len(singleList) >= limit:
        dateLimit = all[-1].datetime_created
        singleList = await apiRequest(url + "?dateLimit=" + str(dateLimit))
        if len(singleList) > 0:
            overlapIndex = singleList.index(all[-1]) + 1
            if overlapIndex < len(singleList):
                truncatedList = singleList[(singleList.index(all[-1]) + 1) :]
                if len(truncatedList) > 0:
                    all += truncatedList
    return all


async def getCommentsWithReplies(url):
    comments = await getAllResults(url)
    async with trio.open_nursery() as nursery:
        for comment in comments:
            if "reply_count" in comment.keys() and comment["reply_count"] > 0:
                # async def getReplies():
                #     comment["replies"] = await getAllResults(url + "/" + str(comment["id"]) + "/replies")
                # nursery.start_soon(getReplies)
                nurseryReturn(
                    nursery,
                    comment,
                    "replies",
                    getAllResults,
                    url + "/" + str(comment["id"]) + "/replies",
                )

            else:
                comment["replies"] = {}
                comment["reply_count"] = 0
    return comments

In [21]:
# project calls

projectsInfoAPIAddition = "/projects/"
projectInfoAPI = SCRATCH_API + projectsInfoAPIAddition

async def getProjectInfo(projectID):
    return await apiRequest(projectInfoAPI + str(projectID))

projectRemixesAPIAddition = "/remixes"
async def getProjectRemixes(projectID):
    return await getAllResults(projectInfoAPI + str(projectID) + projectRemixesAPIAddition)


# bonus helper function
userIDs = {}
async def getProjectUserName(projectID, userID=None):
    if userID is None or userID not in userIDs.keys():
        projectInfo = await getProjectInfo(projectID)
        if("author" in projectInfo):
            userIDs[userID] = projectInfo["author"]["username"]
            return userIDs[userID]
        else:
            return ""
    else:
        return userIDs[userID]


In [22]:
# studio calls

studioInfoAPIAddition = "/studios/"
studioInfoAPI = SCRATCH_API + studioInfoAPIAddition
async def getStudioInfo(studioID):
    return await apiRequest(studioInfoAPI + str(studioID))

studioActivityAPIAddition = "/activity"
async def getStudioActivity(studioID):
    return await getAllResults(studioInfoAPI + str(studioID) + studioActivityAPIAddition)

studioCommentsAPIAddition = "/comments"
async def getStudioComments(studioID):
    return await getCommentsWithReplies(studioInfoAPI + str(studioID) + studioCommentsAPIAddition)

studioCuratorsAPIAddition = "/curators"
async def getStudioCurators(studioID):
    return await getAllResults(studioInfoAPI + str(studioID) + studioCuratorsAPIAddition)

studioManagersAPIAddition = "/managers"
async def getStudioManagers(studioID):
    return await getAllResults(studioInfoAPI + str(studioID) + studioManagersAPIAddition)

studioProjectsAPIAddition = "/projects"
async def getStudioProjects(studioID):
    studioProjects = await getAllResults(studioInfoAPI + str(studioID) + studioProjectsAPIAddition)
    # async with trio.open_nursery() as nursery:
    #     for i in range(len(studioProjects)):
    #         nurseryReturn(nursery, studioProjects, i, getProjectInfo, studioProjects[i]["id"])
    return studioProjects

studioUserRoleAPIAddition = "/users/"
async def getStudioUserRole(studioID, userName, authToken):
    return await getAllResults(studioInfoAPI + str(studioID) + studioUserRoleAPIAddition + userName + AUTH_ADDITION + authToken)

In [23]:
# user calls

userInfoAPIAddition = "/users/"
userInfoAPI = SCRATCH_API + userInfoAPIAddition
async def getUserInfo(userName):
    return await apiRequest(userInfoAPI + str(userName))

userFavoritesAPIAddition = "/favorites"
async def getUserFavorites(userName):
    projects = await getAllResults(userInfoAPI + str(userName) + userFavoritesAPIAddition)
    async with trio.open_nursery() as nursery:
        for project in projects:
            nurseryReturn(nursery, project["author"], "username", getProjectUserName, project["id"], project["author"]["id"])
    return projects

userFollowersAPIAddition = "/followers"
async def getUserFollowers(userName):
    return await getAllResults(userInfoAPI + str(userName) + userFollowersAPIAddition)

userFollowingAPIAddition = "/following"
async def getUserFollowing(userName):
    return await getAllResults(userInfoAPI + str(userName) + userFollowingAPIAddition)

userFollowingStudiosAPIAddition = userFollowingAPIAddition + "/studios/projects"
async def getUserFollowingStudios(userName, authToken):
    return await getAllResults(userInfoAPI + str(userName) + userFollowingStudiosAPIAddition + AUTH_ADDITION + authToken)

userFollowingUsersActivityAPIAddition = userFollowingAPIAddition + "/users/activity"
async def getUserFollowingUsersActivity(userName, authToken):
    return await getAllResults(userInfoAPI + str(userName) + userFollowingUsersActivityAPIAddition + AUTH_ADDITION + authToken)

userFollowingUsersLovesAPIAddition = userFollowingAPIAddition + "/users/loves"
async def getUserFollowingUsersLoves(userName, authToken):
    return await getAllResults(userInfoAPI + str(userName) + userFollowingUsersLovesAPIAddition + AUTH_ADDITION + authToken)

userFollowingUsersProjectsAPIAddition = userFollowingAPIAddition + "/users/projects"
async def getUserFollowingUsersProjects(userName, authToken):
    return await getAllResults(userInfoAPI + str(userName) + userFollowingUsersProjectsAPIAddition + AUTH_ADDITION + authToken)

userInvitesAPIAddition = "/invites"
async def getUserInvites(userName, authToken):
    return await getAllResults(userInfoAPI + str(userName) + userInvitesAPIAddition + AUTH_ADDITION + authToken)

userMessagesAPIAddition = "/messages"
async def getUserMessages(userName, authToken):
    return await getAllResults(userInfoAPI + str(userName) + userMessagesAPIAddition + AUTH_ADDITION + authToken)

userAlertsAPIAddition = "/messages/admin"
async def getUserAlerts(userName, authToken):
    return await getAllResults(userInfoAPI + str(userName) + userAlertsAPIAddition + AUTH_ADDITION + authToken)

userUnreadMessagesCountAPIAddition = "/messages/count"
async def getUserUnreadMessagesCount(userName, authToken):
    return await getAllResults(userInfoAPI + str(userName) + userUnreadMessagesCountAPIAddition + AUTH_ADDITION + authToken)

userProjectsAPIAddition = "/projects"
async def getUserProjects(userName):
    projects = await getAllResults(userInfoAPI + str(userName) + userProjectsAPIAddition)
    for project in projects:
        project["author"]["username"] = userName
    return projects

userRecentlyViewedAPIAddition = "/projects/recentlyviewed"
async def getUserRecentlyViewed(userName, authToken):
    return await getAllResults(userInfoAPI + str(userName) + userRecentlyViewedAPIAddition + AUTH_ADDITION + authToken)

userProjectCommentsAPIAddition = "/comments"
userProjectsAPIAdditionMiddle = "/projects/"
async def getUserProjectComments(userName, projectID):
    return await getCommentsWithReplies(userInfoAPI + str(userName) + userProjectsAPIAdditionMiddle + str(projectID) + userProjectCommentsAPIAddition)

userProjectStudiosAPIAddition = "/studios"
async def getUserProjectStudiosAPI(userName, projectID):
    return await getAllResults(userInfoAPI + str(userName) + userProjectsAPIAdditionMiddle + str(projectID) + userProjectStudiosAPIAddition)

userUnsharedProjectAPIAddition = "/visibility"
async def getUserUnsharedProject(userName, projectID):
    return await getCommentsWithReplies(userInfoAPI + str(userName) + userProjectsAPIAdditionMiddle + str(projectID) + userUnsharedProjectAPIAddition)

userStudiosAPIAddition = "/studios/curate"
async def getUserStudios(userName):
    return await getAllResults(userInfoAPI + str(userName) + userStudiosAPIAddition)

In [24]:
projectsToDownload = {}
urlsToDownload = set({})

scratchBaseURL = "https://scratch.mit.edu/"
userURL = scratchBaseURL + "users/"
projectURL = scratchBaseURL + "projects/"
studioURL = scratchBaseURL + "studios/"

levelCount = {}

class LevelCountTypes(Enum):
    DONE = "Done"
    TOTAL = "Total"


def handleLevelCount(level, countType, outputPath=""):
    if level not in levelCount.keys():
        levelCount[level] = {}
        for levelCountType in LevelCountTypes:
            levelCount[level][levelCountType] = 0
    levelCount[level][countType] += 1
    print(
        ("\t" * level)
        + str(levelCount[level][LevelCountTypes.DONE])
        + " / "
        + str(levelCount[level][LevelCountTypes.TOTAL])
        + "\t"
        + outputPath
    )


async def getData(
    dataID,
    baseURL,
    baseInfoFunction,
    dataName="data",
    level=0,
    outputPath="./",
    authUserName=None,
    authToken=None,
    backupDataInfo=None,
    folderNameLocation=None,
    imageURLLocations=[],
    dataEntries=[],
    subDataEntries=[],
    forceCompleteJSON=False,
    markProjectForDownload=False,
):
    handleLevelCount(level, LevelCountTypes.TOTAL)
    urlsToDownload.add(baseURL + str(dataID))

    if(markProjectForDownload):
        if dataID not in projectsToDownload.keys():
            projectsToDownload[dataID] = []
        projectsToDownload[dataID].append(outputPath)

    data = {}
    data["level"] = level
    dataInfoString = dataName + "Info"
    data[dataInfoString] = await baseInfoFunction(dataID)
    dataCollectionFailed = "id" not in data[dataInfoString]
    useBackup = dataCollectionFailed and backupDataInfo is not None
    badData = dataCollectionFailed and backupDataInfo is None
    if useBackup:
        data[dataInfoString] = backupDataInfo

    fileID = (
        dataID
        if folderNameLocation is None
        else data[dataInfoString][folderNameLocation]
    )
    fileName = get_valid_filename(fileID)
    outputPath += get_valid_path(fileID) + "/"
    print(outputPath)

    async with trio.open_nursery() as nursery:
        for imageURLLocation in imageURLLocations:
            imageURL = data[dataInfoString]
            for imageURLSubLocation in imageURLLocation:
                imageURL = imageURL[imageURLSubLocation]
            if type(imageURL) == str:
                nursery.start_soon(imageGet, imageURL, outputPath)
            elif type(imageURL) == dict:
                for subImageURL in imageURL.values():
                    nursery.start_soon(imageGet, subImageURL, outputPath)
            else:
                print("This is not an image URL: " + imageURL)

    jsonCheck, jsonFileName = await checkForJSON(fileName, outputPath)
    if (
        "level" in jsonCheck
        and jsonCheck["level"] >= level
        and (
            badData
            or (
                dataInfoString in jsonCheck
                and jsonCheck[dataInfoString] is data[dataInfoString]
            )
        )
    ):
        handleLevelCount(level, LevelCountTypes.DONE, outputPath)
        return jsonCheck if forceCompleteJSON else jsonFileName

    # check if call was successful
    if dataCollectionFailed:
        if useBackup:
            dumpJSON(fileName, data, outputPath)
        else:
            data = data[dataInfoString]
        handleLevelCount(level, LevelCountTypes.DONE, outputPath)
        return data

    async with trio.open_nursery() as nursery:
        for dataEntry in dataEntries:
            dataLocation = data
            if "locations" in dataEntry:
                for location in dataEntry["locations"]:
                    if location not in dataLocation:
                        dataLocation[location] = {}
                    dataLocation = dataLocation[location]
            nurseryReturn(
                nursery,
                dataLocation,
                dataEntry["subLocation"],
                dataEntry["function"],
                *dataEntry["arguements"],
            )

    if level > 0:
        newLevel = level - 1

        totalTasks = 0
        for subDataEntry in subDataEntries:
            dataLocation = data
            if "locations" in subDataEntry:
                for location in subDataEntry["locations"]:
                    if location not in dataLocation:
                        dataLocation[location] = {}
                    dataLocation = dataLocation[location]
            totalTasks += len(dataLocation)
        print(
            ("\t" * level)
            + fileID
            + " Total Tasks: "
            + str(totalTasks)
            + "----------------------------------------------"
        )

        async with trio.open_nursery() as nursery:
            for subDataEntry in subDataEntries:
                dataLocation = data
                if "locations" in subDataEntry:
                    for location in subDataEntry["locations"]:
                        if location not in dataLocation:
                            dataLocation[location] = {}
                        dataLocation = dataLocation[location]
                for dataPoint in dataLocation:
                    arguments = []
                    if "argumentLocations" in subDataEntry:
                        for argumentLocation in subDataEntry["argumentLocations"]:
                            arguments.append(dataPoint)
                            newIndex = len(arguments) - 1
                            for location in argumentLocation:
                                arguments[newIndex] = arguments[newIndex][location]
                    nurseryReturn(
                        nursery,
                        dataPoint,
                        subDataEntry["subLocation"],
                        subDataEntry["function"],
                        *arguments,
                        authUserName=authUserName,
                        authToken=authToken,
                        level=newLevel,
                        outputPath=outputPath + subDataEntry["subFolder"] + "/",
                        backupDataInfo=dataPoint,
                    )

    outputFileName = dumpJSON(fileName, data, outputPath)
    handleLevelCount(level, LevelCountTypes.DONE, outputPath)
    if forceCompleteJSON:
        return data
    else:
        return outputFileName

In [25]:
async def getProjectData(
    userName, projectID, authUserName=None, authToken=None, **kwargs
):
    dataEntries = []
    dataEntries.append(
        {
            "subLocation": "projectRemixes",
            "function": getProjectRemixes,
            "arguements": [projectID],
        }
    )
    dataEntries.append(
        {
            "subLocation": "projectStudios",
            "function": getUserProjectStudiosAPI,
            "arguements": [userName, projectID],
        }
    )
    dataEntries.append(
        {
            "subLocation": "projectComments",
            "function": getUserProjectComments,
            "arguements": [userName, projectID],
        }
    )

    subDataEntries = []
    subDataEntries.append(
        {
            "locations": ["projectRemixes"],
            "subLocation": "projectData",
            "function": getProjectData,
            "argumentLocations": [["author", "username"], ["id"]],
            "subFolder": "remixes",
        }
    )
    subDataEntries.append(
        {
            "locations": ["projectStudios"],
            "subLocation": "studioData",
            "function": getStudioData,
            "argumentLocations": [["id"]],
            "subFolder": "studios",
        }
    )
    subDataEntries.append(
        {
            "locations": ["projectComments"],
            "subLocation": "userData",
            "function": getUserData,
            "argumentLocations": [["author", "username"]],
            "subFolder": "commentUsers",
        }
    )

    return await getData(
        dataID=projectID,
        baseURL=projectURL,
        baseInfoFunction=getProjectInfo,
        dataName="project",
        authUserName=authUserName,
        authToken=authToken,
        folderNameLocation="title",
        imageURLLocations=[["image"], ["images"]],
        dataEntries=dataEntries,
        subDataEntries=subDataEntries,
        markProjectForDownload=True,
        **kwargs
    )

async def getStudioData(
    studioID, authUserName=None, authToken=None, **kwargs
):
    dataEntries = []
    dataEntries.append(
        {
            "subLocation": "studioActivity",
            "function": getStudioActivity,
            "arguements": [studioID],
        }
    )
    dataEntries.append(
        {
            "subLocation": "studioComments",
            "function": getStudioComments,
            "arguements": [studioID],
        }
    )
    dataEntries.append(
        {
            "subLocation": "studioCurators",
            "function": getStudioCurators,
            "arguements": [studioID],
        }
    )
    dataEntries.append(
        {
            "subLocation": "studioManagers",
            "function": getStudioManagers,
            "arguements": [studioID],
        }
    )
    dataEntries.append(
        {
            "subLocation": "studioProjects",
            "function": getStudioProjects,
            "arguements": [studioID],
        }
    )
    if authUserName is not None and authToken is not None:
        dataEntries.append(
            {
                "subLocation": "studio" + authUserName + "Role",
                "function": getStudioUserRole,
                "arguements": [studioID, authUserName, authToken],
            }
        )

    subDataEntries = []
    subDataEntries.append(
        {
            "locations": ["studioCurators"],
            "subLocation": "userData",
            "function": getUserData,
            "argumentLocations": [["username"]],
            "subFolder": "curators",
        }
    )
    subDataEntries.append(
        {
            "locations": ["studioManagers"],
            "subLocation": "userData",
            "function": getUserData,
            "argumentLocations": [["username"]],
            "subFolder": "managers",
        }
    )
    subDataEntries.append(
        {
            "locations": ["studioProjects"],
            "subLocation": "projectData",
            "function": getProjectData,
            "argumentLocations": [["username"], ["id"]],
            "subFolder": "projects",
        }
    )

    return await getData(
        dataID=studioID,
        baseURL=studioURL,
        baseInfoFunction=getStudioInfo,
        dataName="studio",
        authUserName=authUserName,
        authToken=authToken,
        folderNameLocation="title",
        imageURLLocations=[["image"]],
        dataEntries=dataEntries,
        subDataEntries=subDataEntries,
        **kwargs
    )

async def getUserData(
    userName, authUserName=None, authToken=None, **kwargs
):
    dataEntries = []
    dataEntries.append(
        {
            "subLocation": "userFavorites",
            "function": getUserFavorites,
            "arguements": [userName],
        }
    )
    dataEntries.append(
        {
            "subLocation": "userFollowers",
            "function": getUserFollowers,
            "arguements": [userName],
        }
    )
    dataEntries.append(
        {
            "subLocation": "userFollowing",
            "function": getUserFollowing,
            "arguements": [userName],
        }
    )
    dataEntries.append(
        {
            "subLocation": "userProjects",
            "function": getUserProjects,
            "arguements": [userName],
        }
    )
    dataEntries.append(
        {
            "subLocation": "userStudios",
            "function": getUserStudios,
            "arguements": [userName],
        }
    )
    if authUserName is userName and authToken is not None:
        dataEntries.append(
            {
                "locations": "userCurrentFrontPage",
                "subLocation": "Projects in Studios I'm Following",
                "function": getUserFollowingStudios,
                "arguements": [authUserName, authToken],
            }
        )
        dataEntries.append(
            {
                "locations": "userCurrentFrontPage",
                "subLocation": "What's Happening?",
                "function": getUserFollowingUsersActivity,
                "arguements": [authUserName, authToken],
            }
        )
        dataEntries.append(
            {
                "locations": "userCurrentFrontPage",
                "subLocation": "Projects Loved by Scratchers I'm Following",
                "function": getUserFollowingUsersLoves,
                "arguements": [authUserName, authToken],
            }
        )
        dataEntries.append(
            {
                "locations": "userCurrentFrontPage",
                "subLocation": "Projects by Scratchers I'm Following",
                "function": getUserFollowingUsersProjects,
                "arguements": [authUserName, authToken],
            }
        )
        dataEntries.append(
            {
                "locations": "userNotifications",
                "subLocation": "invites",
                "function": getUserInvites,
                "arguements": [authUserName, authToken],
            }
        )
        dataEntries.append(
            {
                "locations": "userNotifications",
                "subLocation": "messages",
                "function": getUserMessages,
                "arguements": [authUserName, authToken],
            }
        )
        dataEntries.append(
            {
                "locations": "userNotifications",
                "subLocation": "alerts",
                "function": getUserAlerts,
                "arguements": [authUserName, authToken],
            }
        )
        dataEntries.append(
            {
                "locations": "userNotifications",
                "subLocation": "unreadMessagesCount",
                "function": getUserUnreadMessagesCount,
                "arguements": [authUserName, authToken],
            }
        )
        dataEntries.append(
            {
                "subLocation": "userRecentlyViewed",
                "function": getUserRecentlyViewed,
                "arguements": [authUserName, authToken],
            }
        )

    subDataEntries = []
    subDataEntries.append(
        {
            "locations": ["userFavorites"],
            "subLocation": "projectData",
            "function": getProjectData,
            "argumentLocations": [["author", "username"], ["id"]],
            "subFolder": "favorites",
        }
    )
    subDataEntries.append(
        {
            "locations": ["userFollowers"],
            "subLocation": "userData",
            "function": getUserData,
            "argumentLocations": [["username"]],
            "subFolder": "followers",
        }
    )
    subDataEntries.append(
        {
            "locations": ["userFollowing"],
            "subLocation": "userData",
            "function": getUserData,
            "argumentLocations": [["username"]],
            "subFolder": "following",
        }
    )
    subDataEntries.append(
        {
            "locations": ["userProjects"],
            "subLocation": "projectData",
            "function": getProjectData,
            "argumentLocations": [["author", "username"], ["id"]],
            "subFolder": "projects",
        }
    )
    subDataEntries.append(
        {
            "locations": ["userStudios"],
            "subLocation": "studioData",
            "function": getStudioData,
            "argumentLocations": [["id"]],
            "subFolder": "studios",
        }
    )

    return await getData(
        dataID=userName,
        baseURL=userURL,
        baseInfoFunction=getUserInfo,
        dataName="user",
        authUserName=authUserName,
        authToken=authToken,
        imageURLLocations=[["profile", "images"]],
        dataEntries=dataEntries,
        subDataEntries=subDataEntries,
        **kwargs
    )

In [28]:
# fileNames = os.listdir(PREVIOUS_REQUESTS_BACKUP_PATH)
# fileCount = 0
# fileTotal = len(fileNames)


# for fileName in fileNames:
#     if re.search(".pkl", fileName):
#         fileDirectory = os.path.join(PREVIOUS_REQUESTS_BACKUP_PATH, fileName)
#         if os.path.getsize(fileDirectory) == 5:
#             os.remove(fileDirectory)
#     if re.search(".png", fileName):
#         fileDirectory = os.path.join(PREVIOUS_REQUESTS_BACKUP_PATH, fileName)
#         if os.path.getsize(fileDirectory) == 12:
#             os.remove(fileDirectory)
#     fileCount += 1
#     if fileCount % 1000 == 0:
#         print(f"{fileCount} / {fileTotal}")

In [27]:
authData = trio.run(loadJSON("scratchAuthenticationToken.json"))
if(authData["x-token"] is None):
    trio.run(partial(getUserData, authData["username"], level=authData["level"], outputPath="./"))
else:
    trio.run(partial(getUserData, authData["username"], authData["username"], authData["x-token"], level=authData["level"], outputPath="./"))

TypeError: Trio was expecting an async function, but instead it got a coroutine object <coroutine object loadJSON at 0x0000023084F34040>

Probably you did something like:

  trio.run(loadJSON(...))            # incorrect!
  nursery.start_soon(loadJSON(...))  # incorrect!

Instead, you want (notice the parentheses!):

  trio.run(loadJSON, ...)            # correct!
  nursery.start_soon(loadJSON, ...)  # correct!

In [None]:
print(trio.run(dumpJSON, authData["username"] + "_projects", projectsToDownload))
print(trio.run(dumpGeneric, authData["username"] + "_urls", urlsToDownload, type="txt"))

./swifty2_projects.json
./swifty2_urls.txt


In [None]:
p = subprocess.Popen(['node', './downloadProject.js'], stdout=subprocess.PIPE)
for line in iter(p.stdout.readline, b""):
        print(line.decode(), end="")
# for line in iter(p.stderr.readline, b""):
#         print(line.decode(), end="")

metadata [33m0[39m
Done
metadata [33m1[39m
project [33m0[39m
project [33m0.012793133685279854[39m
project [33m0.025586267370559707[39m
project [33m0.03837940105583956[39m
project [33m0.051172534741119415[39m
project [33m0.06396566842639927[39m
project [33m0.07675880211167913[39m
project [33m0.08955193579695897[39m
project [33m0.10234506948223883[39m
project [33m0.11513820316751869[39m
project [33m0.12793133685279853[39m
project [33m0.14072447053807838[39m
project [33m0.15351760422335825[39m
project [33m0.1663107379086381[39m
project [33m0.17910387159391794[39m
project [33m0.1918970052791978[39m
project [33m0.20469013896447766[39m
project [33m0.2174832726497575[39m
project [33m0.23027640633503738[39m
project [33m0.24306954002031722[39m
project [33m0.25586267370559707[39m
project [33m0.2686558073908769[39m
project [33m0.28144894107615676[39m
project [33m0.29424207476143666[39m
project [33m0.3070352084467165[39m
project [33m0.31982834