# Parsing 2 - Working with APIs

We will parse 3D models from https://www.myminifactory.com/

They provide an API for building apps with their data.

You can read the docs here https://www.myminifactory.com/api-doc/index.html

## Step 0 - Preparation

In [None]:
import os, shutil # working with filesystem
import requests # get data from internet
from math import ceil # some math
from time import sleep # some time to sleep :)

Set parameters

In [None]:
# --- Scraper settings -------------------------------------------------------
data_dir: str = "myminifactory"  # root folder where everything is saved
query: str = "portrait"  # text sent to the search endpoint
per_page: int = 5  # number of results requested per page
page_start: int = 1  # index of the first results page to fetch
total_pages: int = 1  # placeholder; replaced by the real page count later
skip_img_download: bool = False  # set to True when images are already on disk
skip_model_download: bool = False  # set to True to skip fetching model files
items: list = []  # models to download

Create directories to store the data if they do not exist

In [None]:
# Make sure the data directory and its "images" and "models" sub-folders exist.
# exist_ok=True turns the call into a no-op for folders that are already
# there, so no separate os.path.exists() check is needed (and nothing can
# create the folder between the check and the makedirs call).
for subdir in ("images", "models"):
    # makedirs also creates the parent data_dir when it is missing
    os.makedirs(f"{data_dir}/{subdir}", exist_ok=True)

Create cookies (website wants to be sure that you're human)

In [None]:
# Hit the site once and keep the response: its cookies are sent along with
# every API request made below.
# The timeout keeps the notebook from hanging forever if the site never answers.
request_cookie = requests.post('https://www.myminifactory.com/', timeout=30)

Get total amount of search results and calculate total pages

In [None]:
# Ask the search endpoint how many results match `query` and derive the
# number of result pages from that total.
data_url = "https://www.myminifactory.com/api/v2/search"
response = requests.get(
    data_url,
    # let requests build the query string so `query` is URL-encoded
    params={"q": query, "page": 0, "per_page": 0},
    cookies=request_cookie.cookies,
    timeout=30,
)
response.raise_for_status()  # stop here on an HTTP error instead of in .json()
data = response.json()
# Round up: a partially filled last page still counts, but an exact multiple
# of per_page must not add an extra, empty page (int(total / per_page) + 1 did).
total_pages = ceil(data["total_count"] / per_page)
print("Total amount of results: ", data["total_count"])
print("Total amount of pages: ", total_pages)

In [None]:
# Demo only: limit the run to a single page, so just 5 objects are downloaded.
total_pages = 1

## Step 1 - Download images of models

Get results page by page and download all images to data directory

In [None]:
if not skip_img_download:
    # Walk the search results page by page and save every image of every
    # model to <data_dir>/images/<model id>/<image id>.<extension>.
    for page in range(page_start, total_pages + 1):
        response = requests.get(
            "https://www.myminifactory.com/api/v2/search",
            # let requests build the query string so `query` is URL-encoded
            params={"q": query, "page": page, "per_page": per_page},
            cookies=request_cookie.cookies,
            timeout=30,
        )
        response.raise_for_status()  # stop on an HTTP error instead of in .json()
        data = response.json()
        print(f"Parsing page #{page}")
        for item in data["items"]:
            item_dir = f'{data_dir}/images/{item["id"]}'
            print(f'Downloading {item["id"]}')
            os.makedirs(item_dir, exist_ok=True)
            for image in item["images"]:
                img_url = image["original"]["url"]
                img_id = image["id"]
                # drop any query string so it cannot end up in the extension
                img_ext = img_url.split("?", 1)[0].split(".")[-1]
                # the with-block closes the streamed response, which `del`
                # alone does not do
                with requests.get(img_url, stream=True, timeout=30) as img_response:
                    if img_response.ok:
                        with open(f'{item_dir}/{img_id}.{img_ext}', 'wb') as out_file:
                            # iter_content yields the decoded body; copying
                            # response.raw would write compressed bytes as-is
                            for chunk in img_response.iter_content(chunk_size=8192):
                                out_file.write(chunk)
                    else:
                        # do not store an error page under an image file name
                        print(f'Skipping image {img_id}: HTTP {img_response.status_code}')
                sleep(5) # wait n seconds before download next file
        sleep(10) # wait n seconds before get next page

Now that we have all the images, we can check whether we need these models.

Sometimes the search results contain useless models which we don't want to download.

Just check the image directory and delete the folders of the items you don't need

## Step 2 - Download models

Get ids of the models we want to download

In [None]:
if not skip_model_download:
    # Every folder left in <data_dir>/images is taken as the id of a model to
    # download; the folder name is used as that id.
    images_dir = f"{data_dir}/images"
    for dirname in os.listdir(images_dir):
        if dirname.startswith("."):
            continue  # hidden entries are not model folders
        if not os.path.isdir(f"{images_dir}/{dirname}"):
            continue  # a stray file is not a model folder either
        if dirname not in items:
            # re-running this cell must not queue the same model twice
            items.append(dirname)
    print(items)

In [None]:
# For every selected model id: fetch the object's metadata from the API, then
# download each of its files to <data_dir>/models/<object id>/<file name>.
for item in items:
    data_url = f"https://www.myminifactory.com/api/v2/objects/{item}"
    response = requests.get(data_url, cookies=request_cookie.cookies, timeout=30)
    response.raise_for_status()  # stop on an HTTP error instead of in .json()
    data = response.json()
    model_dir = f'{data_dir}/models/{data["id"]}'
    os.makedirs(model_dir, exist_ok=True)
    for file_item in data["files"]["items"]:
        viewer_url = file_item["viewer_url"]
        if not viewer_url:
            # without a viewer url the download url cannot be built
            print(f"Skipping a file of {data['id']}: no viewer_url")
            continue
        # the last two path segments of the viewer url are reused to build
        # the CDN download url and the local file name
        asset_dir, filename = viewer_url.split("/")[-2:]
        download_url = f"https://cdn.myminifactory.com/assets/object-assets/{asset_dir}/threedfiles/{filename}"
        print(f"Downloading {data['id']} to {model_dir}/{filename}")
        # the with-block closes the streamed response, which `del` alone
        # does not do
        with requests.get(download_url, stream=True, timeout=30) as file_response:
            if file_response.ok:
                with open(f'{model_dir}/{filename}', 'wb') as out_file:
                    # iter_content yields the decoded body; copying
                    # response.raw would write compressed bytes as-is
                    for chunk in file_response.iter_content(chunk_size=8192):
                        out_file.write(chunk)
            else:
                # do not store an error page under a model file name
                print(f"Skipping {filename}: HTTP {file_response.status_code}")
        sleep(10) # wait n seconds before download next file
    sleep(10) # wait n seconds before get next model

Now you can check your data directory and decide which models you need

## Thank you!