In [None]:
import os
import sys
sys.path.append(os.path.dirname(os.path.realpath('__file__')) + '/../../../')
from acaisdk.file import File
from acaisdk.project import Project
from acaisdk.fileset import FileSet
from acaisdk.job import Job
from acaisdk.meta import *
from acaisdk.utils import utils
from acaisdk import credentials
# from acaisdk import automl

# Have the SDK print its debug messages.
utils.DEBUG = True

# '__file__' is passed as a string literal (not the module attribute), so
# realpath() resolves it relative to the current working directory; the parent
# of that path is therefore the current directory.
_marker_path = os.path.realpath('__file__')
workspace = os.path.dirname(_marker_path)

In [None]:
# Project setup
#
# Pick one of the two options:
#   1) Put the token of an existing project user into `token`.
#   2) Leave `token` empty and fill in `project_name`, `project_admin` and
#      `project_user` (optionally `csp` and `budget`) to create a new project.
# Do not change root_token!
token = ''
force_new_project = False

project_name = ''
project_admin = ''
project_user = ''
csp = 'AZURE'  # AWS/AZURE/GCP/PRIVATE
budget = 10  # default value; other accepted values are not documented here

# `p` only exists if this cell already created a project/user earlier in the
# same kernel session. Unless a new project is forced, its user token is reused
# when no token was supplied above.
if 'p' in globals():
    if force_new_project:
        print("User token {} already exists but forcing new project.".format(p['user_token']))
    elif token == '':
        print("User token {} already exists, saving to variable 'token'. If you want to enforce new project, set 'force_new_project=True'.".format(p['user_token']))
        token = p['user_token']


if token == '':
    print("Creating new project.")
    # All three identifiers are required to create a project and its user.
    if '' in (project_name, project_admin, project_user):
        raise ValueError("Some of the 'project_name', 'project_admin', 'project_user' not provided!")

    # NOTE(review): the root credential is hard-coded in the notebook; consider
    # whether it should be loaded from the environment instead.
    root_token = 'EmDlCTBF1ppONSciYVd03M9xkmF6hFqW'
    # The project is created first; the admin token from its response is then
    # used to create the user whose token is kept in `token`.
    p = Project.create_project(project_name, root_token, project_admin, csp=csp, budget=budget)
    p = Project.create_user(project_name, p['project_admin_token'], project_user)
    token = p['user_token']
    print("New user token {}, saved to variable 'token'".format(token))
else:
    print("Logging in with existing credentials.")
    credentials.login(token)

In [None]:
# Phoebe is opt-in: when the flag is enabled, export the CLUSTER environment
# variable that Phoebe needs.
USE_PHOEBE = False
if USE_PHOEBE:
    os.environ.update({"CLUSTER": 'PHOEBE'})

In [None]:
# The input data ends up in a slightly unusual layout.
# After downloading the Kaggle dataset I extracted the archive (tar -xvf cifar10.zip -C inputs)
# and found another archive (cifar10.tgz) inside. I extracted that as well (tar -xvf inputs/cifar10.tgz -C inputs),
# which yielded the following structure:
#
# 
# > tree inputs -d 1
# inputs
# └── cifar10
#     ├── test
#     │   ├── airplane
#     │   ├── automobile
#     │   ├── bird
#     │   ├── cat
#     │   ├── deer
#     │   ├── dog
#     │   ├── frog
#     │   ├── horse
#     │   ├── ship
#     │   └── truck
#     └── train
#         ├── airplane
#         ├── automobile
#         ├── bird
#         ├── cat
#         ├── deer
#         ├── dog
#         ├── frog
#         ├── horse
#         ├── ship
#         └── truck
# └── cifar10.tgz
#
# The reason for this layout is that the inputs are too large, so compressing them is essential for any handling.
# The fileset uploaded to S3 should be inputs/cifar10.tgz.
# The job's command has to extract that archive into the input path inside the container.
# The training code appends the test and train subdirectories itself, so it is enough to give the job the input path
# into which the files are extracted with the correct structure.

In [None]:
# Local working directories next to the notebook: results are downloaded into
# outputs/, the dataset is expected under inputs/.
outputs = os.path.join(workspace, 'outputs/')
inputs = os.path.join(workspace, 'inputs/')
for _local_dir in (outputs, inputs):
    if not os.path.isdir(_local_dir):
        os.mkdir(_local_dir)

# Show what is currently in inputs/ and stop early when the extracted
# `cifar10` entry is missing.
inf = os.listdir(inputs)
print(inf)
if "cifar10" not in inf:
    raise FileNotFoundError("Data is not extracted in the inputs directory! Extract the data cifar10.zip to inputs directory (e.g. tar -xvf cifar10.zip -C inputs) and then extract the zip once again!")

In [None]:
# In case dataset is not downloaded at all = cifar10-ResNet/cifar10.zip not present
# do the following:
#
# Install kaggle (pip3 or pip)
# pip3 install kaggle
#
# Download the dataset to the cifar10-ResNet working directory
# kaggle datasets download dineshsaini/cifar10
#
# Untar downloaded dataset to inputs dir
# tar -xvf cifar10.zip -C inputs
# tar -xvf inputs/cifar10.tgz -C inputs <- important

In [None]:
# Map the local archive to the '/cifar10-inputs/' target, upload it, and
# register the uploaded files as the file set 'cifar10t.inputs'.
_input_mapping = File.convert_to_file_mapping([inputs + "cifar10.tgz"], '/cifar10-inputs/')
_uploaded_inputs = _input_mapping.files_to_upload.upload()
# Kept as the last expression so the result is still shown as the cell output.
_uploaded_inputs.as_new_file_set('cifar10t.inputs')

In [None]:
# Show the listing of '/cifar10-inputs/', the target path used when uploading
# the input archive (presumably to confirm that the upload succeeded).
File.list_dir('/cifar10-inputs/')

In [None]:
# Upload the zipped training code from the notebook directory under the same
# file name.
_CODE_ARCHIVE = 'cifar10-code.zip'
code = os.path.join(workspace, _CODE_ARCHIVE)
File.upload({code: _CODE_ARCHIVE})

In [None]:
# Show the listing of the root path '/' (presumably to confirm that the code
# archive uploaded in the previous cell is present).
File.list_dir('/')

In [None]:
# Recorded timings (all with 3GB shm):
# 1. CPU: 5 MEM: 20Gi GPU:0 ===> Time to Finish: 805m
# 2. CPU: 20 MEM: 20Gi GPU:0 ===> Time to Finish: 140.8m
# 3. CPU: 5 MEM: 20Gi GPU:1 ===> Time to Finish: 2.9m
# 4. CPU: 2 MEM: 5Gi GPU:1 ===> Time to Finish: 12.1m
# 5. Locally on macbook pro m1:
#    CPU times: user 3h 9min 50s, sys: 24min 51s, total: 3h 34min 41s
#    Wall time: 1h 14min 11s

# Shell steps run by the job, chained with `&&` so that a failing step stops
# the remaining ones.
# The archive is read from ./cifar10-inputs/, matching the '/cifar10-inputs/'
# path it was uploaded under; the previous command pointed tar at
# './cifar10-inputss/' (double "s"), a directory nothing creates.
# Assumes the input file set is placed under ./cifar10-inputs/ in the job's
# working directory -- TODO confirm against the platform.
_job_steps = [
    "mkdir -p ./cifar10-output/",
    "(mkdir -p ./cifar10-inputs/)",
    "(tar -xvf ./cifar10-inputs/cifar10.tgz -C ./cifar10-inputs)",
    "(pip install -r requirements.txt)",
    "(python3 cifar10.py ./cifar10-inputs/ ./cifar10-output/)",
]

job_setting = {
    "v_cpu": "20",
    "memory": "20Gi",
    "gpu": "1",
    "command": " && ".join(_job_steps),
    "container_image": "python:3.10.9",
    'input_file_set': 'cifar10t.inputs',
    'output_path': './cifar10-output/',
    # NOTE(review): ':2' looks like a pinned version of the uploaded code
    # archive -- confirm it matches the upload you intend to run.
    'code': 'cifar10-code.zip:2',
    'description': 'CIFAR10 with ResNet9 Arch',
    'name': 'cifar10-resnet9',
    'shm': '3',
}

# Submit the job with the settings above; `j` is kept for later cells.
j = Job().with_attributes(job_setting).run()

In [None]:
# Query the status of job 3315 (hard-coded id).
# NOTE(review): presumably the id of an earlier submission -- confirm that it
# matches the job started as `j` before relying on the result.
j.check_job_status(3315)

In [None]:
# Show the listing of '/cifar10-output' (presumably where the job's results
# appear, given its output_path setting -- verify).
File.list_dir('/cifar10-output')

In [None]:
# Fetch '/cifar10-output/epochs.txt' into the local `outputs` directory
# (presumably the mapping is remote path -> local destination; verify).
File.download({'/cifar10-output/epochs.txt': outputs})