In [None]:
import os
import sys
sys.path.append(os.path.dirname(os.path.realpath('__file__')) + '/../../../')
from acaisdk.file import File
from acaisdk.project import Project
from acaisdk.fileset import FileSet
from acaisdk.job import Job
from acaisdk.meta import *
from acaisdk.utils import utils
from acaisdk import credentials
# from acaisdk import automl

# Make the SDK print its debug messages.
utils.DEBUG = True
# '__file__' is a quoted literal here, so it is resolved like any relative
# file name; the directory part of the result is, in practice, the current
# working directory.
workspace = os.path.split(os.path.realpath('__file__'))[0]

In [None]:
# Setting up project
#
# Either:
# 1) Provide your existing token for some project
# 2) Fill in information for new project creation (project_name, project_admin, project_user, optional [csp, budget])
# Do not change root_token!
token = ''
force = False  # set to True to create a new project even if `p` from an earlier run exists

project_name = ''
project_admin = ''
project_user = ''
csp = 'AZURE' # AWS/AZURE/GCP/PRIVATE
budget = 10 # Default, I have no idea what other options there are

# `p` is only defined if this cell already created a user earlier in the same
# kernel session; probing the name lets a re-run reuse that user's token.
try:
    p
except NameError:
    pass
else:
    if force:
        print("User token {} already exists but forcing new project.".format(p['user_token']))
    elif token == '':
        # The hint must spell the Python literal `True`; `force=true` raises NameError.
        print("User token {} already exists, saving to variable 'token'. If you want to enforce new project, set 'force=True'.".format(p['user_token']))
        token = p['user_token']


if token != '':
    print("Logging in with existing credentials.")
    credentials.login(token)
else:
    print("Creating new project.")
    if project_name == '' or project_admin == '' or project_user == '':
        raise ValueError("Some of the 'project_name', 'project_admin', 'project_user' not provided!")

    # NOTE(review): this is a credential committed in the notebook; consider
    # loading it from the environment or a secrets store instead.
    root_token = 'EmDlCTBF1ppONSciYVd03M9xkmF6hFqW'
    # Keep the create_project response under its own name so that `p` is only
    # ever bound to a create_user response. If create_user fails, a re-run of
    # this cell then does not find a `p` without the 'user_token' key.
    created_project = Project.create_project(project_name, root_token, project_admin, csp=csp, budget=budget)
    p = Project.create_user(project_name, created_project['project_admin_token'], project_user)
    token = p['user_token']
    print("New user token {}, saved to variable 'token'".format(token))

In [None]:
# Opt-in switch: when enabled, export the CLUSTER environment variable that Phoebe needs.
USE_PHOEBE = False
if USE_PHOEBE:
    os.environ.update(CLUSTER='PHOEBE')

In [None]:
# This command must live in its own cell because chaining it with another command truncates its output
File.list_dir('/')

In [None]:
# Local working directories under `workspace`: `outputs` for downloaded job
# results, `inputs` for the extracted dataset.
outputs = os.path.join(workspace, 'outputs/')
inputs = os.path.join(workspace, 'inputs/')
for directory in (outputs, inputs):
    # exist_ok=True replaces the isdir() + mkdir() pair and avoids its
    # check-then-create race.
    os.makedirs(directory, exist_ok=True)

# Regular files directly inside the inputs directory (sub-directories are skipped).
inf = [f for f in os.listdir(inputs) if os.path.isfile(os.path.join(inputs, f))]
print(inf)

# Fail early with extraction instructions when no dataset file is present.
if not inf:
    raise FileNotFoundError("Data is not extracted in the inputs directory! Extract the data CICIDS2018/ids-intrusion-csv.zip to inputs directory (e.g. unzip ids-intrusion-csv.zip -d inputs)!")

In [None]:
# In case dataset is not downloaded
# = *.csv not present in the inputs dir (shouldn't happen)
# = rapids.* not present in the inputs dir (just for GPU usage)
# do the following:
#
# Install kaggle (pip3 or pip)
# pip3 install kaggle
#
# Download dataset to the workdir of this notebook (CICIDS2018)
# kaggle datasets download solarmainframe/ids-intrusion-csv
#
# Extract downloaded dataset to inputs dir
# unzip ids-intrusion-csv.zip -d inputs
# (with bsdtar: tar -xvf ids-intrusion-csv.zip -C inputs; GNU tar cannot read zip archives)
#
# Download RAPIDS in case of GPU
# kaggle datasets download cdeotte/rapids

In [None]:
# Going by the SDK method names: map the local inputs directory onto the
# remote path /cicids-inputs/, upload the mapped files, and register them as
# the file set 'cicids.inputs'. The chain's result is the cell output.
(
    File.convert_to_file_mapping([inputs], '/cicids-inputs/')
    .files_to_upload
    .upload()
    .as_new_file_set('cicids.inputs')
)

In [None]:
# Show the contents of the remote /cicids-inputs/ directory.
File.list_dir('/cicids-inputs/')

In [None]:
# Upload the zipped job code from the workspace under the remote name 'cicids-code.zip'.
code = os.path.join(workspace, 'cicids-code.zip')
code_mapping = {code: 'cicids-code.zip'}  # local path -> remote path
File.upload(code_mapping)

In [None]:
# Show the contents of the remote root directory.
File.list_dir('/')

In [None]:
# Author's note: the job ran out of memory with 3Gi and 10Gi, so LARGE
# resources were used; actual consumption is unknown.
# NOTE(review): "memory" below is 5Gi, which does not match the note above — confirm the intended value.

# Shell steps executed in the container, chained so that each step only runs
# if the previous one succeeded.
job_command = " && ".join([
    "mkdir -p ./cicids-output/",
    "(pip install -r requirements.txt)",
    "(python3 cicids.py ./cicids-inputs/ ./cicids-output/ 02-14-2018.csv)",
])

job_setting = dict(
    v_cpu="3",
    memory="5Gi",
    gpu="0",
    command=job_command,
    container_image="python:3.10.9",
    input_file_set="cicids.inputs",
    output_path="./cicids-output/",
    code="cicids-code.zip",
    description="CICIDS2018",
    name="cicids2018",
)

# Build the job from the settings and submit it; `j` is reused by later cells.
new_job = Job().with_attributes(job_setting)
j = new_job.run()

In [None]:
# NOTE(review): 3423 is a hardcoded literal, presumably the id of a job from
# an earlier run — confirm it refers to the job submitted as `j`.
j.check_job_status(3423)

In [None]:
from acaisdk.job import Job, JobStatus

# Wait on the submitted job, then report either its output file set or the
# status it ended with.
status = j.wait()
if status != JobStatus.FINISHED:
    print("Job went wrong:", status)
else:
    output_file_set = j.output_file_set
    print("Job done. output file set id:", output_file_set)

In [None]:
# List the job's output directory. The job in this notebook writes to
# 'cicids-output'; the previous '/nlp-output/' path was left over from the NLP example.
File.list_dir('/cicids-output/')

In [None]:
# Download the result file from this job's output directory ('cicids-output',
# not the NLP example's '/nlp-output/') into the local outputs directory.
# NOTE(review): the file name 'accuracy.txt' is kept from the original — confirm cicids.py writes it.
File.download({'/cicids-output/accuracy.txt': outputs})