## SAMPLE - Create a 100 MB file

In [None]:
# Create /tmp/f<N>.txt files whose length is N * 100 MiB (only N = 1 here).
for i in range(1):
    filename = f"/tmp/f{i+1}.txt"
    size_in_bytes = 1024 * 1024 * (i+1)*100*1
    with open(filename, "wb") as f:
        # Jump to the last byte and write a single zero byte: the file gets
        # its full length without every byte being written explicitly.
        f.seek(size_in_bytes - 1)
        f.write(b"\0")
    

## SAMPLE - Make the IRSA calls proactively and generate the AWS config file

In [None]:
#Emulate Side-Car
import requests
import os


# Get the access token from the local endpoint (this cell emulates the side-car).
# A timeout keeps the cell from hanging forever if an endpoint is unreachable.
REQUEST_TIMEOUT_SECONDS = 30
access_token_endpoint='http://localhost:8899/access-token'
resp = requests.get(access_token_endpoint, timeout=REQUEST_TIMEOUT_SECONDS)
# Stop here on an HTTP error instead of sending an error body as the bearer token.
resp.raise_for_status()
token = resp.text

# Get AWS Config File
os.environ['SSL_CERT_DIR']='/etc/ssl/certs/irsa'
headers = {
             "Content-Type": "application/json",
             "Authorization": "Bearer " + token,
        }
endpoint='https://irsa-svc.domino-field/map_iam_roles_to_pod_sa'
print(f"Domino Run Id{os.environ['DOMINO_RUN_ID']}")
data = {"run_id": os.environ['DOMINO_RUN_ID'], "irsa_workload_type":"cluster-edge"} ## It fetches this from the downward api
# NOTE(review): verify=False turns off TLS certificate verification for this
# call, so the SSL_CERT_DIR setting above has no effect on it. Confirm whether
# verification can be enabled with the IRSA CA certificates instead.
resp = requests.post(endpoint,headers=headers,json=data,verify=False,timeout=REQUEST_TIMEOUT_SECONDS)

# Write the AWS Config file contents to the AWS_CONFIG_FILE.
# The same decoded text is written to disk and kept in aws_config_file_contents,
# so what is later handed to the Ray workers matches the local file exactly.
aws_config_file_contents=''
if resp.status_code == 200:
    aws_config_file_contents=resp.content.decode()
    print('Write to config file')
    config_file = os.environ["AWS_CONFIG_FILE"]
    with open(config_file, "w") as f:
        f.write(aws_config_file_contents)
else:
    # Report the failure instead of silently leaving the contents empty.
    print(f"IRSA request failed with HTTP {resp.status_code}: {resp.text}")
aws_config_file_contents

In [None]:
# Print the file that AWS_CONFIG_FILE points to, to inspect what was written there.
!cat $AWS_CONFIG_FILE

## SAMPLE - Generate AWS Session Tokens from an AWS Profile

In [None]:
import boto3
# Resolve credentials for the named profile from the AWS config file.
session = boto3.Session(profile_name="sw-domino-project3-role")
region_name=session.region_name

# get_credentials() returns None when no credentials can be resolved; fail with
# a clear message instead of an AttributeError on None.
credentials = session.get_credentials()
if credentials is None:
    raise RuntimeError("No AWS credentials could be resolved for profile 'sw-domino-project3-role'")

# Take one immutable snapshot so the access key, secret key and token all belong
# to the same credential set, even if the underlying credentials are refreshed.
frozen_credentials = credentials.get_frozen_credentials()
aws_access_key_id=frozen_credentials.access_key
aws_secret_access_key=frozen_credentials.secret_key
aws_session_token=frozen_credentials.token


## SAMPLE - Ray Worker method

1. You pass the AWS Config File Contents directly to the worker
2. The worker writes the contents to the path AWS_CONFIG_FILE
3. The worker generates AWS Credentials based on AWS PROFILE
4. The worker creates a 100 MB file
5. The worker writes the 100 MB file to a custom bucket which is accessible by the chosen AWS Profile

Note - The experiment is created in the workspace, and its name is passed directly to the worker as a parameter.

In [None]:
import os
import time
import ray
import mlflow
import boto3
@ray.remote
def mlflow_write_remote(exp_name,worker_id,aws_config_file_contents):
    """Log a 100 MB file as an MLflow artifact from a Ray worker and time it.

    Steps performed on the worker:
      1. Write ``aws_config_file_contents`` to the path in ``AWS_CONFIG_FILE``.
      2. Resolve credentials for the ``sw-domino-project3-role`` profile and
         export them through the standard ``AWS_*`` environment variables.
      3. Create ``/tmp/f1.txt`` with a length of 100 MiB.
      4. Start a run in the experiment named ``exp_name`` and log the file
         under the ``large_files`` artifact path.

    Args:
        exp_name: Name of an existing MLflow experiment.
        worker_id: Index of the worker; accepted for the caller's bookkeeping
            and not used inside the function.
        aws_config_file_contents: Text to write to the AWS config file.

    Returns:
        The elapsed wall-clock seconds for the experiment lookup plus the
        artifact upload, as a string.

    Raises:
        RuntimeError: If no credentials can be resolved for the profile.
        ValueError: If no experiment named ``exp_name`` exists.
    """
    config_file = os.environ["AWS_CONFIG_FILE"]
    with open(config_file, "w") as f:
        f.write(aws_config_file_contents)

    profile_name = "sw-domino-project3-role"
    session = boto3.Session(profile_name=profile_name)
    credentials = session.get_credentials()
    if credentials is None:
        raise RuntimeError(f"No AWS credentials could be resolved for profile '{profile_name}'")
    # One immutable snapshot: key, secret and token are guaranteed to belong
    # together even if the underlying credentials refresh in the meantime.
    frozen_credentials = credentials.get_frozen_credentials()
    os.environ['AWS_ACCESS_KEY_ID'] = frozen_credentials.access_key
    os.environ['AWS_SECRET_ACCESS_KEY'] = frozen_credentials.secret_key
    if frozen_credentials.token:
        os.environ['AWS_SESSION_TOKEN'] = frozen_credentials.token
    else:
        # os.environ rejects None; also drop any stale token that would not
        # match the key pair exported above.
        os.environ.pop('AWS_SESSION_TOKEN', None)

    # First create the file of 100 MB: seek to the last byte and write a single
    # zero byte so the file gets its full length.
    filename = "/tmp/f1.txt"
    with open(filename, "wb") as f:
        f.seek((1024 * 1024 * 100) - 1)
        f.write(b"\0")

    st = time.time()

    exp = mlflow.get_experiment_by_name(exp_name)
    if exp is None:
        raise ValueError(f"MLflow experiment '{exp_name}' does not exist")
    EXPERIMENT_ID = exp.experiment_id
    with mlflow.start_run(experiment_id=EXPERIMENT_ID) as run:
        mlflow.log_artifact(filename, artifact_path='large_files')
    end = time.time()
    duration = str(end-st)
    return duration

## SAMPLE - Initialize the Ray Connection

In [None]:
import os
import time
import ray
import mlflow
import boto3
# Connect to Ray only when ray.is_initialized() reports no existing connection,
# so re-running this cell does not call ray.init() a second time.
if not ray.is_initialized():
    # Host and port of the Ray head are read from the environment; a missing
    # variable raises KeyError.
    service_host = os.environ["RAY_HEAD_SERVICE_HOST"]
    service_port = os.environ["RAY_HEAD_SERVICE_PORT"]
    address=f"ray://{service_host}:{service_port}"
    # NOTE(review): temp_dir is not passed to ray.init() and is not referenced
    # anywhere else in the visible notebook; confirm whether it is still needed.
    temp_dir='/mnt/data//{}/'.format(os.environ['DOMINO_PROJECT_NAME']) #set to a dataset
    ray.init(address=address)

## SAMPLE - Configure number of workers and start job

In [None]:
# Number of Ray tasks to launch.
no_of_workers = 1

# Row labels for the results table: "1".."N" for the workers, followed by one
# extra label for the overall wall-clock duration.
cols = [str(worker_number) for worker_number in range(1, no_of_workers + 1)]
cols.append('Total Run Duration')

In [None]:
import pandas as pd
exp_name = 'MY_TEST_RAY_SCALING_EXPERIMENT'

# Look up the experiment, creating it when it does not exist yet.
# mlflow.get_experiment_by_name signals "not found" by returning None, so a
# raised exception means the lookup itself failed; report the real error
# (and let KeyboardInterrupt through) instead of hiding it behind a bare except.
exp = None
try:
    exp = mlflow.get_experiment_by_name(exp_name)
except Exception as exc:
    print(f'Experiment lookup failed: {exc!r}')

if exp is None:
    print('Experiment Not Found Create it')
    mlflow.create_experiment(exp_name)
    exp = mlflow.get_experiment_by_name(exp_name)

# Launch one remote task per worker and block until all of them have returned
# their individual durations.
start_time = time.time()
results_s4 = ray.get([mlflow_write_remote.remote(exp_name,worker_id,aws_config_file_contents) for worker_id in range(no_of_workers)])
duration = time.time() - start_time
print('Durations')
# The last row holds the total wall-clock time, matching the extra label in cols.
results_s4.append(str(duration))

s4_pd = pd.DataFrame.from_dict({'worker_index': cols, 'durations': results_s4})
display(s4_pd)
print(s4_pd.to_markdown())

