In [1]:
from azureml.core import Workspace, Experiment, Environment, Dataset, ScriptRunConfig, Run
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import RandomParameterSampling, uniform, choice
from azureml.core.compute import ComputeTarget
from azureml.train.sklearn import SKLearn 
%reload_ext sql
import pandas as pd

In [2]:
# Workspace handle shared by the later cells (compute lookup, environment, experiment).
ws = Workspace.from_config()

In [3]:
from azureml.core.compute import AmlCompute

# Name of the compute cluster the sweep is submitted to.
cpu_cluster_name = "cpuclusternew"

# Handle to the cluster registered under that name in the workspace.
compute = AmlCompute(workspace=ws, name=cpu_cluster_name)

In [4]:
# Discrete search space for the random sweep. Each key is the command-line flag
# under which the sampled value is handed to the training script.
# NOTE(review): the meaning of each flag lives in the training script, which is
# not part of this notebook -- confirm there before changing a range.
param_sampling = RandomParameterSampling(
    {
        '--stop_loss': choice([1, 2, 3]),
        '--take_profit': choice([1, 2, 3, 4]),
        '--n_estimators': choice([50, 100, 120, 150, 200, 250]),
        '--class_weight_0': choice([1, 5, 6, 10, 15, 20]),
        '--class_weight_1': choice([1, 5, 6, 10, 15, 20]),
        # Options given as separate arguments here rather than as one list.
        '--max_features': choice('sqrt', 'log2'),
        '--random_state': choice([0, 8, 26, 42]),
        '--n_components': choice([10, 15, 20, 25, 30, 40]),
        '--threshold': choice([0.5, 0.55, 0.56, 0.57, 0.6]),
        '--fdr_level': choice([0.05, 0.06, 0.08, 0.10]),
        '--highcorr': choice([0.8, 0.9]),
    }
)

In [5]:
# Fixed arguments given to every training run, alongside the sampled flags.
arguments = ['--dataset_name', 'EURUSD_D1_2010to2023']

# Environment looked up by name in the workspace.
forex_env = Environment.get(workspace=ws, name='forex_env')

# Run configuration used as the template for each run of the sweep.
src = ScriptRunConfig(
    source_directory='./scripts',
    script='train_script_sell.py',
    arguments=arguments,
    environment=forex_env,
    compute_target=compute,
)

In [6]:
# Sweep definition: random sampling over param_sampling, maximizing the
# 'overall_precision' metric, capped at 500 runs with at most 50 running at once.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=param_sampling,
    primary_metric_name='overall_precision',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=500,
    max_concurrent_runs=50,
)

In [7]:
# Submit the sweep under its own experiment name, then wait for it to finish
# while streaming its output into the cell.
experiment = Experiment(ws, 'EURUSD_D1_2010to2023sell')
hyperdrive_run = experiment.submit(config=hyperdrive_config)
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03
Web View: https://ml.azure.com/runs/HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03?wsid=/subscriptions/a960077c-b037-4e6a-9491-8d9d841776de/resourcegroups/forex_resource/workspaces/forexbuild&tid=34406be1-a157-487e-a6f0-25db6b86ea91

Streaming azureml-logs/hyperdrive.txt

[2025-01-08T00:22:54.4441303Z][GENERATOR][DEBUG]Sampled 50 jobs from search space 
[2025-01-08T00:22:54.7970905Z][SCHEDULER][INFO]Scheduling job, id='HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_2' 
[2025-01-08T00:22:54.7576537Z][SCHEDULER][INFO]Scheduling job, id='HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_0' 
[2025-01-08T00:22:54.7605818Z][SCHEDULER][INFO]Scheduling job, id='HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_1' 
[2025-01-08T00:22:55.4556157Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_0' 
[2025-01-08T00:22:55.4004604Z][SCHEDULER][INFO]Scheduling job, id='HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_7' 
[2025-01-08T00:22:55.4069532Z][SCHE

{'runId': 'HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03',
 'target': 'cpuclusternew',
 'status': 'Completed',
 'startTimeUtc': '2025-01-08T00:22:52.86392Z',
 'endTimeUtc': '2025-01-08T09:09:19.566859Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"overall_precision","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'e6c0a482-14af-4560-a7e4-0e16108cabc9',
  'user_agent': 'python/3.8.5 (Linux-5.15.0-1040-azure-x86_64-with-glibc2.10) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.51.0',
  'best_child_run_id': 'HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_4',
  'score': '1',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_4'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryValues': {'amlClientType': 'azureml-sdk-train',
  

In [8]:
import pandas as pd

# Collect one summary row per child run of the sweep: selected metrics plus the
# command-line arguments the run was launched with, then export the table.

# Get all child runs
child_runs = hyperdrive_run.get_children()

# One dict per child run; turned into a DataFrame in a single step below
aggregated_info = []

for child_run in child_runs:
    child_metrics = child_run.get_metrics()
    # Argument list of the run; expected to alternate flag, value, flag, value, ...
    child_arguments = child_run.get_details()['runDefinition']['arguments'] or []

    # Pair each flag with the value that follows it. zip() stops at the shorter
    # slice, so an odd-length list drops its dangling last entry instead of
    # raising IndexError the way index-based pairing would.
    hyperparameters_dict = dict(zip(child_arguments[0::2], child_arguments[1::2]))

    # Key order here determines the column order of the table and the CSV.
    run_details = {
        'run_id': child_run.id,
        'adj_takeprofit': child_metrics.get('adj_takeprofit'),
        'WIN/LOSS-Diff_4': child_metrics.get('WIN/LOSS-Diff_4'),
        'WIN/LOSS-Diff_5': child_metrics.get('WIN/LOSS-Diff_5'),
        'overall_precision': child_metrics.get('overall_precision'),
        'BreakEvenRatio': child_metrics.get('BreakEvenRatio'),
        'no_of_trades': child_metrics.get('no_of_trades'),
        **hyperparameters_dict,
        # precision_split_1 .. precision_split_5; adjust the range if the
        # training script logs a different number of splits
        **{f'precision_split_{i}': child_metrics.get(f'precision_split_{i}') for i in range(1, 6)},
    }

    aggregated_info.append(run_details)

# Convert the aggregated information to a pandas DataFrame
df = pd.DataFrame(aggregated_info)

# Sort by WIN/LOSS-Diff_4 in descending order so the largest value is on top
# (rows without that metric are placed last)
df = df.sort_values(by='WIN/LOSS-Diff_4', ascending=False)

# Display the DataFrame
print(df)

# Save the DataFrame to a CSV file
csv_file_path = 'EURUSD_D1_2010to2023sell.csv'  # Specify your desired path
df.to_csv(csv_file_path, index=False)

                                          run_id             adj_takeprofit  \
431   HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_68  [3.0, 3.0, 3.0, 3.0, 3.0]   
422   HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_77  [4.0, 4.0, 4.0, 4.0, 4.0]   
333  HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_166  [4.0, 4.0, 4.0, 4.0, 4.0]   
384  HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_112  [4.0, 4.0, 4.0, 4.0, 4.0]   
368  HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_131  [4.0, 4.0, 4.0, 4.0, 4.0]   
..                                           ...                        ...   
488   HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_44            [4.0, 4.0, 4.0]   
490   HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_41                        4.0   
494   HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_40                        4.0   
495   HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_25                 [4.0, 4.0]   
497   HD_ca18cd57-a4fa-4c4d-9122-03ecc023be03_16            [4.0, 4.0, 4.0]   

     WIN/LOSS-Diff_4  WIN/LOSS-Diff_5  overall_prec

In [None]:
# Show the per-run summary table (already sorted by WIN/LOSS-Diff_4, descending).
df

In [None]:
# Keep only the runs that have a value in every column.
df_dropped = df.dropna(axis=0, how='any')

In [None]:
# Show the runs that remain after dropping rows with missing values.
df_dropped

In [None]:
# Keep the runs whose precision on split 5 is above their BreakEvenRatio.
droppedsell = df_dropped.loc[df_dropped['precision_split_5'].gt(df_dropped['BreakEvenRatio'])]

In [None]:
# Show the runs that passed the precision_split_5 > BreakEvenRatio filter.
droppedsell

In [None]:
# Export the filtered runs. The file is named after this notebook's dataset and
# experiment (EURUSD_D1_2010to2023sell); the earlier USDCAD-based name did not
# match the data and risked overwriting another instrument's results.
# NOTE(review): update any downstream reader that still expects the old filename.
csv_file_path = 'dropped_EURUSD_D1_2010to2023sell.csv'
droppedsell.to_csv(csv_file_path, index=False)