In [None]:
from azureml.core import Workspace

# Create the workspace; create_resource_group=True also creates the resource group if needed.
ws = Workspace.create(
    name='myworkspace',
    subscription_id='<azure-subscription-id>',
    resource_group='myresourcegroup',
    create_resource_group=True,
    location='eastus2'
)

# Save the workspace config to a local directory so later steps (datastore creation
# and so on) can reuse it. A relative './config' path is used because the previous
# '\config' literal contained an invalid '\c' escape sequence and did not point at a
# directory relative to the notebook.
ws.write_config(path='./config')

In [8]:
from config import Config

from azure.ai.ml import MLClient, command
from azure.identity import DefaultAzureCredential

### Azure Connection

In [None]:

# Project settings; Config is imported from the local `config` module.
config = Config()

# Set up Azure ML authentication and bind the client to the target workspace.
# NOTE(review): `SUBSCIRPTION_ID` is kept exactly as spelled because the attribute is
# defined in Config, outside this notebook — it looks like a typo of SUBSCRIPTION_ID; confirm.
ml_client = MLClient(
    DefaultAzureCredential(), config.SUBSCIRPTION_ID, config.RESOURCE_GROUP, config.WORKSPACE_NAME
)

# Define a command job. The '<...>' values are placeholders to fill in before running;
# the arguments were previously left empty, which is a syntax error.
job = command(
    code='<path-to-source-folder>',
    command='<command-to-run>',
    environment='<environment-name>@latest',
    compute="MLModels",
    experiment_name='<experiment-name>'
)

# Submit the job to the workspace.
returned_job = ml_client.create_or_update(job)

### Create datastore

**Three types of datastore**:
* Azure Blob Storage Container - azureml (protocol)
* Azure File share
* Azure Data Lake (Gen2) - abfs(s) (protocol)

### Create data assets

**Three types of data assets**: 
* URI files
* URI folder
* MLTable: points to files or folders -> includes a schema so the data can be read as tabular data

In [None]:
# --- Register a single file as a URI_FILE data asset ---
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Data

my_path = '<supported-path>'

# Describe the asset: identity first, then what kind of data it is and where it lives.
my_data = Data(
    name='<name>',
    version='<version>',
    description='<description>',
    type=AssetTypes.URI_FILE,
    path=my_path,
)
ml_client.data.create_or_update(my_data)

# --- Read the input data passed through --input_data (URI FILE) ---
import argparse

import pandas as pd

parser = argparse.ArgumentParser()
parser.add_argument('--input_data', type=str)
args = parser.parse_args()

# The argument is passed straight to pandas as the CSV location.
df = pd.read_csv(args.input_data)
df.head()

In [None]:
# Create a URI_FOLDER data asset
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Placeholder for the folder location (closing '>' restored).
my_path = '<supported-path>'

my_data = Data(
    path=my_path,
    type=AssetTypes.URI_FOLDER,
    description='<description>',
    name='<name>',
    version='<version>'
)

ml_client.data.create_or_update(my_data)

# Access input data through URI FOLDER
import argparse
import glob
import pandas as pd

parser = argparse.ArgumentParser()
# The option name must be '--input_data' so that `args.input_data` below exists.
parser.add_argument('--input_data', type=str)
args = parser.parse_args()

# Collect every CSV directly inside the folder and stack them into one frame.
data_path = args.input_data
all_files = glob.glob(data_path + '/*.csv')
# `sort=False` keeps the column order as encountered when the files' columns differ.
df = pd.concat((pd.read_csv(f) for f in all_files), sort=False)

In [None]:
# Create an MLTable data asset
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

my_path = '<path-including-mltable-files>'

my_data = Data(
    path=my_path,
    type=AssetTypes.MLTABLE,
    description='<description>',
    name='<name>',
    version='<version>'
)

ml_client.data.create_or_update(my_data)

# Access data through MLTable
import argparse  # was `from argparse`, which is a syntax error
import mltable
import pandas as pd

parser = argparse.ArgumentParser()
parser.add_argument('--input_data', type=str)
args = parser.parse_args()

# Load the MLTable definition from the given location and materialize it as a DataFrame.
tbl = mltable.load(args.input_data)
df = tbl.to_pandas_dataframe()
df.head()



### Create a compute target

* Compute Instance - virtual machine -> jupyter notebook -> experiment
* Compute Cluster -> large scale dataset -> on demand -> parallel processing
* Kubernetes Cluster -> uses Kubernetes technology
* Attached Compute -> attached to existing Azure Databricks cluster/Azure VM
* Serverless Compute -> on demand, fully managed

**When to use:**
* experiment: compute instance
* production: -> pipeline jobs -> compute cluster / serverless compute
* deployment: 
  * batch prediction -> compute cluster / serverless compute
  * real-time prediction -> Kubernetes cluster


In [None]:
# Create a compute instance
from azure.ai.ml.entities import ComputeInstance

# Compute names may only contain letters, digits and '-'; the previous value
# "basic-ci=123" contained '=' and would be rejected by the service.
ci_basic_name = "basic-ci-123"
ci_basic = ComputeInstance(
    name=ci_basic_name,
    size = "STANDARD_DS3_v2"
)

# begin_create_or_update starts a long-running operation; .result() waits for it to finish.
ml_client.begin_create_or_update(ci_basic).result()

In [None]:
# Create a compute cluster
from azure.ai.ml.entities import AmlCompute

cluster_basic = AmlCompute(
    name="cpu-cluster",
    type="amlcompute",
    size="STANDARD_DS3_v2",
    location="westus",
    min_instances=0,
    max_instances=2,
    idle_time_before_scale_down=120,
    tier="low_priority"
)
# Wait for the long-running create operation to complete (was misspelled `.resuot()`).
ml_client.begin_create_or_update(cluster_basic).result()

* **node**: the unit the cluster scales by; each node can execute a workload in parallel
* ```size```: specifies the virtual machine type of each node of the compute cluster
* ```max_instances```: the maximum number of nodes (parallel workloads)
* ```tier```: whether the nodes are low priority or dedicated.
  

In [None]:
# Use a compute cluster - bind it to a job
from azure.ai.ml import command

# Configure a job that runs the training script on the "cpu-cluster" compute target.
job = command(
    code='./src',
    command="python diabetes-training.py",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute="cpu-cluster",
    display_name="train-with-cluster",
    experiment_name="diabetes-training"
)

# Submit the job and show the studio link for monitoring it.
returned_job = ml_client.create_or_update(job)
# The variable was assigned as `anl_url` but read as `aml_url`, raising NameError.
aml_url = returned_job.studio_url
aml_url


### Environment  - Azure Machine Learning Environment 
**Docker container** -> Environment.


1. Curated environment

Existing (curated) environments are added to the workspace when the workspace is created.



In [None]:
# List the environments available in the workspace
envs = ml_client.environments.list()
for env in envs:
    print(env)

# Review the details of one environment
env = ml_client.environments.get(name="env-name", version="1")
print(env)

# Use a curated environment in a job
from azure.ai.ml import command

job = command(
    code="./src",
    command="python train.py",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute='aml-cluster',
    display_name="train-with-curated-environment",
    # `command()` groups runs by `experiment_name`; `environment_name` is not one of
    # its parameters, and the environment is already given by `environment=` above.
    experiment_name="train-with-curated-environment"
)

# Submit the job to the workspace.
returned_job = ml_client.create_or_update(job)

2. Custom environment 

You can create a custom environment from a Docker image (for example, one on Docker Hub) through the SDK, optionally adding a conda yml file.

In [None]:
# Three ways to register a custom environment with the workspace.
from azure.ai.ml.entities import Environment

# (1) Directly from a public Docker image.
env_docker_image = Environment(
    name="public-docker-image-example",
    description="Environment created from a public Docker image.",
    image="pytorch/pytorch:latest",
)
ml_client.environments.create_or_update(env_docker_image)

# (2) From an Azure ML base Docker image; rebinds the same variable as the original cell.
env_docker_image = Environment(
    name="aml-docker-image-example",
    description="Environment created from a Azure ML Docker image.",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
)
ml_client.environments.create_or_update(env_docker_image)

# (3) From a base image plus a conda specification file.
env_docker_conda = Environment(
    name="docker-image-plus-conda-example",
    description="Environment created from a Docker image plus Conda environment.",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="./conda-env.yml",
)
ml_client.environments.create_or_update(env_docker_conda)

In [None]:
%%writefile conda-env.yml
# Example conda specification, written to ./conda-env.yml by the %%writefile cell magic
# (raw YAML is not valid Python, so the cell could not run as it was). This is the
# file name passed as `conda_file` when the conda-based Environment is created.
name: basic-env-cpu
channels:
  - conda-forge
dependencies:
  - python=3.7
  - scikit-learn
  - pandas
  - numpy
  - matplotlib

### Classification job

In [19]:
# config environment
def sum_primes(n):
    """Return the sum of all prime numbers less than or equal to ``n``.

    Uses a sieve of Eratosthenes. Returns 0 when ``n`` is below 2.
    """
    if n < 2:
        return 0

    # sieve[k] stays True while k is still considered prime; 0 and 1 never are.
    sieve = [False, False] + [True] * (n - 1)

    p = 2
    while p * p <= n:
        if sieve[p]:
            # Cross out p*p, p*p + p, ... up to n in one slice assignment.
            start = p * p
            sieve[start::p] = [False] * len(range(start, n + 1, p))
        p += 1

    return sum(value for value, is_prime in enumerate(sieve) if is_prime)


from collections import defaultdict

def group_anagrams(strs):
    """Group the strings in ``strs`` that are anagrams of each other.

    Returns a list of groups (lists). Groups appear in the order their first
    member was seen, and members keep their input order within a group.
    """
    buckets = {}
    for word in strs:
        # Anagrams share the same sorted-character signature.
        signature = ''.join(sorted(word))
        buckets.setdefault(signature, []).append(word)
    return [*buckets.values()]

In [23]:
def to_title_case(s):
    """Title-case ``s``, treating every non-alphanumeric character as a word break.

    The first alphanumeric character after the start of the string, after
    whitespace, or after any other non-alphanumeric character is upper-cased;
    the remaining alphanumeric characters of that run are lower-cased.
    Non-alphanumeric characters are copied through unchanged.
    """
    pieces = []
    at_word_start = True
    for ch in s:
        if ch.isalnum():
            pieces.append(ch.upper() if at_word_start else ch.lower())
            at_word_start = False
        else:
            # Whitespace and punctuation are handled the same way: keep the
            # character and start a new word.
            pieces.append(ch)
            at_word_start = True
    return ''.join(pieces)

In [27]:
# Try the alnum-based version on padded, punctuated input; the recorded output shows
# that a letter directly after punctuation is capitalized as well ('+good+' -> '+Good+').
to_title_case('     extra spaces here     - extra spaces He42352 re + extra spaces Here ? + + +good+')

'     Extra Spaces Here     - Extra Spaces He42352 Re + Extra Spaces Here ? + + +Good+'

In [28]:
def to_title_case(s):
    """Title-case ``s``, treating only whitespace as a word break.

    A letter is upper-cased when it is the very first character of the string
    or directly follows whitespace; every other letter is lower-cased. Digits
    and punctuation are copied through unchanged and do not start a new word.
    """
    pieces = []
    # Starting in the "capitalize" state covers the first-character case.
    upper_next = True
    for ch in s:
        if ch.isalpha():
            pieces.append(ch.upper() if upper_next else ch.lower())
        else:
            pieces.append(ch)
        # Only whitespace arms capitalization for the following character.
        upper_next = ch.isspace()
    return ''.join(pieces)

In [29]:
# Same input against the whitespace-only version; the recorded output shows that a
# letter directly after punctuation now stays lower-case ('+good+' is unchanged).
to_title_case('     extra spaces here     - extra spaces He42352 re + extra spaces Here ? + + +good+')

'     Extra Spaces Here     - Extra Spaces He42352 Re + Extra Spaces Here ? + + +good+'