# SDK v1 code

In [None]:
import pandas as pd
import numpy as np
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig
from azureml.core.compute import AmlCompute,ComputeTarget, ComputeInstance
from azureml.exceptions import ComputeTargetException
from azureml.core.datastore import Datastore
from azureml.widgets import RunDetails
from azureml.core.environment import CondaDependencies

In [None]:
ws = Workspace.from_config(path='../../config/config.json')

## Environment

In [None]:
env=Environment.from_pip_requirements("conv_sum",  '../config/requirements.txt')
env.register(ws)

## Compute

In [None]:
from ComputeManagement import create_cluster, create_instance, delete_compute

In [None]:
cc=create_cluster(
    workspaceRef=ws,
    name="q34",
    vmSize="Standard_DS3_v2",
    minNodes=0,
    maxNodes=4,
    idleTime=180
)

## Training

In [None]:
# training_script_config = ScriptRunConfig(
#     source_directory = 'src',
#     script = 'training_script.py',
#     arguments=['--data',___],
#     environment = env,
#     compute_target = cc
# )
# experiment = Experiment(
#     workspace = ws,
#     name="maiden_experiment"
# )
# run = experiment.submit(config=training_script_config, tags=[])

# RunDetails(run).show()
# run.wait_for_completion(show_output=True)

In [None]:
import yfinance as yf

In [None]:
sensexTickerYFinance = ['HDFCLIFE.NS, NESTLEIND.NS, KOTAKBANK.NS, INDUSINDBK.NS, TATASTEEL.NS, ITC.NS, ONGC.NS, TITAN.NS, ULTRACEMCO.NS, BAJAJFINSV.NS, BAJFINANCE.NS, BRITANNIA.NS, BAJAJ-AUTO.NS, COALINDIA.NS, BHARTIARTL.NS, TATACONSUM.NS, LTI.NS, CIPLA.NS, MARUTI.NS, ICICIBANK.NS, APOLLOHOSP.NS, NTPC.NS, HEROMOTOCO.NS, HINDALCO.NS, WIPRO.NS, TCS.NS, ADANIENT.NS, MM.NS, TECHM.NS, RELIANCE.NS']
stock_data = yf.download(tickers=sensexTickerYFinance, start='2000-01-01', end='2022-12-31', interval='1mo')

In [None]:
stock_data.loc[:,'Adj Close']

 TODO
1. Read about SOTA for stock price prediction and what determines success of model trying to predict price
2. Choose stocks to monitor - Nifty 50
3. Build as below

Think of a common use-case where data would update regularly and model would shift
1. Stock price prediction
2. Automatic data retrieval using API to store into Azure storage
3. Automatic model training at intervals depending on error rate

Tie everything up in a RL portfolio optimization application

In [None]:
import yfinance as yf

In [None]:
tickerData= yf.download(tickers="RELIANCE.NS", start="2022-01-01", end="2023-01-10", period="1d")
tickerData['Date'] = [str(x)[:10] for x in tickerData.index]
tickerData['Ticker'] = "RELIANCE.NS"
tickerDataToPersist = list(tickerData.transpose().to_dict().values())

In [None]:
from src.TickerData import query, download

In [None]:
download(ticker="RELIANCE.NS", start="2022-12-01",end="2023-01-10", period="1d")

In [None]:
train_data.to_csv()

In [None]:
train_data = query(ticker="RELIANCE.NS", start="2022-12-01",end="2023-01-10")

In [None]:
train_data

In [None]:
import pandas as pd

In [None]:
import torch

In [None]:
ticker="RELIANCE.NS"

In [None]:
train_data = train_data[f"{ticker}_Close"]

In [None]:
import pandas as pd

In [None]:
pd.Series(train_data).to_csv('./data/ril.csv')

In [None]:
import numpy as np

In [None]:
def training_data(series, loookaheadSize=5):
    X,y = [],[]
    for i in np.arange(5,len(series)-1):
        X.append(series[i-loookaheadSize:i])
        y.append(series[i+1])
    X = np.array(X)
    y = np.array(y)
    X = X.reshape(len(series)-loookaheadSize-1,1,5)
    y=y.reshape(-1,1)

    train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(X), torch.from_numpy(y))

    return train_dataset

In [None]:
tx=training_data(train_data)

In [None]:
next(iter(tx))

In [None]:
torch.save(tx,'txx.pt')

In [None]:
typ = torch.load('txx.pt')

In [None]:
next(iter(typ))

In [None]:
next(iter(tx))

In [None]:
import numpy as np

In [None]:
np.array([0.0026]).shape

In [None]:
from datetime import datetime
str(datetime.now().date())

# SDK v2 code

In [None]:
import json
import numpy as np
from azure.ai.ml import MLClient, Input, Output, command
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml.entities import AmlCompute, Environment, Model, Data, CodeConfiguration, ManagedOnlineEndpoint, ManagedOnlineDeployment
from azure.ai.ml.constants import AssetTypes
from datetime import datetime

## Init

In [None]:
with open('../config/config.json','r') as f:
    configs=json.loads(f.read())

subscription_id, resource_group, workspace = configs['subscription_id'], configs['resource_group'], configs['workspace_name']

try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work
    credential = InteractiveBrowserCredential()

ml_client = MLClient(
    credential, subscription_id, resource_group, workspace
)

## Upload data to Azure

In [None]:
my_path = '../data/ril.csv'



In [None]:
ril_data = ml_client.data.get(name='ril', version=1)

In [None]:
ril_data.path

## Training

## Run training job

In [None]:
def version_iter(n=20):
    i=0
    for i in np.arange(21,50):
        yield i
x = iter(version_iter())
next(x)

In [None]:
local_model_name = f"modelstock_pred_{str(datetime.now().date())}"
registered_model_name = "stock_pred_v1"

job = command(   
    name=f"stock_pred_job_{next(x)}",
    inputs={
        "data": Input(type=AssetTypes.URI_FILE, mode="ro_mount", path=ril_data.path),
        "test_train_ratio": 0.25,
        "registered_model_name":registered_model_name,
        "local_model_name":local_model_name
        },
    code="../src/",  # location of source code
    command="python train.py --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} --local_model_name ${{inputs.local_model_name}} --registered_model_name ${{inputs.registered_model_name}}",
    environment=env,
    compute=compute.name,
    experiment_name="train_model_stock_price_prediction",
    display_name="stock_price_prediction",
)

ml_client.create_or_update(job)

## Register the trained model

In [None]:
local_model_name=f"modelstock_pred_{str(datetime.now().date())}"

In [None]:
local_model_name

In [None]:
model = Model(
        path=f"azureml://jobs/{job.name}/outputs/artifacts/paths/outputs/",
        name="model-path",
        description="Model created from run.",
        type="custom_model",
    )

In [None]:
ml_client.models.create_or_update(model)

## Deployment

In [None]:
# Create endpoint
online_endpoint_name = "modeldir-logged-init"

endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is a sample online endpoint"
)

ml_client.begin_create_or_update(endpoint).wait()

In [None]:
ml_client.online_endpoints.get(name=online_endpoint_name).provisioning_state

In [None]:
## Create deployment only after endpoint has provisioned

In [None]:
# Deployment script
code_config = CodeConfiguration(
        code=src_dir, scoring_script="deployment.py"
    )

# Create deployment
yellow_deployment = ManagedOnlineDeployment(
    name="yellow",
    endpoint_name=online_endpoint_name,
    model=modeldir,
    environment=Environment(
            conda_file=f"./{dependencies_dir}/conda.yml",
            image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04"),
    code_configuration=code_config,
    instance_type="Standard_DS2_v2",
    instance_count=1,
)
# create the deployment:
ml_client.begin_create_or_update(yellow_deployment)
# blue deployment takes 100 traffic
# endpoint.traffic = {"yellow": 100}
# ml_client.begin_create_or_update(endpoint)

## Test the endpoint

In [None]:
%%writefile inputs.json
{"d1":2663,"d2":2654.4,"d3":2698,"d4":2690,"d5":2698.12}

In [None]:
# test the endpoint (the request will route to blue deployment as set above)
ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    deployment_name="yellow",
    request_file="inputs.json",
)

In [None]:
ml_client.compute.begin_delete(name=compute.name).wait()

## Deployed endpoint logs

In [None]:
ml_client.online_deployments.get_logs(
    name="yellow", endpoint_name=online_endpoint_name, lines=50
)

# Testing the code

In [11]:
from datetime import datetime, timedelta
import yfinance as yf

In [15]:
start = str(datetime.today().date()-timedelta(days=14))
end = str(datetime.today().date()-timedelta(days=7))

In [23]:
def get_ticker_data(TICKER="LT.NS"):

    try:
        start = str(datetime.today().date()-timedelta(days=366))
        end = str(datetime.today().date()-timedelta(days=1))

        tickerData=yf.download(TICKER,start=start, end=end, period='1d')
        tickerData['Date']=[str(x)[:10] for x in tickerData.index]
        tickerData['Ticker']=TICKER
        tickerData = tickerData['Adj Close']
    
        if tickerData.shape[0]==0:
            raise ValueError("No data found via YFinance.")
    except:
        # logging.error("Problem with downloading data from YFinance.")
        tickerData=None
    finally:
        return tickerData

In [24]:
tickerData=get_ticker_data()

[*********************100%***********************]  1 of 1 completed


In [29]:
import re

In [30]:
re.sub('-','',str(datetime.today().date()))

'20230206'

In [34]:
len(tickerData)

248

In [36]:
tags={'Length':len(tickerData),'Median':tickerData.median(),'SD':tickerData.std()}

In [39]:
str(tickerData.index[0].date())

'2022-02-07'

In [40]:
str(tickerData.index[-1].date())

'2023-02-03'

In [43]:
import pandas as pd

In [45]:
td = pd.read_csv("../data/LT.NS.csv")

In [47]:
td.index = td["Date"]

In [50]:
td.drop(["Date"],axis=1,inplace=True)

In [58]:
td.index[-1][:10]

'2023-02-03'

In [70]:
import json

In [77]:
def get_ml_client():

    with open('../config/config.json','r') as f:
        configs=json.loads(f.read())

    subscription_id, resource_group, workspace = configs['subscription_id'], configs['resource_group'], configs['workspace_name']

    try:
        credential = DefaultAzureCredential()
        # Check if given credential can get token successfully.
        credential.get_token("https://management.azure.com/.default")

        ml_client = MLClient(
        credential, subscription_id, resource_group, workspace
        )
        return ml_client
    except Exception as ex:

        print("error")


In [78]:
mlc=get_ml_client()

error


In [61]:
from azure.ai.ml.entities import Data

In [62]:
path='../data/LT.NS.csv'

In [64]:
TICKER="LT.NS"

In [65]:
ticker_data = pd.read_csv(path)
ticker_data.index=ticker_data['Date']
ticker_data.drop(["Date"],axis=1,inplace=True)
data_to_upload=Data(
        name=TICKER,
        version=re.sub('-','',str(datetime.today().date())),
        description=f"Stock data for {TICKER} during {str(ticker_data.index[0][:10])}:{str(ticker_data.index[-1][:10])} in 1d interval.",
        type='uri_file',
        ml_client=ml_client,
        path=path,
        tags={
            'Length':len(ticker_data),
            'Start':str(ticker_data.index[0][:10]),
            'End':str(ticker_data.index[-1][:10]),
            'Median':ticker_data.median(),
            'SD':ticker_data.std()}
    )

NameError: name 'ml_client' is not defined

In [83]:
import re
'LT.NS'[:'LT.NS'.index('.')]

'LT'

# Training yaml editing

In [1]:
import pandas as pd
import re

In [2]:
with open('../jobs/train.yml','r') as f:
    train_yml=f.read()

In [15]:
train_yml=re.sub('\$job_name','ga-run-16',train_yml)
train_yml=re.sub('\$path','azureml:LT@latest',train_yml)
train_yml=re.sub('\$compute','computer1',train_yml)
train_yml=re.sub('\$local_model','modelx',train_yml)

In [18]:
with open('../jobs/new_train.yml','w') as f:
    f.write(train_yml)

# Works!

In [None]:
{
    "data":
    {
        'ticker':'CIPLA.NS',
        'start':366,
        'end':1,
        'path':'../data/CIPLA.NS.csv'
    },
    "train":
    {
        'job_name':'ga-run-1', #Needs to be updated each time
        'compute_name':'computer456',
        'azureml_path':'azureml:CIPLA@latest',
        
    }
}