# SDK v1 code

In [None]:
import pandas as pd
import numpy as np
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig
from azureml.core.compute import AmlCompute,ComputeTarget, ComputeInstance
from azureml.exceptions import ComputeTargetException
from azureml.core.datastore import Datastore
from azureml.widgets import RunDetails
from azureml.core.environment import CondaDependencies

In [None]:
# Connect to the Azure ML workspace described by the local config file.
# NOTE(review): this path climbs two directories, while other cells in this
# notebook read '../config/...' — confirm which location is correct.
ws = Workspace.from_config(path='../../config/config.json')

## Environment

In [None]:
# Build the "conv_sum" environment from the pip requirements file and
# register it in the workspace.
env = Environment.from_pip_requirements(
    name="conv_sum",
    file_path='../config/requirements.txt',
)
env.register(workspace=ws)

## Compute

In [None]:
from ComputeManagement import create_cluster, create_instance, delete_compute

In [None]:
# Ask the project's create_cluster helper for a compute cluster named "q34"
# in workspace `ws`. The helper's implementation is not visible in this
# notebook; presumably idleTime is in seconds — TODO confirm.
cc=create_cluster(
    workspaceRef=ws,
    name="q34",
    vmSize="Standard_DS3_v2",
    minNodes=0,
    maxNodes=4,
    idleTime=180
)

## Training

In [None]:
# training_script_config = ScriptRunConfig(
#     source_directory = 'src',
#     script = 'training_script.py',
#     arguments=['--data',___],
#     environment = env,
#     compute_target = cc
# )
# experiment = Experiment(
#     workspace = ws,
#     name="maiden_experiment"
# )
# run = experiment.submit(config=training_script_config, tags=[])

# RunDetails(run).show()
# run.wait_for_completion(show_output=True)

In [None]:
import yfinance as yf

In [None]:
# One list element per ticker. A single comma-joined string inside a list is
# treated by yfinance as one (invalid) symbol.
# NOTE(review): 'MM.NS' may be meant to be 'M&M.NS' and 'LTI.NS' may have been
# renamed upstream — confirm the symbols against the exchange listing.
sensexTickerYFinance = [
    'HDFCLIFE.NS', 'NESTLEIND.NS', 'KOTAKBANK.NS', 'INDUSINDBK.NS', 'TATASTEEL.NS',
    'ITC.NS', 'ONGC.NS', 'TITAN.NS', 'ULTRACEMCO.NS', 'BAJAJFINSV.NS',
    'BAJFINANCE.NS', 'BRITANNIA.NS', 'BAJAJ-AUTO.NS', 'COALINDIA.NS', 'BHARTIARTL.NS',
    'TATACONSUM.NS', 'LTI.NS', 'CIPLA.NS', 'MARUTI.NS', 'ICICIBANK.NS',
    'APOLLOHOSP.NS', 'NTPC.NS', 'HEROMOTOCO.NS', 'HINDALCO.NS', 'WIPRO.NS',
    'TCS.NS', 'ADANIENT.NS', 'MM.NS', 'TECHM.NS', 'RELIANCE.NS',
]
# Monthly bars for every ticker over the full history window.
stock_data = yf.download(tickers=sensexTickerYFinance, start='2000-01-01', end='2022-12-31', interval='1mo')

In [None]:
# Display every row of the 'Adj Close' column group of the downloaded frame.
stock_data.loc[:,'Adj Close']

TODO
1. Read about the state of the art (SOTA) in stock price prediction and what determines the success of a model that tries to predict prices
2. Choose the stocks to monitor - Nifty 50
3. Build as described below

Think of a common use case where the data updates regularly and the model would shift:
1. Stock price prediction
2. Automatic data retrieval through an API, with the results stored in Azure storage
3. Automatic model retraining at intervals that depend on the error rate

Tie everything together in a reinforcement learning (RL) portfolio optimization application

In [None]:
import yfinance as yf

In [None]:
# Daily bars for one ticker. `interval` sets the bar size; `period` is a
# look-back window that conflicts with an explicit start/end range.
tickerData = yf.download(tickers="RELIANCE.NS", start="2022-01-01", end="2023-01-10", interval="1d")
# Keep the date as a plain 'YYYY-MM-DD' string so each record is self-describing.
tickerData['Date'] = [str(x)[:10] for x in tickerData.index]
tickerData['Ticker'] = "RELIANCE.NS"
# One dict per row, keyed by column name.
tickerDataToPersist = tickerData.to_dict(orient="records")

In [None]:
from src.TickerData import query, download

In [None]:
# Call the project's download helper for RELIANCE.NS over the given date range.
# NOTE(review): period="1d" looks like it is meant as the bar interval —
# confirm against the signature of src.TickerData.download.
download(ticker="RELIANCE.NS", start="2022-12-01",end="2023-01-10", period="1d")

In [None]:
train_data.to_csv()

In [None]:
train_data = query(ticker="RELIANCE.NS", start="2022-12-01",end="2023-01-10")

In [None]:
# Display the queried data.
train_data

In [None]:
import pandas as pd

In [None]:
import torch

In [None]:
# Ticker symbol used to build the column name selected in the next cell.
ticker="RELIANCE.NS"

In [None]:
# Keep only the "<ticker>_Close" entry of the queried data.
# NOTE(review): this overwrites `train_data`, so the cell is not idempotent —
# re-running it without re-running the query cell will not select the same thing.
train_data = train_data[f"{ticker}_Close"]

In [None]:
import pandas as pd

In [None]:
# Persist the close-price series as CSV.
# NOTE(review): this writes to './data/ril.csv', while the upload cell in the
# SDK v2 section reads '../data/ril.csv' — confirm which directory is intended.
pd.Series(train_data).to_csv('./data/ril.csv')

In [None]:
import numpy as np

In [None]:
def training_data(series, loookaheadSize=5):
    """Turn a 1-D series into a TensorDataset of sliding windows and targets.

    For every position ``i`` from ``loookaheadSize`` to ``len(series) - 2`` the
    sample is ``series[i - loookaheadSize:i]`` and the target is
    ``series[i + 1]``.

    NOTE(review): the target skips ``series[i]``, i.e. it lies two steps after
    the last value of the window — confirm that this gap is intended.

    Args:
        series: 1-D sequence (list, ndarray, pandas Series) or an ``(n, 1)``
            array. It is read positionally, so a pandas index is ignored.
        loookaheadSize: number of past values in each window (must be >= 1).

    Returns:
        ``torch.utils.data.TensorDataset`` with features of shape
        ``(n_samples, 1, loookaheadSize)`` and targets of shape
        ``(n_samples, 1)``. The dataset is empty when the series is too short
        to produce a single sample.

    Raises:
        ValueError: if ``loookaheadSize`` is smaller than 1.
    """
    if loookaheadSize < 1:
        raise ValueError("loookaheadSize must be at least 1")

    # Positional access regardless of the container type or index labels.
    values = np.asarray(series)
    n_samples = max(len(values) - loookaheadSize - 1, 0)
    positions = range(loookaheadSize, loookaheadSize + n_samples)

    X = np.array([values[i - loookaheadSize:i] for i in positions])
    y = np.array([values[i + 1] for i in positions])
    X = X.reshape(n_samples, 1, loookaheadSize)
    y = y.reshape(-1, 1)

    train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(X), torch.from_numpy(y))

    return train_dataset

In [None]:
# Build the windowed dataset from the close-price series.
tx=training_data(train_data)

In [None]:
# Show the first (features, target) pair of the dataset.
next(iter(tx))

In [None]:
# Serialize the whole dataset object to 'txx.pt' in the working directory.
torch.save(tx,'txx.pt')

In [None]:
# Read the dataset back to check the save/load round trip.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which may refuse to unpickle a TensorDataset — confirm for the installed version.
typ = torch.load('txx.pt')

In [None]:
# First sample of the reloaded dataset (compare with the cell below).
next(iter(typ))

In [None]:
# First sample of the in-memory dataset, for comparison with the reloaded one.
next(iter(tx))

In [None]:
import numpy as np

In [None]:
# Scratch check: shape of a one-element array.
np.array([0.0026]).shape

In [None]:
from datetime import datetime
# Scratch check: today's date as the 'YYYY-MM-DD' string used in model file names below.
str(datetime.now().date())

# SDK v2 code

In [None]:
import json
import numpy as np
from azure.ai.ml import MLClient, Input, Output, command
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml.entities import AmlCompute, Environment, Model, Data, CodeConfiguration, ManagedOnlineEndpoint, ManagedOnlineDeployment
from azure.ai.ml.constants import AssetTypes
from datetime import datetime

## Init

In [None]:
# Read the workspace coordinates from the shared config file.
with open('../config/config.json', 'r') as f:
    configs = json.load(f)

subscription_id = configs['subscription_id']
resource_group = configs['resource_group']
workspace = configs['workspace_name']

try:
    credential = DefaultAzureCredential()
    # Requesting a token is the cheapest way to prove the credential works.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # DefaultAzureCredential is unusable here: fall back to an interactive browser login.
    credential = InteractiveBrowserCredential()

# Client handle used by every SDK v2 cell below.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

## Upload data to Azure

In [None]:
# Local CSV to register as a data asset.
my_path = '../data/ril.csv'

# Describe the file as a URI_FILE data asset named "ril", version '1'.
my_data = Data(
    path=my_path,
    type=AssetTypes.URI_FILE,
    description="ril_stock_data",
    name="ril",
    version='1'
)

# Create the asset in the workspace, or update it if it already exists.
ml_client.data.create_or_update(my_data)

In [None]:
# Fetch the registered asset. The version is passed as a string so it matches
# the version='1' used when the asset was created.
ril_data = ml_client.data.get(name='ril', version='1')

In [None]:
# Show the path of the registered data asset; it is passed to the training job below.
ril_data.path

## Training

In [None]:
# Directory that receives the generated training and scoring scripts.
src_dir = "../src/"

In [None]:
%%writefile {src_dir}train.py
import os
import argparse
import pandas as pd
import torch
from sklearn.preprocessing import  MinMaxScaler
from sklearn.model_selection import train_test_split
import logging
import mlflow
import pickle
import numpy as np

def series_to_tensors(series, lookaheadSize=5):
    """Turn a series into a TensorDataset of sliding windows and targets.

    For every position ``i`` from ``lookaheadSize`` to ``len(series) - 2`` the
    sample is ``series[i - lookaheadSize:i]`` and the target is
    ``series[i + 1]``.

    NOTE(review): the target skips ``series[i]``, i.e. it lies two steps after
    the last value of the window — confirm that this gap is intended.

    Args:
        series: 1-D sequence or ``(n, 1)`` array, read positionally.
        lookaheadSize: number of past values in each window (must be >= 1).

    Returns:
        ``torch.utils.data.TensorDataset`` with features of shape
        ``(n_samples, 1, lookaheadSize)`` and targets of shape
        ``(n_samples, 1)``. The dataset is empty when the series is too short
        to produce a single sample.

    Raises:
        ValueError: if ``lookaheadSize`` is smaller than 1.
    """
    if lookaheadSize < 1:
        raise ValueError("lookaheadSize must be at least 1")

    # Positional access regardless of the container type or index labels.
    values = np.asarray(series)
    n_samples = max(len(values) - lookaheadSize - 1, 0)
    positions = range(lookaheadSize, lookaheadSize + n_samples)

    X = np.array([values[i - lookaheadSize:i] for i in positions])
    y = np.array([values[i + 1] for i in positions])
    X = X.reshape(n_samples, 1, lookaheadSize)
    y = y.reshape(-1, 1)

    dataset = torch.utils.data.TensorDataset(torch.from_numpy(X), torch.from_numpy(y))

    return dataset

def dataprep(args):
    """Load the price CSV, split it chronologically, scale it and window it.

    Args:
        args: parsed command-line namespace; reads ``args.data`` (CSV path)
            and ``args.test_train_ratio`` (share of rows held out for testing).

    Returns:
        Tuple ``(scaler, train_tensors, test_tensors)``: the ``MinMaxScaler``
        fitted on the training rows and the two windowed datasets produced by
        ``series_to_tensors``.
    """
    # The first CSV column is the saved index, whatever header it was given.
    stockData = pd.read_csv(args.data, index_col=0)

    # shuffle=False keeps rows in time order: the sliding windows built below
    # are only meaningful on consecutive observations, and the test rows must
    # come after the training rows.
    stock_train_df, stock_test_df = train_test_split(
        stockData, test_size=args.test_train_ratio, shuffle=False
    )

    print(f"Training rows: {len(stock_train_df)}, test rows: {len(stock_test_df)}")

    # Instead of this use LayerNorm or BatchNorm in the neural net
    # The scaler is fitted on the training rows only, so no test statistics leak in.
    scaler = MinMaxScaler().fit(stock_train_df)
    stock_train_df = scaler.transform(stock_train_df)
    stock_test_df = scaler.transform(stock_test_df)

    train_tensors = series_to_tensors(stock_train_df)
    test_tensors = series_to_tensors(stock_test_df)

    return scaler, train_tensors, test_tensors

class lstm_model(torch.nn.Module):
    """Single-layer LSTM (input_size=5, hidden_size=1) followed by a 1->1 linear layer."""

    def __init__(self):
        super(lstm_model, self).__init__()
        self.lstm1=torch.nn.LSTM(batch_first=True, input_size=5, hidden_size=1)
        self.out=torch.nn.Linear(1,1)

    def forward(self, x, hidden=None):
        """Run the LSTM and map its last time step through the linear layer.

        Args:
            x: input tensor whose last dimension is 5 (the LSTM input size).
            hidden: optional ``(h_0, c_0)`` initial state; ``None`` lets the
                LSTM start from zeros.

        Returns:
            Tuple ``(prediction, hidden)`` where ``hidden`` is the final LSTM state.
        """
        # Forward the caller's initial state instead of silently dropping it.
        x, hidden = self.lstm1(x, hidden)
        x = x[:,-1]
        x = self.out(x)
        return x, hidden

def train(trainset, epochs=10, lr=0.0001, batch_size=1):
    """Fit a fresh ``lstm_model`` on ``trainset`` with Adam and MSE loss.

    Args:
        trainset: dataset yielding ``(features, target)`` pairs, e.g. the
            TensorDataset returned by ``series_to_tensors``.
        epochs: number of passes over the data.
        lr: Adam learning rate.
        batch_size: samples per optimisation step. The default of 1 performs
            one step per sample, in dataset order.

    Returns:
        The trained ``lstm_model`` instance.
    """
    seq_model = lstm_model()
    seq_model.train()
    optim = torch.optim.Adam(lr=lr, params=seq_model.parameters())

    # The loader adds the batch dimension expected by the batch_first LSTM;
    # shuffle=False keeps the samples in their original order.
    loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):

        # Sum of the per-step losses over the epoch.
        epoch_loss = 0.0

        for feats, target in loader:

            optim.zero_grad()

            y_p, _ = seq_model(feats.float())
            loss = torch.nn.functional.mse_loss(y_p.float(), target.float())

            loss.backward()
            optim.step()
            epoch_loss += loss.item()

        print(f"Epoch: {epoch}, loss: {epoch_loss}")
    return seq_model

def main():
    """Parse the job arguments, train the model and save the artefacts.

    Writes ``./outputs/scaler.pkl`` (the pickled fitted scaler) and
    ``./outputs/<local_model_name>.pth`` (the model state dict).
    """

    # input and output arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--registered_model_name", type=str, help="model name")
    parser.add_argument("--data", type=str, required=True, help="Path to input data")
    parser.add_argument("--local_model_name", type=str, required=True,
                        help="File name (without extension) for the saved weights")
    parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25)

    args = parser.parse_args()

    # Load Scaler object later and send it for scaling data

    scaler, trainset, _ = dataprep(args)

    trainedModel = train(trainset)

    print(os.getcwd())

    # Make sure the output folder exists, e.g. when the script is run locally.
    os.makedirs('./outputs', exist_ok=True)

    # The context manager guarantees the pickle is flushed and closed.
    with open('./outputs/scaler.pkl', 'wb') as scaler_file:
        pickle.dump(scaler, scaler_file)

    model_file = f"./outputs/{args.local_model_name}.pth"
    torch.save(trainedModel.state_dict(), model_file)

if __name__ == "__main__":
    main()

## Run training job

In [None]:
def version_iter(n=20):
    """Yield consecutive integers n + 1, n + 2, ... without an upper bound.

    The values are used as suffixes that keep job names unique.

    Args:
        n: last suffix already used; the first value produced is ``n + 1``.
    """
    i = n + 1
    while True:
        yield i
        i += 1


# NOTE: re-running this cell restarts the counter, so raise `n` if the
# low-numbered job names have already been used.
x = iter(version_iter())
next(x)

In [None]:
local_model_name = f"modelstock_pred_{str(datetime.now().date())}"
registered_model_name = "stock_pred_v1"

# NOTE(review): "q34" is the cluster requested in the SDK v1 section of this
# notebook — confirm it is the intended training target.
compute = ml_client.compute.get("q34")

job = command(
    name=f"stock_pred_job_{next(x)}",
    inputs={
        "data": Input(type=AssetTypes.URI_FILE, mode="ro_mount", path=ril_data.path),
        "test_train_ratio": 0.25,
        "registered_model_name": registered_model_name,
        "local_model_name": local_model_name,
    },
    code="../src/",  # location of source code
    command="python train.py --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} --local_model_name ${{inputs.local_model_name}} --registered_model_name ${{inputs.registered_model_name}}",
    # SDK v2 expects an environment reference (or an azure.ai.ml entity), not
    # the azureml.core (v1) Environment object.
    # NOTE(review): "conv_sum" is the name registered in the SDK v1 section —
    # confirm that it contains torch, scikit-learn, pandas and mlflow.
    environment="conv_sum@latest",
    compute=compute.name,
    experiment_name="train_model_stock_price_prediction",
    display_name="stock_price_prediction",
)

returned_job = ml_client.create_or_update(job)
# Block until the job finishes: the model registered further down is read from
# this job's outputs.
ml_client.jobs.stream(returned_job.name)
returned_job

## Register the trained model

In [None]:
# Rebuild the date-stamped model name.
# NOTE(review): this is recomputed from the current date, so it differs from
# the name the training job used if the job was submitted on an earlier day.
local_model_name=f"modelstock_pred_{str(datetime.now().date())}"

In [None]:
# Display the model name for a visual check.
local_model_name

In [None]:
# Describe the job's whole outputs/ folder (weights + scaler) as a custom
# model asset named "model-path".
job_outputs_uri = f"azureml://jobs/{job.name}/outputs/artifacts/paths/outputs/"
model = Model(
    path=job_outputs_uri,
    name="model-path",
    description="Model created from run.",
    type=AssetTypes.CUSTOM_MODEL,
)

In [None]:
# Register the model asset in the workspace.
ml_client.models.create_or_update(model)

## Deployment

In [None]:
%%writefile {src_dir}/deployment.py
import pandas as pd
import numpy as np
import pickle
import torch
import os
import logging
import json

def init():
    """Load the scaler and the trained LSTM weights into module globals.

    Reads ``$AZUREML_MODEL_DIR/outputs/scaler.pkl`` and the ``*.pth`` state
    dict stored in the same folder, then sets the globals ``scaler`` and
    ``model`` that ``run`` uses.

    Raises:
        KeyError: if ``AZUREML_MODEL_DIR`` is not set.
        FileNotFoundError: if no ``*.pth`` file is present.
    """
    import glob  # local import: only needed to locate the weights file

    global model
    global scaler

    class lstm_model(torch.nn.Module):
        """Same architecture as the training script, so the state dict keys match."""

        def __init__(self):
            super(lstm_model, self).__init__()
            self.lstm1=torch.nn.LSTM(batch_first=True, input_size=5, hidden_size=1)
            self.out=torch.nn.Linear(1,1)

        def forward(self, x, hidden=None):
            # Forward the optional initial state instead of dropping it.
            x, hidden = self.lstm1(x, hidden)
            x = x[:,-1]
            x = self.out(x)
            return x, hidden

    artifact_dir = os.path.join(os.environ["AZUREML_MODEL_DIR"], "outputs")

    scalerpath = os.path.join(artifact_dir, "scaler.pkl")
    # Deserialize the fitted sklearn scaler.
    # NOTE: pickle executes code on load — only deploy artefacts produced by
    # the trusted training job.
    with open(scalerpath, 'rb') as f:
        scaler = pickle.load(f)

    # The weights file name carries the training date, so look it up instead
    # of hard-coding one day's name; the date stamp sorts chronologically.
    candidates = sorted(glob.glob(os.path.join(artifact_dir, "*.pth")))
    if not candidates:
        raise FileNotFoundError(f"No .pth weights file found in {artifact_dir}")
    modelpath = candidates[-1]

    model = lstm_model()
    # map_location lets the weights load on a CPU-only instance.
    model.load_state_dict(torch.load(modelpath, map_location="cpu"))
    model.eval()

def run(raw_data):
    """Score one request.

    Args:
        raw_data: JSON object (as text) whose values are prices in
            chronological order; the number of values must be a multiple of 5.
            Each consecutive group of 5 is scored as one window.

    Returns:
        Nested list with one single-element list per window, holding the
        prediction mapped back to the original price scale.

    Raises:
        ValueError: if the payload is empty or its length is not a multiple of 5.
    """
    window = 5  # must match the LSTM input_size built in init()

    data = json.loads(raw_data)
    data = np.array(list(data.values())).astype(float)
    if data.size == 0 or data.size % window != 0:
        raise ValueError(
            f"Expected a non-empty multiple of {window} values, got {data.size}"
        )
    scaled_data = scaler.transform(data.reshape(-1,1))

    tensor_data = torch.from_numpy(scaled_data.reshape(-1,1,window))

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        result, _ = model(tensor_data.float())

    result = scaler.inverse_transform(result.numpy())

    return result.tolist()

In [None]:
# Create endpoint
# Create endpoint
online_endpoint_name = "modeldir-logged-init"

endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is a sample online endpoint"
)

# begin_create_or_update returns a poller; .result() blocks until the endpoint
# is provisioned, so the deployment cell below cannot run too early.
ml_client.begin_create_or_update(endpoint).result()

In [None]:
# Show the endpoint's current provisioning state.
ml_client.online_endpoints.get(name=online_endpoint_name).provisioning_state

In [None]:
# Get model
# Get the most recently registered version of the model. A hard-coded version
# number breaks on a fresh workspace and goes stale after each new registration.
modeldir = ml_client.models.get(name="model-path", label="latest")

In [None]:
## Create deployment only after endpoint has provisioned

In [None]:
# Deployment script
# Deployment script: the scoring entry point inside the source folder.
code_config = CodeConfiguration(
        code=src_dir, scoring_script="deployment.py"
    )

# Create deployment
# NOTE(review): `dependencies_dir` is not defined in any visible cell of this
# notebook — define it (the folder holding conda.yml) before running this cell.
yellow_deployment = ManagedOnlineDeployment(
    name="yellow",
    endpoint_name=online_endpoint_name,
    model=modeldir,
    environment=Environment(
            conda_file=f"./{dependencies_dir}/conda.yml",
            image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04"),
    code_configuration=code_config,
    instance_type="Standard_DS2_v2",
    instance_count=1,
)
# create the deployment:
# NOTE(review): the call returns without waiting for the result, so the
# deployment may still be in progress when the following cells run.
ml_client.begin_create_or_update(yellow_deployment)
# Optional: route 100% of the endpoint traffic to the yellow deployment
# endpoint.traffic = {"yellow": 100}
# ml_client.begin_create_or_update(endpoint)

## Test the endpoint

In [None]:
%%writefile inputs.json
{"d1":2663,"d2":2654.4,"d3":2698,"d4":2690,"d5":2698.12}

In [None]:
# test the endpoint (the request will route to blue deployment as set above)
ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    deployment_name="yellow",
    request_file="inputs.json",
)

In [None]:
# Delete the compute resource and wait for the deletion to finish.
# NOTE(review): relies on a `compute` object created by an earlier cell —
# make sure that cell has run and points at the resource you intend to delete.
ml_client.compute.begin_delete(name=compute.name).wait()

## Deployed endpoint logs

In [None]:
# Fetch the last 50 log lines of the "yellow" deployment.
ml_client.online_deployments.get_logs(
    name="yellow", endpoint_name=online_endpoint_name, lines=50
)