## **Prerequisites**

#### 1. Set up an Azure Machine Learning workspace via the Azure portal
- Note the workspace name, resource group, and subscription ID
#### 2. Create and populate .env file in the home directory
- Use [.sample.env](../.sample.env) as a guide
#### 3. Create and activate conda virtual env
- Run the following bash commands via the terminal
```bash
    conda env create --name many_models --file=../environment/conda.yaml
    conda activate many_models
    az login
```
- Select the many_models Python interpreter and kernel to run the remainder of this notebook
#### 4. Load environment variables by running the cell below

In [1]:
# Read settings from the located .env file into the process environment
import os
from dotenv import find_dotenv, load_dotenv

# override=True lets values from the .env file replace variables that are already set
dotenv_file = find_dotenv()
load_dotenv(dotenv_file, override=True)

# Confirm variables were loaded by echoing one of them
print(os.environ.get("WORKSPACE_NAME"))

mm-aml-wksp


#### 5. Load dataset to AML workspace

In [2]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

import os

# Build the credential used to authenticate against Azure.
# NOTE(review): `tenantid` does not look like a documented DefaultAzureCredential
# keyword (documented ones include `interactive_browser_tenant_id`) — confirm
# that this argument has any effect before relying on TENANT_ID here.
credential = DefaultAzureCredential(tenantid=os.environ.get('TENANT_ID'))

# Workspace coordinates come from the environment; a missing variable yields None
workspace_settings = {
    "subscription_id": os.environ.get('SUBSCRIPTION_ID'),
    "resource_group_name": os.environ.get('RESOURCE_GROUP_NAME'),
    "workspace_name": os.environ.get('WORKSPACE_NAME'),
}

# Handle to the workspace, reused by the cells below
ml_client = MLClient(credential=credential, **workspace_settings)

In [3]:
import time

# Number of files to download; can be raised up to 11,973
# (each file is a SKU/Store combination, and one model is fit per file)
DATASET_MAX_FILES = 15

# Name under which the data asset is registered
data_name = "oj-simulated-sales-data"

# Use the current UTC time as the data asset version number
utc_now = time.gmtime()
v1 = time.strftime("%Y.%m.%d.%H%M%S", utc_now)

In [None]:
from azureml.opendatasets import OjSalesSimulated
import os

# Local folder that receives the downloaded files
local_path = "../data/oj_sim_sales"

# makedirs creates any missing parent folder (e.g. ../data on a fresh clone)
# and, with exist_ok=True, does nothing when the folder already exists,
# so this cell can be re-run safely.
os.makedirs(local_path, exist_ok=True)

# Limit the dataset to the first DATASET_MAX_FILES files, then download them,
# overwriting any copies left by a previous run
oj_sales_files = OjSalesSimulated.get_file_dataset().take(DATASET_MAX_FILES)
oj_sales_files.download(local_path, overwrite=True)

In [None]:
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Data
import time


# Describe the local download folder as a folder-type (URI_FOLDER) data asset,
# using the name and version chosen earlier in the notebook
my_data = Data(
    path=local_path,
    type=AssetTypes.URI_FOLDER,
    name=data_name,
    version=v1,
    description="Chicago area orange juice sales data",
)

# Register the asset through the workspace client
ml_client.data.create_or_update(my_data)

# Report the name and version that were submitted
print(f"Data asset created. Name: {my_data.name}, version: {my_data.version}")

In [8]:
import mltable

# Fetch the data asset version carrying the "latest" label
data_asset = ml_client.data.get(data_name, label="latest")

# mltable takes a list of path dicts; here a single folder entry for the asset
path = {'folder': data_asset.path}

# Parse the delimited files into a table and materialize it as a pandas DataFrame
tbl = mltable.from_delimited_files(paths=[path])
df = tbl.to_pandas_dataframe()

# Preview the first ten rows
display(df.head(10))

  dataframe = dataframe_reader.complete_incoming_dataframe(


Unnamed: 0,WeekStarting,Store,Brand,Quantity,Advert,Price,Revenue
0,1990-06-14,1000,dominicks,12003,True,2.59,31087.77
1,1990-06-21,1000,dominicks,10239,True,2.39,24471.21
2,1990-06-28,1000,dominicks,17917,True,2.48,44434.16
3,1990-07-05,1000,dominicks,14218,True,2.33,33127.94
4,1990-07-12,1000,dominicks,15925,True,2.01,32009.25
5,1990-07-19,1000,dominicks,17850,True,2.17,38734.5
6,1990-07-26,1000,dominicks,10576,True,1.97,20834.72
7,1990-08-02,1000,dominicks,9912,True,2.26,22401.12
8,1990-08-09,1000,dominicks,9571,True,2.11,20194.81
9,1990-08-16,1000,dominicks,15748,True,2.42,38110.16
