In [14]:
import azureml.core
from azureml.core import Workspace

# Connect to the workspace described by the saved config file, then report
# which SDK version and workspace are in use.
ws = Workspace.from_config()
sdk_version, workspace_name = azureml.core.VERSION, ws.name
print('Ready to use Azure ML {} to work with {}'.format(sdk_version, workspace_name))

Ready to use Azure ML 1.47.0 to work with mlopsprojectNEW


## Generate and upload batch data

In [15]:
from azureml.core import Datastore, Dataset
import pandas as pd
import os

# Make the named datastore the workspace default, then fetch a handle to it
default_datastore_name = 'workspaceblobstore'
ws.set_default_datastore(default_datastore_name)
default_ds = ws.get_default_datastore()

In [16]:
# List every datastore name in the workspace and flag the one that matches the default
for datastore_name in ws.datastores:
    is_default = datastore_name == default_ds.name
    print(datastore_name, "- Default =", is_default)

azureml_globaldatasets - Default = False
workspaceblobstore - Default = True
workspaceartifactstore - Default = False
workspaceworkingdirectory - Default = False
workspacefilestore - Default = False


In [17]:
# Read the CSV stored under outputs/ into a DataFrame
holdout_csv_path = 'outputs/df_holdout.csv'
df_holdout = pd.read_csv(holdout_csv_path)

In [18]:
# Draw 100 rows at random from df_holdout and keep them as a NumPy array.
# A fixed random_state makes the draw repeatable, so Restart & Run All
# produces the same sample (and therefore the same files) every time.
sample = df_holdout.sample(n=100, random_state=42).values

In [19]:
# Create a local folder to hold one CSV file per sampled row
batch_folder = './batch-data'
os.makedirs(batch_folder, exist_ok=True)
print("Folder created!")

# Save each sample as a separate file named 1.csv, 2.csv, ...
# Iterate over the rows actually present in `sample` rather than a hard-coded
# count, so this cell stays correct if the sample size is changed elsewhere.
print("Saving files...")
for file_number, row in enumerate(sample, start=1):
    fname = str(file_number) + '.csv'
    # A non-empty `sep` makes ndarray.tofile write the values as delimited text
    row.tofile(os.path.join(batch_folder, fname), sep=",")
print("files saved!")

# Upload the files to the default datastore
# NOTE(review): the folder is created with exist_ok=True and never cleared, so
# files left over from an earlier run would presumably be uploaded too - confirm.
print("Uploading files to datastore...")
default_ds = ws.get_default_datastore()
# Kept as the last expression so the notebook displays upload()'s return value
default_ds.upload(src_dir="batch-data", target_path="batch-data", overwrite=True, show_progress=True)



Folder created!
Saving files...
files saved!
Uploading files to datastore...
Uploading an estimated of 100 files
Uploading batch-data/80.csv
Uploaded batch-data/80.csv, 1 files out of an estimated total of 100
Uploading batch-data/42.csv
Uploaded batch-data/42.csv, 2 files out of an estimated total of 100
Uploading batch-data/85.csv
Uploaded batch-data/85.csv, 3 files out of an estimated total of 100
Uploading batch-data/1.csv
Uploaded batch-data/1.csv, 4 files out of an estimated total of 100
Uploading batch-data/2.csv
Uploaded batch-data/2.csv, 5 files out of an estimated total of 100
Uploading batch-data/87.csv
Uploaded batch-data/87.csv, 6 files out of an estimated total of 100
Uploading batch-data/93.csv
Uploaded batch-data/93.csv, 7 files out of an estimated total of 100
Uploading batch-data/37.csv
Uploaded batch-data/37.csv, 8 files out of an estimated total of 100
Uploading batch-data/54.csv
Uploaded batch-data/54.csv, 9 files out of an estimated total of 100
Uploading batch-da

$AZUREML_DATAREFERENCE_b5206cedc75f4c4ea5735c221cc74d99

In [20]:
# Register a dataset for the input data
# Build a file dataset that points at the 'batch-data/' path on the default datastore
batch_data_set = Dataset.File.from_files(path=(default_ds, 'batch-data/'), validate=False)
try:
    batch_data_set = batch_data_set.register(workspace=ws,
                                             name='batch-data',
                                             description='batch data',
                                             create_new_version=True)
except Exception as ex:
    # Best-effort: report the problem and keep the notebook running.
    # batch_data_set still refers to the unregistered dataset in this case.
    print(ex)
else:
    # Only report success when register() actually completed
    print("Done!")

Done!
