# Работа с Datastores и Datasets в Azure ML

## Соединение со своим Workspace

Импорт модулей:

In [1]:
import os
import azureml.core
from azureml.core import Workspace, Experiment, Dataset
from azureml.core.datastore import Datastore
from azureml.data.data_reference import DataReference
from msrest.exceptions import HttpOperationError

# Print the version of the installed azureml.core SDK
print(f'SDK version: {azureml.core.VERSION}')

SDK version: 1.12.0


Создаем соединение со своим Workspace:

In [2]:
# Load the workspace handle from its saved configuration
ws = Workspace.from_config()

# Show the workspace name, resource group and location, one per line
print(
    ws.name,
    ws.resource_group,
    ws.location,
    sep='\n',
)

ai-in-cloud-workspace
ai-in-cloud-workshop-rg
westeurope


## Просмотр существующих Datastore

In [3]:
# Look up the workspace's default datastore and read its name once
default_ds = ws.get_default_datastore()
default_ds_name = default_ds.name
print(f'Default Datastore name: {default_ds_name}\n')

# List every datastore name in the workspace, flagging the default one
for ds_name in ws.datastores:
    is_default = ds_name == default_ds_name
    print(ds_name, "- Default =", is_default)

Default Datastore name: demo_datastore

demo_datastore - Default = True
creditcardfraudstore - Default = False
dogsimagesblob - Default = False
azureml_globaldatasets - Default = False
workspaceblobstore - Default = False
workspacefilestore - Default = False


## Создаем новый Datastore

Установка имени нового Datastore и указание информации об Azure Storage Account, где будет располагаться новый Datastore:

In [4]:
# Name used to look up / register the datastore in the workspace
datastore_name = 'demo_datastore'

# Azure Storage Account Info
storage_account_name = 'aiclouddata'
storage_container_name = 'demo-container'

# WARN: do not paste a real key into the notebook source. The key is read from
# the AZURE_STORAGE_ACCOUNT_KEY environment variable; the placeholder is only
# the fallback used when that variable is not set.
storage_account_key = os.environ.get('AZURE_STORAGE_ACCOUNT_KEY', '<azure_account_key>')

Создаем Datastore, если он ещё не существует:

In [5]:
# Reuse the datastore if the lookup succeeds; otherwise register it
try:
    new_datastore = Datastore.get(ws, datastore_name)
except HttpOperationError:
    # Lookup failed: attach the blob container as a new datastore
    new_datastore = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=datastore_name,
        container_name=storage_container_name,
        account_name=storage_account_name,
        account_key=storage_account_key,
    )
    print(f'Registered blob datastore with name {new_datastore.name}')
else:
    print(f'Blob Datastore with name {new_datastore.name} was found!')

Blob Datastore with name demo_datastore was found!


Получаем информацию о созданном Datastore:

In [6]:
# Summarize the datastore: its name, backing storage type and storage account
print(
    f'Datastore {new_datastore.name} based on {new_datastore.datastore_type} '
    f'in storage account named {new_datastore.account_name}'
)

Datastore demo_datastore based on AzureBlob in storage account named aiclouddata


## Загрузка данных из Datastore

Делаем созданный Datastore хранилищем по умолчанию (для удобства дальнейшей работы):

In [7]:
# Make the datastore obtained above the workspace default
ws.set_default_datastore(new_datastore.name)
# Re-read the default so that `ds` refers to it in the following cells
ds = ws.get_default_datastore()

# Confirm which datastore is now the default
print(ds.name)

demo_datastore


Загружаем локальные CSV-файлы в Datastore:

In [8]:
# Local diabetes CSV files to copy into the datastore
local_csv_files = ['../data/diabetes_train.csv', '../data/diabetes_test.csv']

ds.upload_files(
    files=local_csv_files,
    target_path='diabetes-data/',  # destination folder path inside the datastore
    overwrite=True,  # replace existing files of the same name
    show_progress=True,
)

Uploading an estimated of 2 files
Uploading ../data/diabetes_test.csv
Uploading ../data/diabetes_train.csv
Uploaded ../data/diabetes_test.csv, 1 files out of an estimated total of 2
Uploaded ../data/diabetes_train.csv, 2 files out of an estimated total of 2
Uploaded 2 files


$AZUREML_DATAREFERENCE_411e9cdddeda492c9936c4880d13c33d

Создадим табличный Dataset из загруженных в Datastore данных:

In [9]:
# Build a tabular dataset from the files matching 'diabetes-data/*.csv' in datastore `ds`.
# NOTE(review): this cell only creates the dataset object; no register call is made here.
diabetes_ds = Dataset.Tabular.from_delimited_files(path=(ds, 'diabetes-data/*.csv'))
# Bare expression so the notebook displays the dataset definition
diabetes_ds

{
  "source": [
    "('demo_datastore', 'diabetes-data/*.csv')"
  ],
  "definition": [
    "GetDatastoreFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ]
}

Преобразуем Azure ML Dataset в Pandas DataFrame и посмотрим на результат:

In [10]:
# Materialize the tabular dataset as a pandas DataFrame
diabetes_df = diabetes_ds.to_pandas_dataframe()

# Preview the rows at positions 1 through 9 (the row at position 0 is skipped)
diabetes_df.iloc[1:10]

Unnamed: 0,PatientID,Pregnancies,PlasmaGlucose,DiastolicBloodPressure,TricepsThickness,SerumInsulin,BMI,DiabetesPedigree,Age,Diabetic
1,1823377,0,116,92,16,184,18.60363,0.131156,22,0
2,1916381,8,171,42,29,160,35.482247,0.082671,22,1
3,1247480,3,108,63,45,297,49.375169,0.100979,46,1
4,1516947,8,153,99,15,41,35.062139,0.116191,22,1
5,1703500,0,81,90,17,38,41.658026,0.478649,21,0
6,1040668,0,82,52,45,42,40.757542,0.082352,26,0
7,1358192,4,96,83,26,34,52.945331,0.160199,53,1
8,1023245,3,103,54,47,94,55.149428,0.316475,21,1
9,1363912,11,99,60,12,737,32.898198,0.108512,46,1
