In [1]:
from azureml.core import Workspace, Datastore, Dataset

In [2]:
# Connect to the Azure ML workspace using the configuration kept under ./ConfigFile/
config_dir = './ConfigFile/'
ws = Workspace.from_config(path=config_dir)

In [3]:
# Fetch the datastore registered as 'azure_sdk_bob01' from the workspace
datastore_name = 'azure_sdk_bob01'
az_store = Datastore.get(ws, datastore_name)

In [4]:
# Dataset source: a list holding one (datastore, file name) tuple
source_file = 'NID_revised_cleanData.csv'
csv_path = [(az_store, source_file)]

In [5]:
# Build a tabular dataset from the delimited file(s) referenced by csv_path
CT_dataset = Dataset.Tabular.from_delimited_files(path=csv_path)

In [7]:
# Register the dataset in the workspace under the name 'CT_SDK'.
# create_new_version=True is passed; presumably this makes a re-run add a new
# version under the same name -- confirm against the SDK docs.
# The name CT_dataset is rebound to the object returned by register().
CT_dataset = CT_dataset.register(
    workspace=ws,
    name='CT_SDK',
    create_new_version=True,
)

In [6]:
# List every workspace in the subscription that `ws` belongs to.
# The subscription id is read from the loaded workspace rather than being
# hard-coded, so no account identifier is embedded in the notebook and the
# cell follows whichever workspace the config file points at.
# Workspace.list is invoked on the class (it does not need an instance).
# Result is a dict: workspace name -> list of Workspace objects.
ws_list = Workspace.list(subscription_id=ws.subscription_id)
ws_list

{'AzureML-SDK-ws01': [Workspace.create(name='AzureML-SDK-ws01', subscription_id='61c74121-8c33-45a0-8c9d-0335cc850e80', resource_group='AzureML-SDK-RG1')]}

In [7]:
# Iterating the dict yields its keys, so this keeps only the workspace names.
# NOTE: ws_list is rebound here from the dict to a plain list of names.
ws_list = [workspace_name for workspace_name in ws_list]
ws_list

['AzureML-SDK-ws01']

In [8]:
# Fetch the workspace's default datastore and display it
az_default_store = ws.get_default_datastore()
az_default_store

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-39c7b142-8c36-4b5a-9c0f-8ae94d831640",
  "account_name": "azuremlsstorage720b02ad9",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

In [9]:
# Collect the names of all datastores attached to the workspace
store_list = [store_name for store_name in ws.datastores]
store_list

['azure_sdk_bob01', 'workspacefilestore', 'workspaceblobstore']

In [10]:
# Collect the names of all datasets registered in the workspace
# (iterating ws.datasets directly yields the names)
dataset_list = [dataset_name for dataset_name in ws.datasets]
dataset_list

['CT_SDK']

In [11]:
# Collect the registered dataset names through the explicit .keys() view
dataset_list = [*ws.datasets.keys()]
dataset_list

['CT_SDK']

In [12]:
# Look up one registered dataset in the workspace by its name
registered_name = 'CT_SDK'
az_dataset = Dataset.get_by_name(ws, registered_name)
az_dataset

{
  "source": [
    "('azure_sdk_bob01', 'NID_revised_cleanData.csv')"
  ],
  "definition": [
    "GetDatastoreFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ],
  "registration": {
    "id": "bf344a0b-388b-4cc0-ad63-987cf16ba908",
    "name": "CT_SDK",
    "version": 1,
    "workspace": "Workspace.create(name='AzureML-SDK-ws01', subscription_id='61c74121-8c33-45a0-8c9d-0335cc850e80', resource_group='AzureML-SDK-RG1')"
  }
}

In [14]:
# Load the Azure ML dataset into a pandas DataFrame and preview the first rows
df = az_dataset.to_pandas_dataframe()
df.head()

Unnamed: 0,Region,Metro,SiteType,DeploymentType,NIDPM,DPM,IsMainstream,FacilityCode,ClassType,CT,CT_Status,CT_Indicator
0,AMEREAST,Des Moines,DC,Expansion,John Simmons,Dhiraj Take (Tata Consultancy Services Ltd),False,DM4,Class A,226.0,Out-of SLA,Yellow
1,AMEREAST,Des Moines,DC,Expansion,John Simmons,Dhiraj Take (Tata Consultancy Services Ltd),False,DM4,Class A,226.0,Out-of SLA,Yellow
2,AMEREAST,Des Moines,DC,Expansion,John Simmons,Dhiraj Take (Tata Consultancy Services Ltd),False,DM4,Class A,226.0,Out-of SLA,Yellow
3,AMEREAST,Campinas,RNG,Aug,Kathy Brown,Madhvi Chand (Tata Consultancy Services),False,CPQ02,Class C,91.0,With-in SLA,Green
4,AMERWEST,Los Angeles,GW,Upgrade,Darren (Norm) Norman,Rajesh Adimoolam (Tata Consultancy Services Ltd),False,LAX30,Class E,117.0,With-in SLA,Green


In [15]:
# Show the column labels of the loaded DataFrame
df.columns

Index(['Region', 'Metro', 'SiteType', 'DeploymentType', 'NIDPM', 'DPM',
       'IsMainstream', 'FacilityCode', 'ClassType', 'CT', 'CT_Status',
       'CT_Indicator'],
      dtype='object')

In [17]:
# Keep a subset of the columns ('CT' and 'CT_Status' are left out)
selected_columns = [
    'Region', 'Metro', 'SiteType', 'DeploymentType', 'NIDPM', 'DPM',
    'IsMainstream', 'FacilityCode', 'ClassType', 'CT_Indicator',
]
df_sub = df[selected_columns]

In [18]:
# Preview the first rows of the column subset
df_sub.head()

Unnamed: 0,Region,Metro,SiteType,DeploymentType,NIDPM,DPM,IsMainstream,FacilityCode,ClassType,CT_Indicator
0,AMEREAST,Des Moines,DC,Expansion,John Simmons,Dhiraj Take (Tata Consultancy Services Ltd),False,DM4,Class A,Yellow
1,AMEREAST,Des Moines,DC,Expansion,John Simmons,Dhiraj Take (Tata Consultancy Services Ltd),False,DM4,Class A,Yellow
2,AMEREAST,Des Moines,DC,Expansion,John Simmons,Dhiraj Take (Tata Consultancy Services Ltd),False,DM4,Class A,Yellow
3,AMEREAST,Campinas,RNG,Aug,Kathy Brown,Madhvi Chand (Tata Consultancy Services),False,CPQ02,Class C,Green
4,AMERWEST,Los Angeles,GW,Upgrade,Darren (Norm) Norman,Rajesh Adimoolam (Tata Consultancy Services Ltd),False,LAX30,Class E,Green


In [19]:
# Upload the DataFrame to the datastore and register it as the dataset 'CT_from_df'
az_ds_from_df = Dataset.Tabular.register_pandas_dataframe(
    dataframe=df_sub,
    target=az_store,
    name='CT_from_df',
)

Method register_pandas_dataframe: This is an experimental method, and may change at any time.<br/>For more information, see https://aka.ms/azuremlexperimental.


Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/846522fc-a80b-4afa-a2db-80ea70088784/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


In [20]:
# Upload individual local files to the storage account behind the datastore.
# (To place them in a specific folder on the storage account, pass the
# `target_path` parameter to upload_files.)
file_list = [
    './sample_csv1.csv',
    './sample_csv2.csv',
]

az_store.upload_files(
    files=file_list,
    relative_root='./',
    overwrite=True,
)

Uploading an estimated of 2 files
Uploading ./sample_csv1.csv
Uploaded ./sample_csv1.csv, 1 files out of an estimated total of 2
Uploading ./sample_csv2.csv
Uploaded ./sample_csv2.csv, 2 files out of an estimated total of 2
Uploaded 2 files


$AZUREML_DATAREFERENCE_azure_sdk_bob01

In [21]:
# Upload a whole local folder to the storage account behind the datastore.
# (To place it in a specific folder on the storage account, pass the
# `target_path` parameter to upload.)
az_store.upload(
    src_dir='./sample_data/',
    overwrite=True,
)

Uploading an estimated of 2 files
Uploading ./sample_data\sample_csv1.csv
Uploaded ./sample_data\sample_csv1.csv, 1 files out of an estimated total of 2
Uploading ./sample_data\sample_csv2.csv
Uploaded ./sample_data\sample_csv2.csv, 2 files out of an estimated total of 2
Uploaded 2 files


$AZUREML_DATAREFERENCE_azure_sdk_bob01