# Common Tasks
- version check
- Directory operation
- File I/O
- Blob storage operation


## Version check

In [2]:
import os
import sys


In [3]:
print("OS: ", sys.platform)
print("Python: ", sys.version)

#get the current working directory
print(os.getcwd()) 

#list files in current working directory
# os.listdir(os.curdir)

OS:  linux
Python:  3.6.5 |Anaconda, Inc.| (default, Apr 29 2018, 16:14:56) 
[GCC 7.2.0]
/home/mylogin/notebooks/yanzrepo/codebase/common


## Directory operation

In [None]:
# Create Folder Structure
local_path = os.getcwd()
o16n_path = os.path.join(local_path,'o16n') # folder
model_path = os.path.join(o16n_path,'kerastfmodel') # folder
model_file_name = os.path.join(model_path,'kerastfmodel.h5') # file
score_file_name = os.path.join(model_path, 'score.py') # file


if not os.path.exists(local_path):
    os.makedirs(local_path)
if not os.path.exists(o16n_path):
    os.makedirs(o16n_path)
if not os.path.exists(model_path):
    os.makedirs(model_path)

In [19]:
local_path = os.getcwd()
orig_file_path = os.path.join(local_path, os.pardir, os.pardir, os.pardir,'ViennaDocs/PrivatePreview/notebooks/iris.csv')
print(os.path.abspath(orig_file_path))
dest_file_path = os.path.join(local_path, '../share/iris.csv')
print(os.path.abspath(desti_file_path))

/home/mylogin/notebooks/ViennaDocs/PrivatePreview/notebooks/iris.csv
/home/mylogin/notebooks/yanzrepo/codebase/share/iris.csv


In [20]:
# Copy a file from orig_file_path to dest_file_path
import shutil
shutil.copyfile(orig_file_path, dest_file_path)

'/home/mylogin/notebooks/yanzrepo/codebase/common/../share/iris.csv'

## File I/O

### read a csv file from local directory
Reference
- [Loading A CSV Into pandas](https://chrisalbon.com/python/data_wrangling/pandas_dataframe_importing_csv/)

In [3]:
# designate the source data file location
import os
csv_file_path = os.path.join(os.getcwd(), '../share/iris.csv')
os.path.abspath(csv_file_path)

'/home/mylogin/notebooks/yanzrepo/codebase/share/iris.csv'

In [13]:
# read csv file as pandas dataframe
import pandas as pd
df = pd.read_csv(csv_file_path)
print(type(df))
print(df.shape)

<class 'pandas.core.frame.DataFrame'>
(149, 5)


In [14]:
# check column names and found out the original data does not have header
df.columns

Index(['5.1', '3.5', '1.4', '0.2', 'Iris-setosa'], dtype='object')

In [15]:
# define column names
df.columns = ['Petal Length', 'Petal Width', 'Sepal Length', 'Sepal width', 'Class']

In [19]:
# read csv file again with assigned column names
df = pd.read_csv(csv_file_path, names=df.columns)
print(type(df))
print(df.shape)

<class 'pandas.core.frame.DataFrame'>
(150, 5)


In [20]:
df.head()

Unnamed: 0,Petal Length,Petal Width,Sepal Length,Sepal width,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### write a file to local directory

In [9]:
conda_dependencies_file = 'scoreenv.yml'

In [10]:
%%writefile $conda_dependencies_file
name: scoreenv
channels:
  - defaults
dependencies:
  - psutil
  - pip:
    - --index-url https://pypi.python.org/simple
    - --extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Candidate/604C89A437BA41BD942B4F46D9A3591D
    - azureml-sdk
    - azureml-contrib-daskonbatch
    - dask
    - distributed
    - bokeh
    - joblib
    - scikit-learn
    - numpy
    - cntk


Overwriting scoreenv.yml


In [8]:
%%writefile $score_file_name

import numpy as np
import os
import sys
import keras as K
from io import BytesIO
from PIL import Image, ImageOps
import base64
import json

def init():
    
    global model  

    print("Executing init() method...")
    print("Python version: " + str(sys.version) + ", keras version: " + K.__version__)
    # Load the model 
    model = K.models.load_model('kerastfmodel.h5')
    return


def run(inputString):
    
    responses = []
    base64Dict = json.loads(inputString)

    for k, v in base64Dict.items():
        img_file_name, base64Img = k, v
    decoded_img = base64.b64decode(base64Img)
    img_buffer = BytesIO(decoded_img)
    imageData = Image.open(img_buffer).convert("RGB")

    # Evaluate the model using the input data
    img = ImageOps.fit(imageData, (32, 32), Image.ANTIALIAS)
    img_conv = np.array(img) # shape: (32, 32, 3)
    # Scale pixel intensity
    x_test = img_conv / 255.0
    # Reshape
    x_test = np.moveaxis(x_test, -1, 0)
    x_test = np.expand_dims(x_test, 0)  # shape (1, 3, 32, 32)

    y_pred = model.predict(x_test)
    y_pred = np.argmax(y_pred, axis=-1)
    # print(y_pred)
    LABELS = ["airplane", "automobile", "bird", "cat", "deer", "fog", "frog", "horse", "ship", "truck"]
    resp = {img_file_name: str(LABELS[y_pred[0]])}

    responses.append(resp)
    return json.dumps(responses)
    
  
if __name__ == "__main__":
    init()
    # input data
    img_path = 'automobile8.png'
    encoded = None
    with open(img_path, 'rb') as file:
      encoded = base64.b64encode(file.read())
    img_dict = {img_path: encoded.decode('utf-8')}
    body = json.dumps(img_dict)
    resp = run(body)
    print(resp)

Writing /home/mylogin/notebooks/BatchAI/aml/o16n/kerastfmodel/score.py


## Blob Storage Operation

In [16]:
from azureml.core.datastore import Datastore

datastore_name = "scoring1"

blob_data_store = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name=datastore_name,
    account_name=storage_account_name,
    container_name=azure_blob_container_name,
    account_key=storage_account_key,
    overwrite=True)

pprint(blob_data_store.__dict__)   

{'_data_reference': $AZUREML_DATAREFERENCE_scoring1,
 '_num_workers': 32,
 'account_key': 'SG4muzp3+SZKk2kW1PKZ4xVkOiy3CJhBY1erouYnUTeJNKiucJo6kq9RuJtXNNu5AFIL07BZJZ3whvFQPRsqKw==',
 'account_name': 'mywsprodstoragegguewxwq',
 'blob_service': <azure.storage.blob.blockblobservice.BlockBlobService object at 0x0000019ADCAEB4A8>,
 'container_name': 'scoringcontainer',
 'datastore_type': 'AzureBlob',
 'name': 'scoring1',
 'sas_token': None,
 'workspace': <azureml.core.workspace.Workspace object at 0x0000019AD28AA320>}


In [20]:
# List the blobs in the container
print("Scoring results in blob storage:")
generator = blob_service.list_blobs(azure_blob_container_name)
for blob in generator:
    if blob.name.startswith(scores_dir + "/"):
        print(blob.name.replace(scores_dir + "/", "") + " - " + blob.properties.last_modified.ctime() + " (UTC)")

Scoring results in blob storage:
Test-28x28_cntk_text_0.txt_output - Thu Jun 14 06:03:53 2018 (UTC)
Test-28x28_cntk_text_1.txt_output - Thu Jun 14 06:03:53 2018 (UTC)
Test-28x28_cntk_text_2.txt_output - Thu Jun 14 06:03:57 2018 (UTC)
Test-28x28_cntk_text_3.txt_output - Thu Jun 14 06:03:56 2018 (UTC)
