# Simple Classification Model with Microsoft Azure
This classification model runs a dataset of broadcast data to classify whether or not a specific segment is a commercial on television. This will compare running the experiment locally and running it with Microsoft Azure Automated Machine Learning.

## Import Libraries

In [10]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# IMPORT AZURE LIBRARIES
# Azure Notebook Libraries
import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig

## Accessing the Azure Workspace
The workspace configuration file is one level above the public repository and not availible to those outside of the workspace.

In [12]:
# Load workspace
from azureml.core import Workspace

ws = Workspace.from_config()

Found the config file in: C:\Users\house\Documents\GitHub\config.json


## Create a New Experiment

In [13]:
# Choose a name for the experiment and specify the project folder.
from azureml.core.experiment import Experiment

experiment_name = 'simple_classification'
project_folder = './sample_projects/simple_classification'

experiment = Experiment(ws, experiment_name)


## Read Local Training Script

In [14]:
with open('./train.py', 'r') as f:
    f.read()

## Manage Dependencies

In [15]:
from azureml.core.runconfig import RunConfiguration

# Editing a run configuration property on-fly.
run_config_user_managed = RunConfiguration()

run_config_user_managed.environment.python.user_managed_dependencies = True

## Run Experiment
Run the local `train.py` script

In [16]:
from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)
run = experiment.submit(src)

run.wait_for_completion(show_output=True)

RunId: simple_classification_1551452710_8adc8451

Streaming azureml-logs/60_control_log.txt

Streaming log file azureml-logs/60_control_log.txt
Running: ['python', 'azureml-setup/run_script.py', 'python', 'azureml-setup/context_manager_injector.py', '-i', 'ProjectPythonPath:context_managers.ProjectPythonPath', '-i', 'OutputCollection:context_managers.RunHistory', 'train.py']
Logging experiment running status in history service.
Streaming log file azureml-logs/80_driver_log.txt

Streaming azureml-logs/80_driver_log.txt


Classification of Commercial Blocks
By: Katie House

Importing Data...
Data imported.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PROBLEM 3: DEFAULT CLASSIFIER ACCURACY

Random Forest Classification
Training Model...
Testing Model...

Neural Networks Classification
Training Model...
Testing Model...

K Nearest Neighbor Classification
Training Model...
Testing Model...

Classifier Output Summary:

---------------------------------------------------------

{'runId': 'simple_classification_1551452710_8adc8451',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2019-03-01T15:05:12.202051Z',
 'endTimeUtc': '2019-03-01T15:05:34.170195Z',
 'properties': {'azureml.runsource': 'experiment',
  'ContentSnapshotId': '583e6bab-78a9-40b7-8b8c-80434e9a42a4'},
 'runDefinition': {'Script': 'train.py',
  'Arguments': [],
  'SourceDirectoryDataStore': None,
  'Framework': 0,
  'Communicator': 0,
  'Target': 'local',
  'DataReferences': {},
  'JobName': None,
  'AutoPrepareEnvironment': True,
  'MaxRunDurationSeconds': None,
  'NodeCount': 1,
  'Environment': {'Python': {'InterpreterPath': 'python',
    'UserManagedDependencies': True,
    'CondaDependencies': {'name': 'project_environment',
     'dependencies': ['python=3.6.2', {'pip': ['azureml-defaults']}]}},
   'EnvironmentVariables': {'EXAMPLE_ENV_VAR': 'EXAMPLE_VALUE'},
   'Docker': {'BaseImage': 'mcr.microsoft.com/azureml/base:0.2.2',
    'Enabled': False,
    'SharedVolumes': True,
   

# Run Classification with Automated Machine Learning

## Import Data from Local Machine

In [18]:
# Data Upload Functions
def get_data(filepath):
    data = load_svmlight_file(filepath)
    return data[0], data[1]

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# IMPORT DATA
print("\nImporting Data...")

X_train, y_train = get_data("Data/HW2.train.txt")
X_test, y_test = get_data("Data/HW2.test.txt")

X_train = X_train.toarray() # convert sparce matrix to array
X_test = X_test.toarray() 
print("Data imported.")




Importing Data...
Data imported.


## Configure AML for classification

In [24]:
import logging

automl_config = AutoMLConfig(task = 'classification',
                             debug_log = 'automl_errors.log',
                             primary_metric = 'AUC_weighted',
                             iteration_timeout_minutes = 60,
                             iterations = 5,
                             n_cross_validations = 3,
                             verbosity = logging.INFO,
                             X = X_train, 
                             y = y_train,
                             path = project_folder)

## Run the AML Experiment Locally

In [25]:
local_run = experiment.submit(automl_config, show_output = True)

local_run

Running on local machine
Parent Run ID: AutoML_b97553e4-85d8-402b-9714-21a8ecc54696
********************************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
SAMPLING %: Percent of the training data to sample.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
********************************************************************************************************************

 ITERATION   PIPELINE                                       SAMPLING %  DURATION      METRIC      BEST
         0   StandardScalerWrapper LightGBM                 100.0000    0:00:24       0.9515    0.9515
         1   StandardScalerWrapper LightGBM                 100.0000    0:00:20       0.9580    0.9580
         2   SparseNormalizer LightGBM                      100

Experiment,Id,Type,Status,Details Page,Docs Page
simple_classification,AutoML_b97553e4-85d8-402b-9714-21a8ecc54696,automl,Completed,Link to Azure Portal,Link to Documentation


## Experiment Results

In [26]:
from azureml.widgets import RunDetails
RunDetails(local_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Select Best Classification Model

In [28]:
best_run, fitted_model = local_run.get_output()
print(best_run)
print(fitted_model)

Run(Experiment: simple_classification,
Id: AutoML_b97553e4-85d8-402b-9714-21a8ecc54696_1,
Type: None,
Status: Completed)
Pipeline(memory=None,
     steps=[('StandardScalerWrapper', <automl.client.core.common.model_wrappers.StandardScalerWrapper object at 0x00000156979CBB00>), ('LightGBMClassifier', <automl.client.core.common.model_wrappers.LightGBMClassifier object at 0x000001569792AC88>)])
Y_transformer(['LabelEncoder', LabelEncoder()])


## Test Best Model

In [36]:
y_pred = fitted_model.predict(X_test)
y_pred

array([-1.,  1.,  1., ...,  1.,  1., -1.])

In [60]:
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

print('f1 score: %.2f' % f1_score(y_test, y_pred))

'''
# IN PROGRESS
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Plot results with ROC curve
# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
n_classes = y_test.shape[0]
print(n_classes)
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

'''

f1 score: 0.89


'\n# IN PROGRESS\n# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n# Plot results with ROC curve\n# Compute ROC curve and ROC area for each class\nfpr = dict()\ntpr = dict()\nroc_auc = dict()\nn_classes = y_test.shape[0]\nprint(n_classes)\nfor i in range(n_classes):\n    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_pred[:, i])\n    roc_auc[i] = auc(fpr[i], tpr[i])\n\n# Compute micro-average ROC curve and ROC area\nfpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())\nroc_auc["micro"] = auc(fpr["micro"], tpr["micro"])\n\n'