# Import Required Packages

In [1]:
from sklearn import *
import pandas as pd
import numpy as np
from sklearn.preprocessing import *
import random
from sklearn.model_selection import train_test_split
import collections
import os
import sys
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# Data Reading

In [2]:
# Directory that holds the recordings, resolved against the current working directory
data_path = f"{os.getcwd()}/data/"

# Training recording and held-out test recording inside that directory
train_data_file = f"{data_path}train_data.txt"
test_data_file = f"{data_path}test_data.txt"

# Processing Data

In [3]:
def process_data(data_file):
    """
    Function to parse the given data file and split every line into the required columns
    Arguments:
        data_file: path to the text file to parse
    Returns:
        list of rows [ax, ay, az, gx, gy, gz, label], every entry a float.
        The label is 0.0 for CLOSED, 1.0 for OPEN and 2.0 for STATIONARY.
    Raises:
        KeyError: when the last comma-separated field is not one of the known labels
        IndexError, ValueError: when a non-blank line does not have the expected layout
    """
    label_map = {'CLOSED': 0, 'OPEN': 1, 'STATIONARY': 2}

    processed_data = []
    with open(data_file, 'r') as f:
        for line in f:
            # Blank lines (typically a trailing one) carry no reading.
            if not line.strip():
                continue

            content = line.split(',')
            # First field: the third space-separated token, minus its leading character.
            ax = content[0].split(" ")[2][1:]
            # Sixth field: drop its trailing character.
            gz = content[5][:-1]
            ay, az, gx, gy = content[1:5]
            # strip() removes the newline when there is one, so the final line of a
            # file without a trailing newline keeps its full label.
            label = label_map[content[-1].strip()]

            readings = [float(value.strip()) for value in (ax, ay, az, gx, gy, gz)]
            processed_data.append(readings + [float(label)])

    return processed_data

In [4]:
def remove_stationary_values(processed_data):
    """
    Function to drop the Stationary rows and group the remaining rows into runs
    Consecutive rows that share the same non-stationary label form one run. A run ends
    at a stationary row (label 2.0) or as soon as the label changes.
    Arguments:
        processed_data: list of rows whose last entry is the numeric label
    Returns:
        defaultdict mapping each non-stationary label to a list of runs; every run is
        a list of rows with the label entry removed
    """
    data = collections.defaultdict(list)
    # Label of the run currently being extended; None while between runs.
    current_label = None
    for observation in processed_data:
        label = observation[-1]
        if label == 2.0:
            current_label = None
            continue
        # Open a new run on the first row after a gap and also on a direct
        # label change, so rows are never appended to another label's run.
        if label != current_label:
            data[label].append([])
            current_label = label
        data[label][-1].append(observation[:-1])
    return data

In [5]:
def remove_unncessary_values(data, min_length=5):
    """
    Function to remove the runs that are too short to be useful (treated as noise)
    The lists stored in `data` are filtered in place; nothing is returned.
    Arguments:
        data: mapping from label to a list of runs, each run being a list of observations
        min_length: smallest number of observations a run must have to be kept
    """
    for runs in data.values():
        # Slice assignment keeps the same list object, so other references to it
        # observe the filtered content as well.
        runs[:] = [run for run in runs if len(run) >= min_length]

In [6]:
def delete_multiple_element(list_object, indices):
    """
    Function to delete several positions from a list in place
    Arguments:
        list_object: list to delete from (modified in place)
        indices: positions to delete; repeated values are treated as one request and
            positions at or beyond the end of the list are ignored
    Returns:
        the same list object, after the deletions
    """
    # Deleting from the highest position downwards keeps the remaining positions valid.
    # The set() removes repeats, which would otherwise pop a second, unrelated element
    # that had shifted into the same position.
    for idx in sorted(set(indices), reverse=True):
        if idx < len(list_object):
            list_object.pop(idx)
    return list_object

In [7]:
def get_samples_and_labels(data_file):
    """
    Function to turn one data file into model samples and their labels
    The file is parsed, the stationary rows are dropped, short runs are filtered out,
    and the runs labelled 1.0 are listed before the runs labelled 0.0.
    Arguments:
        data_file: path to the file
    Returns:
        (samples, labels): the list of runs and a list of 1/0 labels of equal length
    """
    runs_by_label = remove_stationary_values(process_data(data_file))
    remove_unncessary_values(runs_by_label)

    open_runs = runs_by_label[1.0]
    closed_runs = runs_by_label[0.0]

    samples = open_runs + closed_runs
    labels = [1 for _ in open_runs] + [0 for _ in closed_runs]
    return samples, labels

In [8]:
# Fetch the samples and the labels for the training data
samples, labels = get_samples_and_labels(train_data_file)

In [9]:
# samples[0][0]

In [10]:
# collections.Counter(labels)

# Feature Extraction

In [11]:
def feature1(window):
    """
    Function to fetch 3 features from the given window: the means of columns 1, 3 and 5
    With the row layout built by process_data ([ax, ay, az, gx, gy, gz]) these are:
        Feature 1: Mean of Accelerometer y
        Feature 2: Mean of Gyroscope x
        Feature 3: Mean of Gyroscope z
    Arguments:
        window: window for the feature extraction (rows of observations)
    Returns:
        list with the 3 means, or None when the window has no rows
    """
    if len(window) == 0:
        return None

    column_means = np.mean(window, axis=0)
    return [column_means[column] for column in (1, 3, 5)]

def feature2(window):
    """
    Function to fetch 4 features from the given window
        Feature 1: Mean of Accelerometer Values (columns 0-2)
        Feature 2: Mean of Gyroscope Values (columns 3-5)
        Feature 3: Standard Deviation of Accelerometer Values
        Feature 4: Standard Deviation of Gyroscope Values
    Arguments:
        window: window for the feature extraction, 2-D with one observation per row
    Returns:
        list with the 4 values, or None when the window is empty
    """
    window = np.asarray(window, dtype=float)
    if window.size == 0:
        return None

    accelerometer = window[:, 0:3]
    # Every row, columns 3-5. (Writing `window[:3:6]` would instead slice rows with
    # a step of 6 and mix accelerometer values into the gyroscope statistic.)
    gyroscope = window[:, 3:6]

    return [
        np.mean(accelerometer),
        np.mean(gyroscope),
        np.std(accelerometer),
        np.std(gyroscope),
    ]

In [12]:
def select_features_from_window(window):
    """
    Function that picks which feature extractor is applied to a window
    Arguments:
        window: window for the feature extraction
    Returns:
        whatever the selected extractor returns for the window
    """
    # feature1 is the active extractor; call feature2(window) here instead to
    # switch to the mean/standard-deviation features.
    return feature1(window)

In [13]:
def create_feature_vector(sample, splits):
    """
    Function to cut one sample into windows and extract the features of every window
    The sample is divided into `splits` equally sized windows; rows left over after
    the even division are appended to the last window.
    Arguments:
        sample: sample data for the feature extraction (sequence of observations)
        splits: number of windows the sample is split into
    Returns:
        list with one feature list per window; empty when the sample has fewer
        rows than `splits`
    """
    feature_vector = []
    if len(sample) < splits:
        # Too short to give every window at least one row.
        return feature_vector

    # np.split needs a length that divides evenly, so split the longest such prefix.
    even_length = (len(sample) // splits) * splits
    leftover = sample[even_length:]

    windows = np.split(np.array(sample[:even_length]), splits)
    if len(leftover) > 0:
        windows[-1] = np.vstack((windows[-1], leftover))

    for window in windows:
        features = select_features_from_window(window)
        if features:
            feature_vector.append(features)

    return feature_vector

In [14]:
def create_data(samples, splits):
    """
    Function to build the 2-D feature matrix for the given samples
    Every sample contributes one row made of its per-window features laid end to end.
    Samples for which create_feature_vector returns an empty list are skipped, so the
    result can have fewer rows than `samples`; a label list kept separately by the
    caller is not filtered here.
    Arguments:
        samples: sample data for the feature extraction
        splits: number of windows every sample is split into
    Returns:
        numpy array of shape (kept samples, windows * features per window)
    Raises:
        ValueError: when no sample produces a feature vector
    """
    vectors = []
    for sample in samples:
        f_vector = create_feature_vector(sample, splits)
        if f_vector != []:
            vectors.append(f_vector)

    if not vectors:
        raise ValueError(
            "no sample produced a feature vector for splits={}".format(splits)
        )

    data = np.array(vectors)
    # Flatten (windows, features) into a single feature axis per sample.
    return data.reshape(data.shape[0], data.shape[1] * data.shape[2])

# Model Identification, Training and Testing

## Hyperparameter Tuning

In [15]:
# Function to perform grid search with pca, c, gamma values
def perform_gird_search(x_train, y_train):
    """
    Function to grid-search a scaler -> PCA -> RBF SVM pipeline
    The search covers the number of PCA components and the SVM C and gamma values,
    using 3-fold cross validation, and refits the best combination on all given data.
    Arguments:
        x_train: feature rows to tune on
        y_train: labels matching x_train
    Returns:
        (best parameter dict, pipeline refitted with those parameters)
    """
    steps = [
        ('scaler', StandardScaler()),
        ('pca', PCA()),
        ('SVM', SVC(kernel='rbf', random_state=1234)),
    ]

    # Keys follow the "<step name>__<parameter>" form of the pipeline steps above.
    search_space = {
        'pca__n_components': [2, 4],
        'SVM__C': [0.05, 0.1, 0.5, 1],
        'SVM__gamma': [0.1, 0.5],
    }

    search = GridSearchCV(Pipeline(steps), search_space, refit=True, verbose=1, cv=3)
    search.fit(x_train, y_train)

    return search.best_params_, search.best_estimator_

In [16]:
# Seed passed to every shuffle() call and to the final SVC so that runs are repeatable
random_state_seed = 1234

In [17]:
# Best hyperparameters found for each number of windows, keyed by that number
num_window_to_best_params = {}

for numw in range(3,5):
    print("Tuning data for window size: {}".format(numw))
    
    # numw is passed as `splits`: the number of windows every sample is cut into
    data = create_data(samples, numw)
    data_shuffled, labels_shuffled = shuffle(data, labels, random_state=random_state_seed)
    bparams, best_model = perform_gird_search(data_shuffled, labels_shuffled)
    num_window_to_best_params[numw] = bparams
    
    # NOTE(review): scored on the same rows the search was fitted on, so this is
    # training accuracy rather than a held-out estimate
    print(best_model.score(data, labels))

Tuning data for window size: 3
Fitting 3 folds for each of 16 candidates, totalling 48 fits
1.0
Tuning data for window size: 4
Fitting 3 folds for each of 16 candidates, totalling 48 fits
1.0


In [18]:
# Print the winning hyperparameters for every window count that was tuned
for window_size, parameters in num_window_to_best_params.items():
    print("Best parameters for Window Size: {}".format(window_size))
    print(parameters)

Best parameters for Window Size: 3
{'SVM__C': 0.05, 'SVM__gamma': 0.1, 'pca__n_components': 2}
Best parameters for Window Size: 4
{'SVM__C': 0.05, 'SVM__gamma': 0.1, 'pca__n_components': 2}


## Model Training

In [19]:
# Number of windows per sample used for the final model
best_window_size = 3

# Read the tuned values under the same key, so that changing best_window_size cannot
# pair one window count with the hyperparameters tuned for another.
best_pca__n_components = num_window_to_best_params[best_window_size]["pca__n_components"]
best_SVM__C = num_window_to_best_params[best_window_size]["SVM__C"]
best_SVM__gamma = num_window_to_best_params[best_window_size]["SVM__gamma"]

In [20]:
# Final model: same three steps as the grid search, with the tuned values plugged in
pipeline = Pipeline(
    [('scaler', StandardScaler()),
     ('pca', PCA(n_components = best_pca__n_components)),
     ('SVM', SVC(kernel='rbf', random_state=random_state_seed,C = best_SVM__C, gamma=best_SVM__gamma,))]
)

# Rebuild the feature matrix for the chosen window count; `data` was last assigned
# inside the tuning loop
data = create_data(samples, best_window_size)
data_shuffled, labels_shuffled = shuffle(data, labels, random_state=random_state_seed)

pipeline.fit(data_shuffled, labels_shuffled)

Pipeline(steps=[('scaler', StandardScaler()), ('pca', PCA(n_components=2)),
                ('SVM', SVC(C=0.05, gamma=0.1, random_state=1234))])

In [21]:
# Accuracy on the rows the pipeline was just fitted on (not a held-out estimate)
pipeline.score(data, labels)

1.0

In [22]:
# confusion_matrix takes (y_true, y_pred): rows are the true labels, columns the predictions
confusion_matrix(labels, pipeline.predict(data))

array([[122,   0],
       [  0, 122]])

## Model Testing

In [23]:
test_samples, test_labels = get_samples_and_labels(test_data_file)

In [24]:
test_data = create_data(test_samples, best_window_size)

In [25]:
# Rows and labels are shuffled together with the shared seed, then scored with the fitted pipeline
shuffled_test_data, shuffled_test_labels = shuffle(test_data, test_labels, random_state = random_state_seed)
pipeline.score(shuffled_test_data, shuffled_test_labels)

1.0

In [26]:
# confusion_matrix takes (y_true, y_pred): rows are the true labels, columns the predictions
confusion_matrix(shuffled_test_labels, pipeline.predict(shuffled_test_data))

array([[7, 0],
       [0, 6]])

# IBM Cloud Deployment

In [27]:
from ibm_watson_machine_learning import APIClient
import json

In [28]:
# Training data uploaded alongside the model. The features are built from the samples
# loaded in this cell, so rows and labels come from the same call and the cell does not
# depend on the `samples` variable of an earlier section.
cloud_training_samples, cloud_training_labels = get_samples_and_labels(train_data_file)
cloud_training_data = create_data(cloud_training_samples, best_window_size)

cloud_shuffled_training_data, cloud_shuffled_training_target = shuffle(
    cloud_training_data, cloud_training_labels, random_state=random_state_seed
)



In [29]:
# Credentials are taken from the WML_API_KEY / WML_URL environment variables so that real
# secrets do not have to be saved in the notebook. The placeholders are used only when
# the variables are not set.
wml_credentials_apikey = os.environ.get("WML_API_KEY", "ADD YOUR SPECIFIC API KEY HERE")
wml_credentials_url = os.environ.get("WML_URL", "ADD YOUR URL HERE")

In [30]:
wml_credentials = {
    "apikey": wml_credentials_apikey,
    "url": wml_credentials_url,
}

In [31]:
client = APIClient(wml_credentials)
client.spaces.list()

Python 3.7 and 3.8 frameworks are deprecated and will be removed in a future release. Use Python 3.9 framework instead.
Note: 'limit' is not provided. Only first 50 records will be displayed if the number of records exceed 50
------------------------------------  ----  ------------------------
ID                                    NAME  CREATED
fac068c9-1bea-460a-b9cd-987107957422  PROD  2022-04-02T20:15:32.208Z
------------------------------------  ----  ------------------------


In [32]:
space_id = "ADD YOUR SPACE ID HERE"

In [33]:
client.set.default_space(space_id)

'SUCCESS'

In [35]:
# Names under which the model and its deployment are stored (placeholders to replace)
model_name = "MODEL NAME"
deployment_name = "DEPLOYMENT NAME"
# Rebinds best_model (last set inside the tuning loop) to the final fitted pipeline
best_model = pipeline

In [36]:
pipeline

Pipeline(steps=[('scaler', StandardScaler()), ('pca', PCA(n_components=2)),
                ('SVM', SVC(C=0.05, gamma=0.1, random_state=1234))])

In [37]:
# Look up the id of the runtime the stored model is tagged with
software_spec_uid = client.software_specifications.get_id_by_name('runtime-22.1-py3.9')
model_props = {
    client.repository.ModelMetaNames.NAME: model_name,
    client.repository.ModelMetaNames.TYPE: 'scikit-learn_1.0',
    client.repository.ModelMetaNames.SOFTWARE_SPEC_UID: software_spec_uid
}

# Store the fitted pipeline together with the data it was trained on.
# NOTE(review): `pipeline=pipeline` passes the same object as `model` (best_model is
# bound to pipeline) - confirm against the client documentation that this is intended.
model_details = client.repository.store_model(
    model=best_model,
    meta_props=model_props,
    training_data=cloud_shuffled_training_data,
    training_target=cloud_shuffled_training_target,
    pipeline=pipeline
)

Failure during scoring. (POST https://us-south.ml.cloud.ibm.com/ml/v4/deployments/6e6f0ccf-c699-4069-ab74-bf75951bd88f/predictions?version=2021-06-24)
Status code: 404, body: {"trace":"32f2aa977f524176908f40d7ba7e7cd3","errors":[{"code":"deployment_not_found","message":"Deployment with id '6e6f0ccf-c699-4069-ab74-bf75951bd88f' does not exist. Re-try with a valid deployment id."}]}


In [38]:
model_details

{'entity': {'hybrid_pipeline_software_specs': [],
  'label_column': 'l1',
  'software_spec': {'id': '12b83a17-24d8-5082-900f-0ab31fbfd3cb',
   'name': 'runtime-22.1-py3.9'},
  'training_data_references': [{'connection': {'access_key_id': 'not_applicable',
     'endpoint_url': 'not_applicable',
     'secret_access_key': 'not_applicable'},
    'id': '1',
    'location': {},
    'schema': {'fields': [{'name': 'f0', 'type': 'float'},
      {'name': 'f1', 'type': 'float'},
      {'name': 'f2', 'type': 'float'},
      {'name': 'f3', 'type': 'float'},
      {'name': 'f4', 'type': 'float'},
      {'name': 'f5', 'type': 'float'},
      {'name': 'f6', 'type': 'float'},
      {'name': 'f7', 'type': 'float'},
      {'name': 'f8', 'type': 'float'}],
     'id': '1',
     'type': 'ndarray'},
    'type': 's3'}],
  'type': 'scikit-learn_1.0'},
 'metadata': {'created_at': '2022-04-10T23:05:09.220Z',
  'id': 'e54a2552-7daa-44e6-87ce-af43f1c8ae47',
  'modified_at': '2022-04-10T23:05:12.508Z',
  'name': 'I

In [39]:
# Id of the stored model, passed as artifact_uid when the deployment is created
model_id = client.repository.get_model_id(model_details)
model_id

'e54a2552-7daa-44e6-87ce-af43f1c8ae47'

In [40]:
# Deployment configuration: the name plus an empty ONLINE entry
# (presumably selects an online deployment with default settings - confirm)
deployment_props = {
    client.deployments.ConfigurationMetaNames.NAME: deployment_name,
    client.deployments.ConfigurationMetaNames.ONLINE: {}
}

# Create the deployment for the stored model and display the returned details
deployment = client.deployments.create(
    artifact_uid=model_id,
    meta_props=deployment_props
)
deployment



#######################################################################################

Synchronous deployment creation for uid: 'e54a2552-7daa-44e6-87ce-af43f1c8ae47' started

#######################################################################################


initializing
Note: online_url is deprecated and will be removed in a future release. Use serving_urls instead.

ready


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='19192d86-bf90-4ada-8898-a757aa57d433'
------------------------------------------------------------------------------------------------




{'entity': {'asset': {'id': 'e54a2552-7daa-44e6-87ce-af43f1c8ae47'},
  'custom': {},
  'deployed_asset_type': 'model',
  'hardware_spec': {'id': 'e7ed1d6c-2e89-42d7-aed5-863b972c1d2b',
   'name': 'S',
   'num_nodes': 1},
  'name': 'IoT Assignment 4 Model Deployment',
  'online': {},
  'space_id': 'fac068c9-1bea-460a-b9cd-987107957422',
  'status': {'online_url': {'url': 'https://us-south.ml.cloud.ibm.com/ml/v4/deployments/19192d86-bf90-4ada-8898-a757aa57d433/predictions'},
   'serving_urls': ['https://us-south.ml.cloud.ibm.com/ml/v4/deployments/19192d86-bf90-4ada-8898-a757aa57d433/predictions'],
   'state': 'ready'}},
 'metadata': {'created_at': '2022-04-10T23:05:18.946Z',
  'id': '19192d86-bf90-4ada-8898-a757aa57d433',
  'modified_at': '2022-04-10T23:05:18.946Z',
  'name': 'IoT Assignment 4 Model Deployment',
  'owner': 'IBMid-662003ETD1',
  'space_id': 'fac068c9-1bea-460a-b9cd-987107957422'},
    'message': 'online_url is deprecated and will be removed in a future release. Use servin

In [41]:
deployment_uid = client.deployments.get_id(deployment)

In [42]:
print(client.deployments.list())

------------------------------------  ---------------------------------  -----  ------------------------
GUID                                  NAME                               STATE  CREATED
19192d86-bf90-4ada-8898-a757aa57d433  IoT Assignment 4 Model Deployment  ready  2022-04-10T23:05:18.946Z
------------------------------------  ---------------------------------  -----  ------------------------
None


## IBM Cloud Testing

In [43]:
# Rebuilds the shuffled test set exactly as the "Model Testing" section does
# (presumably so that this section can be run without that one - confirm)
test_samples, test_labels = get_samples_and_labels(test_data_file)
test_data = create_data(test_samples, best_window_size)
shuffled_test_data, shuffled_test_labels = shuffle(test_data, test_labels, random_state = random_state_seed)

In [46]:
def cloud_test(cloud_test_data, deployment_id=None):
    """
    Function to score one feature vector against the deployed model
    Arguments:
        cloud_test_data: 1-D numpy array with the features of a single sample
        deployment_id: GUID of the deployment to score against; when omitted, the
            notebook-level `deployment_uid` obtained after creating the deployment is used
    Returns:
        the predicted label, read from result['predictions'][0]['values'][0][0]
    """
    if deployment_id is None:
        # Using the id of the deployment created above avoids scoring against a
        # pasted GUID that no longer exists.
        deployment_id = deployment_uid

    # The service expects a list of rows, so send the single sample as a 1 x n matrix.
    row = cloud_test_data.reshape((1, cloud_test_data.shape[0]))
    payload = {
        "input_data": [
            {
                # f0, f1, ... one name per feature column
                "fields": ["f{}".format(i) for i in range(row.shape[1])],
                "values": row.tolist()
            }
        ]
    }

    result = client.deployments.score(deployment_id, payload)
    print(result)
    prediction_value = result['predictions'][0]['values'][0][0]
    return prediction_value

In [47]:
# One scoring request per test sample; predictions are kept in the same order as
# shuffled_test_data so they can be compared with shuffled_test_labels
prdections = []
for cloud_test_data in shuffled_test_data:
    prdections.append(cloud_test(cloud_test_data))

{'predictions': [{'fields': ['prediction'], 'values': [[0]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[1]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[0]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[0]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[1]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[0]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[0]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[1]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[0]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[1]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[1]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[0]]}]}
{'predictions': [{'fields': ['prediction'], 'values': [[1]]}]}


In [48]:
# Fraction of cloud predictions that equal the true label.
# zip() stops at the shorter sequence, so a missing prediction counts as a miss.
accuracy = sum(1 for actl,pred in zip(shuffled_test_labels,prdections) if actl == pred) / float(len(shuffled_test_labels))

print(accuracy)


1.0


# Extras

In [49]:
# shuffled_test_labels

In [50]:
# payload = {
#     "input_data": [
#         {
#             "fields": ["f0","f1","f2","f3","f4","f5","f6","f7","f8",],
#             "values": shuffled_test_data[1].reshape((1, shuffled_test_data[0].shape[0])).tolist()
#         }
#     ]
# }
# result = client.deployments.score('53134487-a7df-4f11-bd35-be9ff68bad73', payload)
# print(result)

In [51]:
# data = create_data(samples, 4)
# x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=False)
# temp = x_train[0]
# temp = temp.reshape((1, temp.shape[0]))
# temp.shape

In [52]:
# payload = {
#     "input_data": [
#         {
#             "fields": ["f0","f1","f2","f3","f4","f5","f6","f7","f8","f9","f10","f11",],
#             "values": x_train[0].reshape((1, x_train[0].shape[0])).tolist()
#         }
#     ]
# }
# result = client.deployments.score('7fd7e2f8-78f1-485d-8439-36f00fbaee81', payload)
# print(result)

In [53]:
# client.deployments.

In [54]:
# shuffled_test_labels

In [55]:
# print(result['predictions'][0]['values'][0][0])