# **Importing All Required Libraries**

In [25]:
from collections import Counter
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import texttable as tt
from highcharts import Highchart
from highcharts import highcharts
from lightgbm import LGBMClassifier
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Below dictionary for replacing integer classes with string names

In [26]:
# Integer class id -> activity name. Ids are assigned from 1 upwards in the
# order the names are listed here.
features_dic = dict(enumerate((
    'WALKING',
    'WALKING_UPSTAIRS',
    'WALKING_DOWNSTAIRS',
    'SITTING',
    'STANDING',
    'LAYING',
    'STAND_TO_SIT',
    'SIT_TO_STAND',
    'SIT_TO_LIE',
    'LIE_TO_SIT',
    'STAND_TO_LIE',
    'LIE_TO_STAND',
), start = 1))

## Loading all the required data-sets

In [27]:
# Root folder that holds the `Train` and `Test` sub-folders. Change this single
# line to run the notebook on another machine.
DATA_DIR = Path(r'C:\Users\HassaanSeeker\Desktop\data')


def read_space_separated(*path_parts):
    """Read a headerless, space-separated text file located under DATA_DIR.

    Parameters
    ----------
    *path_parts : str
        Path components relative to DATA_DIR, e.g. ``'Train', 'X_train.txt'``.

    Returns
    -------
    pandas.DataFrame
        The file contents with integer column labels (no header row is read).
    """
    return pd.read_csv(DATA_DIR.joinpath(*path_parts), sep = ' ', header = None)


train_subject = read_space_separated('Train', 'subject_id_train.txt')
test_subject = read_space_separated('Test', 'subject_id_test.txt')
train_data = read_space_separated('Train', 'X_train.txt')
train_target = read_space_separated('Train', 'y_train.txt')
test_data = read_space_separated('Test', 'X_test.txt')
test_target = read_space_separated('Test', 'y_test.txt')

## Merging both training and test data-sets

In [28]:
# Stack the train rows on top of the test rows; ignore_index renumbers the
# combined rows 0..n-1 instead of keeping each frame's own row labels.
total_data = pd.concat([train_data, test_data], axis = 0, ignore_index = True)
total_target = pd.concat([train_target, test_target], axis = 0, ignore_index = True)

## Getting count for all the activities

In [29]:
# Translate the integer class ids in column 0 to activity names, then tally
# how many samples each activity has.
features_count = Counter(total_target[0].replace(features_dic))

## List creation for bar-chart graph

In [30]:
# Split the counter into two parallel lists (same order in both): the activity
# names for the chart axis and their sample counts for the bars.
features_count_name = list(features_count.keys())
features_count_count = list(features_count.values())

In [31]:
# Bar chart of the number of samples recorded for each activity.
H = Highchart(width=750, height=600)

options = {
    'title': {
        # The chart has a single series, so it is a plain (not stacked) bar chart.
        'text': 'Number of samples per activity'
    },
    'subtitle': {
        'text': ''
    },
    'xAxis': {
        # One category per activity name, in the same order as the counts below.
        'categories': features_count_name,
        'title': {
            'text': None
        }
    },
    'yAxis': {
        'min': 0,
        'title': {
            'text': 'Count',
            'align': 'high'
        },
        'labels': {
            'overflow': 'justify'
        }
    },
    'tooltip': {
        'valueSuffix': ''
    },
    'legend': {
        'layout': 'vertical',
        'align': 'right',
        'verticalAlign': 'top',
        'x': -40,
        'y': 80,
        'floating': True,
        'borderWidth': 1,
        'backgroundColor': "((Highcharts.theme && Highcharts.theme.legendBackgroundColor) || '#FFFFFF')",
        'shadow': True
    },
    'credits': {
        'enabled': False
    },
    'plotOptions': {
        'bar': {
            'dataLabels': {
                'enabled': True
            }
        }
    }
}

H.set_dict_options(options)

H.add_data_set(features_count_count, 'bar', 'Count for different activities')

# Last expression of the cell: the notebook renders the chart.
H

## The figure above shows that standing and laying are the most frequent activities performed by the subjects

## Function for checking null values

In [None]:
def check_for_null_values(data):
  """Return the rows of `data` that contain at least one null value.

  The returned frame is empty when no row has a null.
  """
  rows_with_nulls = data.isnull().any(axis = 1)
  return data.loc[rows_with_nulls]

## Standardizing All Data-Points
 Standardization of a dataset is a common requirement for many machine learning estimators: they might behave badly if the individual features do not more or less look like standard normally distributed data (e.g. Gaussian with 0 mean and unit variance).

In [None]:
# Learn each feature's mean and standard deviation, then rescale every feature
# to zero mean and unit variance.
# NOTE(review): total_data still holds the merged train + test rows at this
# point, so the scaler statistics are computed over both.
scl = StandardScaler()
scl.fit(total_data)
total_data = scl.transform(total_data)

## Principal Component Analysis

Linear dimensionality reduction using Singular Value Decomposition of the data to project it to a lower dimensional space.
Having too many dimensions (features) in your data causes noise and difficulties (it can be sound, picture or context). This gets especially bad when features have different scales (e.g. weight, length, area, speed, power, temperature, volume, time, cell number, etc.).
We do this by reducing the dimension i.e. the features.

In [None]:
# Reduce dimensionality with PCA. A float n_components between 0 and 1 asks
# scikit-learn to keep as many components as needed to explain that fraction
# of the variance (here 90%); random_state pins any randomized solver step.
# NOTE(review): PCA is fitted on total_data before the train/test split is
# made, i.e. on rows that later end up in the test split as well.
pca = PCA(n_components = 0.9, random_state = 3)
# Rebinds total_data to the reduced matrix, so re-running this cell alone would
# apply PCA to data that has already been reduced.
total_data = pca.fit_transform(total_data)

## Split the Data-Set into training and test

In [None]:
# Hand the labels over as a 1-D column rather than a single-column DataFrame:
# scikit-learn estimators expect a 1-D target and otherwise emit a
# DataConversionWarning (`column_or_1d(y, warn=True)`) on every fit.
# random_state fixes the shuffle so the split is reproducible.
train_X, test_X, train_Y, test_Y = train_test_split(total_data,
                                     total_target.iloc[:, 0], random_state = 3)

## Function for model_selection for different classification models
### Models Used:
    Multi-LogisticRegression
    LightGBM
    SupportVectorMachine(LinearSVC)
    SupportVectorMachine(SVC => onevsone) 

In [None]:
def model_selection(train_X, test_X, train_Y, test_Y):
  """Fit every candidate classifier and print its metrics on the test split.

  Candidates, in the order they are fitted and reported: multinomial logistic
  regression, LightGBM, LinearSVC and an RBF-kernel SVC with a one-vs-one
  decision function.

  Parameters
  ----------
  train_X, test_X : array-like
      Feature matrices of the training and test splits.
  train_Y, test_Y : array-like
      Class labels of the training and test splits.

  Returns
  -------
  None
      Results are printed by `accuracy_metrics`.
  """
  # Estimators expect a 1-D target; flattening avoids a DataConversionWarning
  # when the labels arrive as a single-column frame.
  train_labels = np.ravel(train_Y)

  candidates = [
    ('Multi-LogisticRegression',
     LogisticRegression(multi_class = "multinomial", solver="lbfgs", C = 1)),
    ('lgbm',
     LGBMClassifier(n_estimators = 500, random_state = 3)),
    # Seeded like the other models so repeated runs give the same result.
    ('LinearSVC',
     svm.LinearSVC(random_state = 3)),
    ('SVC-OVO',
     svm.SVC(kernel = 'rbf', gamma = 'scale', decision_function_shape='ovo', C = 1)),
  ]

  for model_name, estimator in candidates:
    estimator.fit(train_X, train_labels)
    accuracy_metrics(test_Y, estimator.predict(test_X), model_name)
  

  

## Function for assessing the accuracy of classification models
 ### Metrics Selected:
 ####      sklearn.accuracy_score: 
 The accuracy_score function computes the accuracy, either the fraction (default) or the count (normalize=False) of correct predictions.
 In multilabel classification, the function returns the subset accuracy. If the entire set of predicted labels for a sample strictly match with the true set of   labels, then the subset accuracy is 1.0; otherwise it is 0.0. https://scikit-learn.org/stable/modules/model_evaluation.html#accuracy-score
 ####      Precision 
 ####      Recall
 ####      F1-Score
https://en.wikipedia.org/wiki/Precision_and_recall#Definition_%28classification_context%29


In [None]:
def accuracy_metrics(Y_true, Y_pred, model):
    """Print accuracy, micro-averaged F1 and a per-class metrics table.

    Parameters
    ----------
    Y_true : array-like
        Ground-truth class labels.
    Y_pred : array-like
        Labels predicted by the model.
    model : str
        Name used to prefix the printed scores.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    Accuracy_Score = accuracy_score(y_true = Y_true, y_pred = Y_pred)
    # NOTE(review): for single-label multiclass predictions the micro-averaged
    # F1 coincides with accuracy, so the two printed scores carry the same
    # information (the saved run shows identical values).
    F1_Score_all_class = f1_score(Y_true, Y_pred, average = 'micro')

    # One entry per class in each array, in the order returned by
    # precision_recall_fscore_support.
    precision, recall, fscore, support = score(Y_true, Y_pred)

    print(model,"accuracy_score : ", Accuracy_Score)
    print(model, "F1_Score for all classes: ", F1_Score_all_class)

    # Unrounded per-class precision; the table below shows the rounded values.
    print(precision)

    tab = tt.Texttable()
    tab.header(['Precision', 'Recall', 'Fscore', 'Support'])
    for row in zip(precision, recall, fscore, support):
        tab.add_row(row)
    print( tab.draw() )
    

In [None]:
# Fit all candidate classifiers on the training split and print their metrics
# on the test split.
model_selection(train_X, test_X, train_Y, test_Y)

###### From the above results, the support vector machine (sklearn's support vector classification implementation, SVC) shows the best accuracy of all the models.

## Function for parameter tuning using grid-search
    As support vector classification shows the best accuracy, the function below tunes its parameters.

In [None]:
def grid_search_svm(train_X, train_Y, test_X, test_Y):
    """Exhaustively try SVC settings and record the test accuracy of each.

    The grid covers C in {0.1, 1, 10, 100} and the 'poly' and 'rbf' kernels.
    Polynomial degrees 0..6 are tried for the 'poly' kernel only: SVC ignores
    `degree` for every other kernel, so looping over it there would refit the
    same model repeatedly.

    Parameters
    ----------
    train_X, test_X : array-like
        Feature matrices of the training and test splits.
    train_Y, test_Y : array-like
        Class labels of the training and test splits.

    Returns
    -------
    dict
        Maps each observed accuracy to ``[C, kernel, degree]``; `degree` is
        ``None`` for kernels that do not use it. Because accuracy is the key,
        two settings with exactly the same accuracy share one entry and the
        later one wins.

    Notes
    -----
    Each setting is scored on the test split, so the best score reported here
    is an optimistic estimate of performance on unseen data.
    """
    c_values = [0.1, 1, 10, 100]
    kernels = ['poly', 'rbf']
    poly_degrees = [0, 1, 2, 3, 4, 5, 6]

    # Estimators expect a 1-D target; flattening avoids a DataConversionWarning
    # on every fit when the labels arrive as a single-column frame.
    train_labels = np.ravel(train_Y)

    accuracy_dic = {}
    for c_value in c_values:
        for kernel_name in kernels:
            degrees = poly_degrees if kernel_name == 'poly' else [None]
            for degree in degrees:
                # Leave `degree` at the library default when it is not used.
                degree_kwargs = {} if degree is None else {'degree': degree}
                clf = svm.SVC(kernel = kernel_name, gamma = 'scale',
                              decision_function_shape='ovo', C = c_value,
                              **degree_kwargs)
                clf.fit(train_X, train_labels)

                # Predict once and reuse for both the report and the score.
                predictions = clf.predict(test_X)
                accuracy_metrics(test_Y, predictions, 'SVC-OVO')
                Accuracy_Score = accuracy_score(y_true = test_Y, y_pred = predictions)
                accuracy_dic[Accuracy_Score] = [c_value, kernel_name, degree]

    return accuracy_dic

In [None]:
x = grid_search_svm(train_X, train_Y, test_X, test_Y)

## Below implementation for best parameters
# Only parameter sets whose test accuracy exceeds this value are listed.
MIN_ACCURACY = 0.95
# Highest accuracy first, so the best parameter set is at the top.
for accuracy, params in sorted(x.items(), key = lambda item: item[0], reverse = True):
    if accuracy > MIN_ACCURACY:
        print(accuracy, params)

## Final Model Selection
##### Best parameters for svm.SVC found by the grid search above

In [32]:
# Final model: RBF-kernel SVC with C = 10. `degree` is not passed because SVC
# only uses it for the 'poly' kernel, so it has no effect here.
clf = svm.SVC(kernel = 'rbf', gamma = 'scale', decision_function_shape='ovo',
              C = 10)
# Flatten the labels to 1-D so fit() does not emit a DataConversionWarning.
clf.fit(train_X, np.ravel(train_Y))
accuracy_metrics(test_Y, clf.predict(test_X), 'SVC-OVO')


  y = column_or_1d(y, warn=True)


SVC-OVO accuracy_score :  0.9542627149652396
SVC-OVO F1_Score for all classes:  0.9542627149652396
[0.99537037 0.98950131 0.90740741 0.95465394 0.93787575 0.99606299
 0.89473684 1.         0.75       0.68181818 0.62857143 0.61538462]
+-----------+--------+--------+---------+
| Precision | Recall | Fscore | Support |
| 0.995     | 0.991  | 0.993  | 434     |
+-----------+--------+--------+---------+
| 0.990     | 0.977  | 0.983  | 386     |
+-----------+--------+--------+---------+
| 0.907     | 0.994  | 0.949  | 345     |
+-----------+--------+--------+---------+
| 0.955     | 0.922  | 0.938  | 434     |
+-----------+--------+--------+---------+
| 0.938     | 0.959  | 0.948  | 488     |
+-----------+--------+--------+---------+
| 0.996     | 0.998  | 0.997  | 507     |
+-----------+--------+--------+---------+
| 0.895     | 0.739  | 0.810  | 23      |
+-----------+--------+--------+---------+
| 1         | 0.778  | 0.875  | 9       |
+-----------+--------+--------+---------+
| 0.750   

## Final Words and Improvements
There is a lot of room for further improvement; specifically, if we had more data, some deep-learning classification algorithms could also be implemented. But deep-learning algorithms like LSTMs and RNNs usually require more than 30,000 data points. Since simple classification algorithms are giving reasonably
good accuracy, there is no need to investigate the deep-learning option.