In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from time import time 
from sklearn.metrics import f1_score
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
import seaborn as sns
%matplotlib inline

In [None]:
# Read the training CSV into a DataFrame and render it
train_csv_path = 'final_train.csv'
df = pd.read_csv(train_csv_path)

display(df)

In [None]:
# Drop the leftover "Unnamed: 0" column. errors="ignore" makes this a no-op when
# the column is already gone, so the cell can be re-run without a KeyError.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

# Absolute pairwise correlation between the numeric/boolean columns only.
# Restricting the dtypes explicitly keeps this working on pandas >= 2.0, where
# DataFrame.corr() no longer silently skips non-numeric columns.
cor_matrix = df.select_dtypes(include=["number", "bool"]).corr().abs()

# Keep only the strict upper triangle (k=1 excludes the diagonal) so that every
# column pair is inspected once. The builtin `bool` replaces the `np.bool`
# alias, which was removed in NumPy 1.24.
upper_mask = np.triu(np.ones(cor_matrix.shape, dtype=bool), k=1)
upper_tri = cor_matrix.where(upper_mask)

# A column is dropped when it correlates above 0.8 with any earlier column
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.8).any()]
df1 = df.drop(columns=to_drop)
df_analysis = df1

In [None]:
# Render the frame that remains after the highly correlated columns were dropped
display(df_analysis)

In [None]:
# Separate the target column from the hand-picked feature columns
feature_columns = [
    'fBodyAccJerk-bandsEnergy()-57,64.1',
    'fBodyAcc-meanFreq()-Z',
    'angle(X,gravityMean)',
    'fBodyAcc-bandsEnergy()-1,16',
    'fBodyGyro-min()-X',
    'tBodyAccMag-arCoeff()1',
    'fBodyGyro-maxInds-X',
    'fBodyAcc-maxInds-X',
    'fBodyAcc-bandsEnergy()-17,24',
    'fBodyAccMag-maxInds',
    'angle(Y,gravityMean)',
    'fBodyAccJerk-meanFreq()-Z',
    'fBodyAcc-bandsEnergy()-9,16',
    'tBodyGyroMag-entropy()',
    'fBodyAcc-bandsEnergy()-1,16.2',
    'fBodyAcc-min()-Y',
    'fBodyGyro-maxInds-Z',
    'tBodyGyro-entropy()-X',
    'fBodyBodyGyroJerkMag-maxInds',
    'tBodyAcc-correlation()-X,Y',
    'tBodyGyro-correlation()-Y,Z',
    'fBodyAcc-maxInds-Z',
    'fBodyAccJerk-min()-X',
    'tGravityAcc-arCoeff()-X,1',
    'tGravityAcc-arCoeff()-Z,1',
    'fBodyAcc-bandsEnergy()-1,16.1',
]

# Target first, then the feature sub-frame (list indexing returns a DataFrame)
Y_labels = df_analysis['Activity']
X_values = df_analysis[feature_columns]

display(X_values)

In [None]:
# Standardising the data
# Center to the mean and component wise scale to unit variance.
#
# The scaler statistics are estimated from the leading 80% of rows only. These
# are the same rows the split cell further down assigns to the training set, so
# no information from the held-out rows leaks into preprocessing. The fitted
# scaler is then applied to every row. Keep the 0.8 fraction in sync with the
# split cell.
sc = StandardScaler()
n_fit_rows = int(.8 * len(X_values))
sc.fit(X_values[:n_fit_rows])
X_values = sc.transform(X_values)

In [None]:
# Ordered (unshuffled) hold-out split: the first 80% of rows become the training
# set and the remaining rows become the test set.
assert len(X_values) == len(Y_labels), "features and labels must have the same number of rows"
split_at = int(.8 * len(X_values))

X_train, X_test = X_values[:split_at], X_values[split_at:]
# Positional slicing is used instead of np.split: applying np.split to a pandas
# Series goes through Series.swapaxes, which pandas has deprecated.
Y_train, Y_test = Y_labels.iloc[:split_at], Y_labels.iloc[split_at:]

In [None]:
# Report the shape of each of the four split arrays
for caption, subset in (
    ('Training Features Shape:', X_train),
    ('Training Labels Shape:', Y_train),
    ('Testing Features Shape:', X_test),
    ('Testing Labels Shape:', Y_test),
):
    print(caption, subset.shape)

In [None]:
# Fit a classifier and report how long the fit took
def train(clf, X_train, Y_train):
    """Fit ``clf`` on the given data and print the elapsed wall-clock time.

    Parameters:
        clf: object exposing ``fit(X, y)``.
        X_train: training features, passed to ``clf.fit`` unchanged.
        Y_train: training labels, passed to ``clf.fit`` unchanged.

    Returns:
        None. The classifier is fitted in place.
    """
    started_at = time()
    clf.fit(X_train, Y_train)
    elapsed = time() - started_at

    # Elapsed time is the difference of two time() readings, i.e. seconds
    print ("Model trained in: ", elapsed)

In [None]:
# Predict with a classifier, time the call and score the result
def predict(clf, features, target):
    """Run ``clf.predict`` on ``features`` and score it against ``target``.

    Parameters:
        clf: object exposing ``predict(X)``.
        features: inputs passed to ``clf.predict`` unchanged.
        target: true labels, compared element-wise with the predictions.

    Returns:
        A 3-tuple ``(f1, accuracy, y_pred)``: the result of
        ``f1_score(..., average=None)``, the fraction of predictions equal to
        ``target``, and the raw predictions.
    """
    tic = time()
    y_pred = clf.predict(features)
    toc = time()

    # Report the wall-clock duration of the predict call
    print ("Prediction made in: ",toc - tic)

    f1_values = f1_score(target, y_pred, average=None)
    # Number of matching positions divided by the number of predictions
    n_correct = sum(target == y_pred)
    accuracy = n_correct / float(len(y_pred))
    return f1_values, accuracy, y_pred

In [None]:
# Train a classifier, then print its scores on the training and test sets
def fit(clf, X_train, y_train, X_test, y_test):
    """Train ``clf`` and print F1 / accuracy for the training and test data.

    Parameters:
        clf: classifier handed to ``train`` and ``predict``.
        X_train, y_train: data used for fitting and for the first report.
        X_test, y_test: data used for the second report.

    Returns:
        None. All results are printed.
    """
    separator = "==========================================="
    f1_caption = "F1 score [LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  WALKING_UPSTAIRS]:"

    # Announce which model is being trained and on how many samples
    model_name = clf.__class__.__name__
    n_samples = len(X_train)
    print ("Training a {} using a training set size of {}. . .".format(model_name, n_samples))

    train(clf, X_train, y_train)

    # Training-set report: predict first, then print the banner and the scores
    train_f1, train_acc, _ = predict(clf, X_train, y_train)
    print (separator)
    print ("Indexes assesing methods for training set:")
    print (f1_caption, train_f1)
    print ("Accuracy:", train_acc)

    # Test-set report: here the banner is printed before predict runs
    print (separator)
    test_f1, test_acc, _ = predict(clf, X_test, y_test)
    print ("Indexes assesing methods for test set:")
    print (f1_caption, test_f1)
    print ("Accuracy:", test_acc)

In [None]:
# Function for inference
def inference(clf_XGB, inf):
    """Predict on ``inf``, print how long the call took and return the result.

    Parameters:
        clf_XGB: object exposing ``predict(X)``.
        inf: input passed to ``clf_XGB.predict`` unchanged.

    Returns:
        Whatever ``clf_XGB.predict(inf)`` returned. Earlier versions computed
        this value and discarded it; callers that ignore the return value are
        unaffected.
    """
    start = time()
    y_pred = clf_XGB.predict(inf)
    end = time()

    # Printing results
    print ("Inference made in: ",end - start)

    return y_pred

In [None]:
# Build an XGBoost classifier with its default settings, then train and score it
clf_XGB = xgb.XGBClassifier()

fit(
    clf=clf_XGB,
    X_train=X_train,
    y_train=Y_train,
    X_test=X_test,
    y_test=Y_test,
)
print ('')

In [None]:
# Time a prediction on a one-row batch built from the first training row
pred_try = np.array(X_train[:1])
inference(clf_XGB, pred_try)

In [None]:
# Write the trained classifier to disk
saved_model_path = 'XGB_classifier.model'
clf_XGB.save_model(saved_model_path)

In [None]:
# Restore the saved classifier into a fresh XGBClassifier instance
model_file = 'XGB_classifier.model'
clf2 = xgb.XGBClassifier()
clf2.load_model(model_file)

In [None]:
# Score the reloaded classifier on the complete feature/label set.
# NOTE(review): X_values / Y_labels include the rows used for training, and no
# tuning step is visible before this cell -- confirm the message wording.
reloaded_scores = predict(clf2, X_values, Y_labels)
f1f, accf, predicted = reloaded_scores
print("Accuracy after tuning: ", accf)

In [None]:
# Fraction of rows where the reloaded model's prediction equals the Activity column
np.mean(df["Activity"] == predicted)