In [None]:
!rm -rf *
!git clone "https://github.com/hmda77/Ensemble-Indoor-Loc"
!cp /content/Ensemble-Indoor-Loc/JUIndoorLoc/JUIndoorLoc-Test-data.csv /content/
!cp /content/Ensemble-Indoor-Loc/JUIndoorLoc/JUIndoorLoc-Training-data.csv /content/
!rm -rf /content/Ensemble-Indoor-Loc/

# Import Packages



In [78]:
import numpy as np
import pandas as pd
import re

import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix, precision_score, roc_curve, roc_auc_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder , normalize
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate


import time
from tqdm import tqdm

from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier

import seaborn as sns

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score, cross_val_predict

from matplotlib import pyplot

import warnings
warnings.filterwarnings('ignore')

# Metric Functions

In [3]:

# Make the confusion matrix
def confusion_plot(y_test,y_pred,title):
    cmt = confusion_matrix(y_test,y_pred)
    plt.rcParams['figure.figsize'] = (10,8)
    sns.heatmap(cmt,fmt='',annot=True,linewidth=0.01,cmap=sns.cubehelix_palette(as_cmap=True))
    plt.title("confusion matrix {}".format(title))
    plt.xlabel("predicted")
    plt.ylabel("true")
    plt.show()


# generate classification report
def generate_classification_report(y_test,y_pred):
    report=pd.DataFrame.from_dict(classification_report(y_pred,y_test,output_dict=True)).T
    # report['Label']=[data_classes[i]  if i<5 else " " for i,x in enumerate(report.index)]
    report=report[['f1-score','precision','recall','support']]
    pd.set_option('display.max_rows', report.shape[0]+1)
    return report

# ROC Curve
def plot_roc_curve(y_test,proba,pos_label):
    fpr, tpr, thresholds = roc_curve(y_test, proba[:,pos_label], pos_label= pos_label)
    roc_auc = roc_auc_score(y_test, proba, multi_class='ovr')

    plt.rcParams['figure.figsize'] = (5,4)
    plt.plot(fpr, tpr,'k:',lw=5, label='average ROC curve (area = {0:0.2f})'.format(roc_auc))
    plt.plot([0 ,1],[0 ,1],'r')
    plt.legend(loc="lower right")
    # plt.title('ROC Curve for calss {0}'.format(data_classes[pos_label]))
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()


# ROC Curve for all classes
def plot_roc_curve_all(y_test,proba,n_class,title):
    for i in range(n_class):
      fpr, tpr, thresholds = roc_curve(y_test, proba[:,i],pos_label= i)
      plt.plot(fpr, tpr,lw=1)
    plt.plot([0 ,1],[0 ,1],'r')
    plt.title('ROC Curve for all classes in {}'.format(title))
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    # plt.legend(data_classes)
    plt.show()

# Function to extract floor number, X coordinate, and Y coordinate
def extract_info(y):
    parts = y.split('-')
    floor = int(parts[0][1:])  # Extract floor number
    x_coord = float(parts[1])  # Extract X coordinate
    y_coord = float(parts[2])  # Extract Y coordinate
    return floor, x_coord, y_coord

# Calculate average error for each floor
def calculate_avg_error(y_test, y_pred, verbose = 1):
    errors = {}
    counts = {}
    for y_t, y_p in zip(y_test, y_pred):
        floor_t, x_t, y_t = extract_info(y_t)
        floor_p, x_p, y_p = extract_info(y_p)
        error = np.sqrt((x_t - x_p)**2 + (y_t - y_p)**2)
        errors.setdefault(floor_t, []).append(error)
        counts.setdefault(floor_t, 0)
        counts[floor_t] += 1

    avg_errors = {floor: sum(errors[floor]) / counts[floor] for floor in errors}

    if verbose == 1:
        print("Average errors for each floor:")
        for floor, error in avg_errors.items():
            print(f"Floor {floor}: {error}")
        print(f"Average: {sum(avg_errors.values())/3}")
    return sum(avg_errors.values())/3

# Calculate average error for each floor
def calculate_mse(y_test, y_pred, verbose = 1):
    errors = {}
    counts = {}
    for y_t, y_p in zip(y_test, y_pred):
        floor_t, x_t, y_t = extract_info(y_t)
        floor_p, x_p, y_p = extract_info(y_p)
        error = (x_t - x_p)**2 + (y_t - y_p)**2
        errors.setdefault(floor_t, []).append(error)
        counts.setdefault(floor_t, 0)
        counts[floor_t] += 1

    mse_errors = {floor: sum(errors[floor]) / counts[floor] for floor in errors}
    if verbose == 1:
        print("\nMSE for each floor:")
        for floor, error in mse_errors.items():
            print(f"Floor {floor}: {error}")
        print(f"Average: {sum(mse_errors.values())/3}")
    return sum(mse_errors.values())/3

# Calculate average error for each floor
def calculate_rmse(y_test, y_pred, verbose = 1):
    errors = {}
    counts = {}
    for y_t, y_p in zip(y_test, y_pred):
        floor_t, x_t, y_t = extract_info(y_t)
        floor_p, x_p, y_p = extract_info(y_p)
        error = (x_t - x_p)**2 + (y_t - y_p)**2
        errors.setdefault(floor_t, []).append(error)
        counts.setdefault(floor_t, 0)
        counts[floor_t] += 1

    rmse_errors = {floor: np.sqrt(sum(errors[floor]) / counts[floor]) for floor in errors}
    if verbose == 1:
        print("\nRMSE for each floor:")
        for floor, error in rmse_errors.items():
            print(f"Floor {floor}: {error}")
        print(f"Average: {sum(rmse_errors.values())/3}")
    return sum(rmse_errors.values())/3

# Dateset Proccesses

In [4]:
tr_path = "C://Users/Hamid/content/JUIndoorLoc-Training-data.csv"
ts_path = "C://Users/Hamid/content/JUIndoorLoc-Test-data.csv"

In [5]:
data_train = pd.read_csv(tr_path)
data_train.head(5)
data_test = pd.read_csv(ts_path)
data_test.head(5)

Unnamed: 0,Cid,AP001,AP002,AP003,AP004,AP005,AP006,AP007,AP008,AP009,...,AP167,AP168,AP169,AP170,AP171,AP172,Rs,Hpr,Did,Ts
0,L4-33-13,-77,-58,-66,-64,-92,-66,-66,-93,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489813137748
1,L4-33-13,-90,-58,-78,-56,-92,-74,-74,-87,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489813179138
2,L4-33-13,-80,-64,-78,-56,-92,-74,-74,-87,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489812948443
3,L4-33-13,-72,-60,-74,-58,-93,-75,-76,-95,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489812959103
4,L4-33-13,-82,-56,-74,-56,-93,-71,-76,-89,-110,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489813079167


In [6]:
frames = [data_train, data_test]
df = pd.concat(frames)
df['Did'] = df['Did'].astype(str).str[1]
df['Did'] = pd.to_numeric(df['Did'])
df.head(5)

Unnamed: 0,Cid,AP001,AP002,AP003,AP004,AP005,AP006,AP007,AP008,AP009,...,AP167,AP168,AP169,AP170,AP171,AP172,Rs,Hpr,Did,Ts
0,L4-40-1,-84,-80,-71,-58,-110,-72,-71,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,4,1469870570949
1,L4-40-1,-84,-79,-71,-58,-110,-72,-71,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,4,1470047205646
2,L4-40-1,-110,-110,-70,-56,-110,-69,-68,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,4,1469870932338
3,L4-40-1,-110,-110,-70,-53,-110,-69,-68,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,4,1470047629440
4,L4-37-2,-84,-82,-75,-65,-110,-73,-75,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,4,1469876622694


In [7]:
data_combined = df.drop('Cid', axis=1).reset_index(drop=True)
data_combined = data_combined.drop('Ts', axis=1)
data_combined.head(20)

Unnamed: 0,AP001,AP002,AP003,AP004,AP005,AP006,AP007,AP008,AP009,AP010,...,AP166,AP167,AP168,AP169,AP170,AP171,AP172,Rs,Hpr,Did
0,-84,-80,-71,-58,-110,-72,-71,-110,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,0,1,4
1,-84,-79,-71,-58,-110,-72,-71,-110,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,0,1,4
2,-110,-110,-70,-56,-110,-69,-68,-110,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,0,1,4
3,-110,-110,-70,-53,-110,-69,-68,-110,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,0,1,4
4,-84,-82,-75,-65,-110,-73,-75,-110,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,0,1,4
5,-84,-81,-75,-65,-110,-73,-75,-110,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,0,1,4
6,-84,-80,-75,-65,-110,-73,-75,-110,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,0,1,4
7,-85,-80,-75,-65,-110,-73,-75,-110,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,0,1,4
8,-84,-80,-75,-65,-110,-73,-75,-110,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,0,1,4
9,-84,-80,-75,-65,-110,-73,-75,-110,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,0,1,4


In [67]:
#make X_train from Tr
X = normalize(data_combined)
y = df.Cid

le = LabelEncoder()
le.fit(y)
le.transform(y)

X_train, Xt, y_train, yt = train_test_split(data_combined, y, test_size=0.2, shuffle=True, random_state=41)

y_train_encode =  le.transform(y_train)

y_test_encode = le.transform(yt)

X_test = Xt

# Random Forest, AdaBoost, XGBoost

## Random Forest

In [37]:
n_learners = 25
max_depth = 38

In [38]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

rf_model = RandomForestClassifier(
    n_estimators=n_learners,
    max_depth=max_depth,
    random_state=42,
    min_samples_split=2,
    n_jobs=-1  # Utilize all available CPU cores
)
rf_model.fit(X_train, y_train_encode)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"train time: {elapsed_time}")

train time: 1.7070972919464111


In [39]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

y_pred = rf_model.predict(X_test)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"test time: {elapsed_time}")

yp = le.inverse_transform(y_pred)
yp = pd.Series(yp)

test time: 0.6109833717346191


In [41]:
f1score = f1_score(yt, yp, average='weighted')
f1score

0.9516409808968941

In [42]:
generate_classification_report(yt, yp)

Unnamed: 0,f1-score,precision,recall,support
L3-1-10,0.0,0.0,0.0,1.0
L3-1-13,0.666667,0.5,1.0,1.0
L3-1-14,0.8,1.0,0.666667,3.0
L3-1-15,0.0,0.0,0.0,2.0
L3-1-16,0.4,0.333333,0.5,2.0
L3-1-17,0.4,0.25,1.0,1.0
L3-10-11,1.0,1.0,1.0,1.0
L3-10-12,1.0,1.0,1.0,1.0
L3-10-13,0.0,0.0,0.0,2.0
L3-10-14,0.0,0.0,0.0,1.0


In [43]:
f1 = f1_score(yt, yp, average='weighted')
recall = recall_score(yt, yp, average='weighted')
precision = precision_score(yt, yp, average='weighted')
accuracy = accuracy_score(yt, yp)

print("F1-score:", f1*100)
print("Recall:", recall*100)
print("Precision:", precision*100)
print("Accuracy:", accuracy*100)

F1-score: 95.1640980896894
Recall: 95.22964715158683
Precision: 96.51729143450018
Accuracy: 95.22964715158683


In [44]:
# Calculate average error for each floor
calculate_avg_error(yt, yp)
calculate_mse(yt, yp)
calculate_rmse(yt, yp)

Average errors for each floor:
Floor 4: 0.08078689172275491
Floor 5: 0.1358802092273337
Floor 3: 0.6174234742603885
Average: 0.2780301917368257

MSE for each floor:
Floor 4: 0.2966494845360825
Floor 5: 2.3107019562715765
Floor 3: 3.9135802469135803
Average: 2.17364389590708

RMSE for each floor:
Floor 4: 0.5446553814441591
Floor 5: 1.520099324475732
Floor 3: 1.9782770905294285
Average: 1.3476772654831066


1.3476772654831066

### Cross-Validation

In [79]:
# Initialize RandomForestClassifier
rf_model = RandomForestClassifier(
    n_estimators=n_learners,
    max_depth=max_depth,
    random_state=42,
    min_samples_split=2,
    n_jobs=-1  # Utilize all available CPU cores
)

# Perform cross-validation
scoring = ['accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted']
cv_results = cross_validate(rf_model, X_train, y_train_encode, scoring=scoring, cv=5, n_jobs=-1)

# Extract and print results
train_time_mean = np.mean(cv_results['fit_time'])
test_accuracy_mean = np.mean(cv_results['test_accuracy'])
test_precision_mean = np.mean(cv_results['test_precision_weighted'])
test_recall_mean = np.mean(cv_results['test_recall_weighted'])
test_f1_mean = np.mean(cv_results['test_f1_weighted'])

print(f"Train time: {train_time_mean}")
print(f"Test accuracy: {test_accuracy_mean}")
print(f"Test precision: {test_precision_mean}")
print(f"Test recall: {test_recall_mean}")
print(f"Test F1-score: {test_f1_mean}")

Train time: 0.4980464458465576
Test accuracy: 0.1257695662220299
Test precision: 0.11478447747112348
Test recall: 0.1257695662220299
Test F1-score: 0.09455617521616369


## AdaBoost

In [58]:
n_learners = 9
max_depth = 49

In [59]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

# Create RandomForest model with specified hyperparameters
base_estimator = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
ada_model = AdaBoostClassifier(
    n_estimators=n_learners,
    estimator=base_estimator,
    algorithm='SAMME',
    random_state=42,
)
ada_model.fit(X_train, y_train_encode)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"train time: {elapsed_time}")

train time: 21.514049291610718


In [60]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

y_pred = ada_model.predict(X_test)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"test time: {elapsed_time}")

yp = le.inverse_transform(y_pred)
yp = pd.Series(yp)

test time: 0.49463701248168945


In [61]:
f1score = f1_score(yt, yp, average='weighted')
f1score

0.9543588090412215

In [62]:
generate_classification_report(yt, yp)

Unnamed: 0,f1-score,precision,recall,support
L3-1-13,0.0,0.0,0.0,0.0
L3-1-14,0.8,1.0,0.666667,3.0
L3-1-16,0.5,0.333333,1.0,1.0
L3-1-17,1.0,1.0,1.0,4.0
L3-10-11,1.0,1.0,1.0,1.0
L3-10-12,1.0,1.0,1.0,1.0
L3-10-13,0.0,0.0,0.0,2.0
L3-10-14,0.333333,0.333333,0.333333,3.0
L3-10-15,1.0,1.0,1.0,3.0
L3-10-16,0.8,1.0,0.666667,3.0


In [63]:
f1 = f1_score(yt, yp, average='weighted')
recall = recall_score(yt, yp, average='weighted')
precision = precision_score(yt, yp, average='weighted')
accuracy = accuracy_score(yt, yp)

print("F1-score:", f1*100)
print("Recall:", recall*100)
print("Precision:", precision*100)
print("Accuracy:", accuracy*100)

F1-score: 95.43588090412214
Recall: 95.4661935738222
Precision: 96.82795302251247
Accuracy: 95.4661935738222


In [64]:
# Calculate average error for each floor
calculate_avg_error(yt, yp)
calculate_mse(yt, yp)
calculate_rmse(yt, yp)

Average errors for each floor:
Floor 4: 0.12468815873924538
Floor 5: 0.2246315571876739
Floor 3: 0.5392649389761348
Average: 0.2961948849676847

MSE for each floor:
Floor 4: 0.8309278350515464
Floor 5: 4.631760644418872
Floor 3: 2.521604938271605
Average: 2.661431139247341

RMSE for each floor:
Floor 4: 0.9115524313233696
Floor 5: 2.152152560674747
Floor 3: 1.5879562142173835
Average: 1.5505537354051668


1.5505537354051668

## XGBoost

In [68]:
n_learners = 29
max_depth = 4

In [69]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

# Create RandomForest model with specified hyperparameters
xgb_model = XGBClassifier(
    # device = "cuda",
    n_estimators=n_learners,
    max_depth=max_depth,
    random_state=42,
    n_jobs=-1  # Utilize all available CPU cores
)
xgb_model.fit(X_train, y_train_encode)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"train time: {elapsed_time}")

train time: 91.61268472671509


In [70]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

y_pred = xgb_model.predict(X_test)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"test time: {elapsed_time}")

yp = le.inverse_transform(y_pred)
yp = pd.Series(yp)

test time: 0.48828554153442383


In [71]:
f1score = f1_score(yt, yp, average='weighted')
f1score

0.9520470667794674

In [72]:
generate_classification_report(yt, yp)

Unnamed: 0,f1-score,precision,recall,support
L3-1-10,1.0,1.0,1.0,4.0
L3-1-11,0.666667,0.666667,0.666667,3.0
L3-1-12,0.666667,0.5,1.0,1.0
L3-1-13,0.8,1.0,0.666667,3.0
L3-1-14,0.888889,1.0,0.8,5.0
L3-1-15,0.0,0.0,0.0,0.0
L3-1-16,0.5,1.0,0.333333,3.0
L3-1-17,0.666667,0.5,1.0,1.0
L3-10-11,0.8,1.0,0.666667,3.0
L3-10-13,0.857143,1.0,0.75,4.0


In [75]:
f1 = f1_score(yt, yp, average='weighted')
recall = recall_score(yt, yp, average='weighted')
precision = precision_score(yt, yp, average='weighted')
accuracy = accuracy_score(yt, yp)

print("F1-score:", f1*100)
print("Recall:", recall*100)
print("Precision:", precision*100)
print("Accuracy:", accuracy*100)

F1-score: 95.20470667794673
Recall: 95.38734476641041
Precision: 96.25999868016426
Accuracy: 95.38734476641041


In [76]:
# Calculate average error for each floor
calculate_avg_error(yt, yp)
calculate_mse(yt, yp)
calculate_rmse(yt, yp)

Average errors for each floor:
Floor 5: 0.20343841265243048
Floor 4: 0.08945088979357725
Floor 3: 0.7353962885572568
Average: 0.3427618636677548

MSE for each floor:
Floor 5: 1.1042382588774342
Floor 4: 0.6548717948717949
Floor 3: 4.8133333333333335
Average: 2.190814462360854

RMSE for each floor:
Floor 5: 1.050827416314132
Floor 4: 0.8092414935430554
Floor 3: 2.193931022920578
Average: 1.3513333109259218


1.3513333109259218