In [None]:
!rm -rf *
!git clone "https://github.com/hmda77/Ensemble-Indoor-Loc"
!cp /content/Ensemble-Indoor-Loc/JUIndoorLoc/JUIndoorLoc-Test-data.csv /content/
!cp /content/Ensemble-Indoor-Loc/JUIndoorLoc/JUIndoorLoc-Training-data.csv /content/
!rm -rf /content/Ensemble-Indoor-Loc/

# Import Packages



In [160]:
import numpy as np
import pandas as pd
import re

import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix, precision_score, roc_curve, roc_auc_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder , normalize

import time

import seaborn as sns

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score, cross_val_predict

from matplotlib import pyplot

# Metric Functions

In [161]:

# Make the confusion matrix
def confusion_plot(y_test,y_pred,title):
    cmt = confusion_matrix(y_test,y_pred)
    plt.rcParams['figure.figsize'] = (10,8)
    sns.heatmap(cmt,fmt='',annot=True,linewidth=0.01,cmap=sns.cubehelix_palette(as_cmap=True))
    plt.title("confusion matrix {}".format(title))
    plt.xlabel("predicted")
    plt.ylabel("true")
    plt.show()


# generate classification report
def generate_classification_report(y_test,y_pred):
    report=pd.DataFrame.from_dict(classification_report(y_pred,y_test,output_dict=True)).T
    # report['Label']=[data_classes[i]  if i<5 else " " for i,x in enumerate(report.index)]
    report=report[['f1-score','precision','recall','support']]
    pd.set_option('display.max_rows', report.shape[0]+1)
    return report

# ROC Curve
def plot_roc_curve(y_test,proba,pos_label):
    fpr, tpr, thresholds = roc_curve(y_test, proba[:,pos_label], pos_label= pos_label)
    roc_auc = roc_auc_score(y_test, proba, multi_class='ovr')

    plt.rcParams['figure.figsize'] = (5,4)
    plt.plot(fpr, tpr,'k:',lw=5, label='average ROC curve (area = {0:0.2f})'.format(roc_auc))
    plt.plot([0 ,1],[0 ,1],'r')
    plt.legend(loc="lower right")
    # plt.title('ROC Curve for calss {0}'.format(data_classes[pos_label]))
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()


# ROC Curve for all classes
def plot_roc_curve_all(y_test,proba,n_class,title):
    for i in range(n_class):
      fpr, tpr, thresholds = roc_curve(y_test, proba[:,i],pos_label= i)
      plt.plot(fpr, tpr,lw=1)
    plt.plot([0 ,1],[0 ,1],'r')
    plt.title('ROC Curve for all classes in {}'.format(title))
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    # plt.legend(data_classes)
    plt.show()

# Function to extract floor number, X coordinate, and Y coordinate
def extract_info(y):
    parts = y.split('-')
    floor = int(parts[0][1:])  # Extract floor number
    x_coord = float(parts[1])  # Extract X coordinate
    y_coord = float(parts[2])  # Extract Y coordinate
    return floor, x_coord, y_coord

# Calculate average error for each floor
def calculate_avg_error(y_test, y_pred):
    errors = {}
    counts = {}
    for y_t, y_p in zip(y_test, y_pred):
        floor_t, x_t, y_t = extract_info(y_t)
        floor_p, x_p, y_p = extract_info(y_p)
        error = np.sqrt((x_t - x_p)**2 + (y_t - y_p)**2)
        errors.setdefault(floor_t, []).append(error)
        counts.setdefault(floor_t, 0)
        counts[floor_t] += 1

    avg_errors = {floor: sum(errors[floor]) / counts[floor] for floor in errors}

    print("Average errors for each floor:")
    for floor, error in avg_errors.items():
        print(f"Floor {floor}: {error}")
    print(f"Average: {sum(avg_errors.values())/3}")

# Calculate average error for each floor
def calculate_mse(y_test, y_pred):
    errors = {}
    counts = {}
    for y_t, y_p in zip(y_test, y_pred):
        floor_t, x_t, y_t = extract_info(y_t)
        floor_p, x_p, y_p = extract_info(y_p)
        error = (x_t - x_p)**2 + (y_t - y_p)**2
        errors.setdefault(floor_t, []).append(error)
        counts.setdefault(floor_t, 0)
        counts[floor_t] += 1

    mse_errors = {floor: sum(errors[floor]) / counts[floor] for floor in errors}
    print("\nMSE for each floor:")
    for floor, error in mse_errors.items():
        print(f"Floor {floor}: {error}")
    print(f"Average: {sum(mse_errors.values())/3}")

# Calculate average error for each floor
def calculate_rmse(y_test, y_pred):
    errors = {}
    counts = {}
    for y_t, y_p in zip(y_test, y_pred):
        floor_t, x_t, y_t = extract_info(y_t)
        floor_p, x_p, y_p = extract_info(y_p)
        error = (x_t - x_p)**2 + (y_t - y_p)**2
        errors.setdefault(floor_t, []).append(error)
        counts.setdefault(floor_t, 0)
        counts[floor_t] += 1

    rmse_errors = {floor: np.sqrt(sum(errors[floor]) / counts[floor]) for floor in errors}
    print("\nRMSE for each floor:")
    for floor, error in rmse_errors.items():
        print(f"Floor {floor}: {error}")
    print(f"Average: {sum(rmse_errors.values())/3}")

# Dateset Proccesses

In [181]:
tr_path = "C://Users/Hamid/content/JUIndoorLoc-Training-data.csv"
ts_path = "C://Users/Hamid/content/JUIndoorLoc-Test-data.csv"

In [182]:
data_train = pd.read_csv(tr_path)
data_train.head(5)
data_test = pd.read_csv(ts_path)
data_test.head(5)

Unnamed: 0,Cid,AP001,AP002,AP003,AP004,AP005,AP006,AP007,AP008,AP009,...,AP167,AP168,AP169,AP170,AP171,AP172,Rs,Hpr,Did,Ts
0,L4-33-13,-77,-58,-66,-64,-92,-66,-66,-93,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489813137748
1,L4-33-13,-90,-58,-78,-56,-92,-74,-74,-87,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489813179138
2,L4-33-13,-80,-64,-78,-56,-92,-74,-74,-87,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489812948443
3,L4-33-13,-72,-60,-74,-58,-93,-75,-76,-95,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489812959103
4,L4-33-13,-82,-56,-74,-56,-93,-71,-76,-89,-110,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489813079167


In [183]:
frames = [data_train, data_test]
df = pd.concat(frames)
df.head(5)

Unnamed: 0,Cid,AP001,AP002,AP003,AP004,AP005,AP006,AP007,AP008,AP009,...,AP167,AP168,AP169,AP170,AP171,AP172,Rs,Hpr,Did,Ts
0,L4-40-1,-84,-80,-71,-58,-110,-72,-71,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,D4,1469870570949
1,L4-40-1,-84,-79,-71,-58,-110,-72,-71,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,D4,1470047205646
2,L4-40-1,-110,-110,-70,-56,-110,-69,-68,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,D4,1469870932338
3,L4-40-1,-110,-110,-70,-53,-110,-69,-68,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,D4,1470047629440
4,L4-37-2,-84,-82,-75,-65,-110,-73,-75,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,D4,1469876622694


In [184]:
df['Did'] = df['Did'].astype(str).str[1]
df['Did'] = pd.to_numeric(df['Did'])

In [185]:
data_combined = df.drop('Cid', axis=1).reset_index(drop=True)
data_combined = data_combined.drop('Ts', axis=1)

In [216]:
#make X_train from Tr
X = normalize(data_combined)
y = df.Cid

le = LabelEncoder()
le.fit(y)
le.transform(y)

X_train, Xt, y_train, yt = train_test_split(data_combined, y, test_size=0.2, shuffle=True, random_state=42)

y_train_encode =  le.transform(y_train)

y_test_encode = le.transform(yt)

X_test = Xt

# Random Forest, Deceision Trees, Extra Tree

## Deceision Tree 

### Depth 10

In [256]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

dt_model = DecisionTreeClassifier(max_depth=10, min_samples_split=2, random_state=0)
dt_model.fit(X_train, y_train_encode)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"train time: {elapsed_time}")

train time: 0.39933276176452637


In [257]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

y_pred = dt_model.predict(X_test)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"test time: {elapsed_time}")

yp = le.inverse_transform(y_pred)
yp = pd.Series(yp)

test time: 0.019029855728149414


In [258]:
generate_classification_report(yt, yp)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1-score,precision,recall,support
L3-1-13,0.0,0.0,0.0,0.0
L3-1-14,0.0,0.0,0.0,0.0
L3-1-16,0.0,0.0,0.0,0.0
L3-1-17,0.0,0.0,0.0,0.0
L3-10-11,0.0,0.0,0.0,0.0
L3-10-12,0.0,0.0,0.0,0.0
L3-10-14,0.0,0.0,0.0,0.0
L3-10-15,0.0,0.0,0.0,0.0
L3-10-16,0.0,0.0,0.0,0.0
L3-10-17,0.0,0.0,0.0,0.0


In [259]:
f1 = f1_score(yt, yp, average='weighted')
recall = recall_score(yt, yp, average='weighted')
precision = precision_score(yt, yp, average='weighted')
accuracy = accuracy_score(yt, yp)

print("F1-score:", f1*100)
print("Recall:", recall*100)
print("Precision:", precision*100)
print("Accuracy:", accuracy*100)

F1-score: 7.291974489258735
Recall: 7.825744135619948
Precision: 11.370152477777395
Accuracy: 7.825744135619948


  _warn_prf(average, modifier, msg_start, len(result))


In [260]:
# Calculate average error for each floor
calculate_avg_error(yt, yp)
calculate_mse(yt, yp)
calculate_rmse(yt, yp)

Average errors for each floor:
Floor 4: 6.363776024918202
Floor 5: 10.28816929368279
Floor 3: 18.881707129417617
Average: 11.844550816006205

MSE for each floor:
Floor 4: 72.5159793814433
Floor 5: 150.59493670886076
Floor 3: 391.58641975308643
Average: 204.89911194779685

RMSE for each floor:
Floor 4: 8.515631472852926
Floor 5: 12.271712867764661
Floor 3: 19.78854263843314
Average: 13.525295659683573


### Depth 50

In [261]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

dt_model = DecisionTreeClassifier(max_depth=50, min_samples_split=2, random_state=0)
dt_model.fit(X_train, y_train_encode)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"train time: {elapsed_time}")

train time: 2.055595636367798


In [262]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

y_pred = dt_model.predict(X_test)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"test time: {elapsed_time}")

yp = le.inverse_transform(y_pred)
yp = pd.Series(yp)

test time: 0.021391630172729492


In [263]:
generate_classification_report(yt, yp)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1-score,precision,recall,support
L3-1-11,0.0,0.0,0.0,1.0
L3-1-13,0.083333,0.5,0.045455,22.0
L3-1-14,1.0,1.0,1.0,2.0
L3-1-15,0.0,0.0,0.0,2.0
L3-1-16,0.5,0.333333,1.0,1.0
L3-1-17,0.4,0.25,1.0,1.0
L3-10-11,1.0,1.0,1.0,1.0
L3-10-12,1.0,1.0,1.0,1.0
L3-10-13,0.0,0.0,0.0,2.0
L3-10-14,0.0,0.0,0.0,0.0


In [264]:
f1 = f1_score(yt, yp, average='weighted')
recall = recall_score(yt, yp, average='weighted')
precision = precision_score(yt, yp, average='weighted')
accuracy = accuracy_score(yt, yp)

print("F1-score:", f1*100)
print("Recall:", recall*100)
print("Precision:", precision*100)
print("Accuracy:", accuracy*100)

F1-score: 84.00838643799034
Recall: 81.17484723043565
Precision: 91.9923118339127
Accuracy: 81.17484723043565


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [265]:
# Calculate average error for each floor
calculate_avg_error(yt, yp)
calculate_mse(yt, yp)
calculate_rmse(yt, yp)

Average errors for each floor:
Floor 4: 1.1033520405019828
Floor 5: 1.3154081398667494
Floor 3: 1.7260924838125142
Average: 1.3816175547270821

MSE for each floor:
Floor 4: 12.137886597938145
Floor 5: 13.621403912543153
Floor 3: 17.89814814814815
Average: 14.552479552876482

RMSE for each floor:
Floor 4: 3.48394698552348
Floor 5: 3.690718617362092
Floor 3: 4.230620302999094
Average: 3.801761968628222


### Depth 100

In [266]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

dt_model = DecisionTreeClassifier(max_depth=100, min_samples_split=2, random_state=0)
dt_model.fit(X_train, y_train_encode)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"train time: {elapsed_time}")

train time: 2.3045125007629395


In [267]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

y_pred = dt_model.predict(X_test)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"test time: {elapsed_time}")

yp = le.inverse_transform(y_pred)
yp = pd.Series(yp)

test time: 0.025018930435180664


In [268]:
generate_classification_report(yt, yp)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1-score,precision,recall,support
L3-1-11,0.0,0.0,0.0,1.0
L3-1-13,1.0,1.0,1.0,2.0
L3-1-14,1.0,1.0,1.0,2.0
L3-1-15,0.0,0.0,0.0,2.0
L3-1-16,0.5,0.333333,1.0,1.0
L3-1-17,0.4,0.25,1.0,1.0
L3-10-11,1.0,1.0,1.0,1.0
L3-10-12,1.0,1.0,1.0,1.0
L3-10-13,0.0,0.0,0.0,2.0
L3-10-14,0.5,0.333333,1.0,1.0


In [269]:
f1 = f1_score(yt, yp, average='weighted')
recall = recall_score(yt, yp, average='weighted')
precision = precision_score(yt, yp, average='weighted')
accuracy = accuracy_score(yt, yp)

print("F1-score:", f1*100)
print("Recall:", recall*100)
print("Precision:", precision*100)
print("Accuracy:", accuracy*100)

F1-score: 93.86841011865896
Recall: 93.69209540705697
Precision: 95.75597773232309
Accuracy: 93.69209540705697


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [270]:
# Calculate average error for each floor
calculate_avg_error(yt, yp)
calculate_mse(yt, yp)
calculate_rmse(yt, yp)

Average errors for each floor:
Floor 4: 0.1959414397729566
Floor 5: 0.21006164755415105
Floor 3: 0.7103324747766795
Average: 0.37211185403459573

MSE for each floor:
Floor 4: 1.552319587628866
Floor 5: 2.574223245109321
Floor 3: 4.746913580246914
Average: 2.957818804328367

RMSE for each floor:
Floor 4: 1.2459211803436308
Floor 5: 1.6044386074603545
Floor 3: 2.1787412834586197
Average: 1.6763670237542019


### Depth MAX

In [271]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

dt_model = DecisionTreeClassifier(max_depth=176, min_samples_split=2, random_state=0)
dt_model.fit(X_train, y_train_encode)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"train time: {elapsed_time}")

train time: 2.3137588500976562


In [272]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

y_pred = dt_model.predict(X_test)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"test time: {elapsed_time}")

yp = le.inverse_transform(y_pred)
yp = pd.Series(yp)

test time: 0.022572994232177734


In [273]:
generate_classification_report(yt, yp)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1-score,precision,recall,support
L3-1-11,0.0,0.0,0.0,1.0
L3-1-13,1.0,1.0,1.0,2.0
L3-1-14,1.0,1.0,1.0,2.0
L3-1-15,0.0,0.0,0.0,2.0
L3-1-16,0.5,0.333333,1.0,1.0
L3-1-17,0.4,0.25,1.0,1.0
L3-10-11,1.0,1.0,1.0,1.0
L3-10-12,1.0,1.0,1.0,1.0
L3-10-13,0.0,0.0,0.0,2.0
L3-10-14,0.5,0.333333,1.0,1.0


In [274]:
f1 = f1_score(yt, yp, average='weighted')
recall = recall_score(yt, yp, average='weighted')
precision = precision_score(yt, yp, average='weighted')
accuracy = accuracy_score(yt, yp)

print("F1-score:", f1*100)
print("Recall:", recall*100)
print("Precision:", precision*100)
print("Accuracy:", accuracy*100)

F1-score: 94.08233463373251
Recall: 94.10605164596886
Precision: 95.73895227798243
Accuracy: 94.10605164596886


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [275]:
# Calculate average error for each floor
calculate_avg_error(yt, yp)
calculate_mse(yt, yp)
calculate_rmse(yt, yp)

Average errors for each floor:
Floor 4: 0.1581453572675066
Floor 5: 0.17964945607049337
Floor 3: 0.7103324747766795
Average: 0.3493757627048932

MSE for each floor:
Floor 4: 1.0239690721649484
Floor 5: 2.257767548906789
Floor 3: 4.746913580246914
Average: 2.676216733772884

RMSE for each floor:
Floor 4: 1.0119135695132013
Floor 5: 1.5025869521950432
Floor 3: 2.1787412834586197
Average: 1.5644139350556214


## Random Forest

### n_estimators = 10

In [279]:
n_learners = 1
max_depth = None

In [299]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

rf_model = RandomForestClassifier(
    n_estimators=n_learners,
    max_depth=None,
    random_state=42,
    min_samples_split=2,
    n_jobs=-1  # Utilize all available CPU cores
)
rf_model.fit(X_train, y_train_encode)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"train time: {elapsed_time}")

train time: 0.27915000915527344


In [300]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

y_pred = rf_model.predict(X_test)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"test time: {elapsed_time}")

yp = le.inverse_transform(y_pred)
yp = pd.Series(yp)

test time: 0.05670952796936035


In [301]:
f1score = f1_score(yt, yp, average='weighted')
f1score

0.8484749777004355

In [302]:
generate_classification_report(yt, yp)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1-score,precision,recall,support
L3-1-11,0.0,0.0,0.0,1.0
L3-1-12,0.0,0.0,0.0,1.0
L3-1-13,0.5,0.5,0.5,2.0
L3-1-14,0.8,1.0,0.666667,3.0
L3-1-16,0.5,0.333333,1.0,1.0
L3-1-17,0.0,0.0,0.0,0.0
L3-10-10,0.0,0.0,0.0,2.0
L3-10-11,1.0,1.0,1.0,1.0
L3-10-12,1.0,1.0,1.0,1.0
L3-10-13,0.0,0.0,0.0,2.0


In [293]:
f1 = f1_score(yt, yp, average='weighted')
recall = recall_score(yt, yp, average='weighted')
precision = precision_score(yt, yp, average='weighted')
accuracy = accuracy_score(yt, yp)

print("F1-score:", f1*100)
print("Recall:", recall*100)
print("Precision:", precision*100)
print("Accuracy:", accuracy*100)

F1-score: 83.10280126024855
Recall: 82.49556475458309
Precision: 87.74626256972817
Accuracy: 82.49556475458309


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [294]:
# Calculate average error for each floor
calculate_avg_error(yt, yp)
calculate_mse(yt, yp)
calculate_rmse(yt, yp)

Average errors for each floor:
Floor 4: 0.7326198055273834
Floor 5: 0.8723073694495395
Floor 3: 1.5985954044128274
Average: 1.0678408597965834

MSE for each floor:
Floor 4: 6.920618556701031
Floor 5: 11.964326812428078
Floor 3: 10.669753086419753
Average: 9.851566151849621

RMSE for each floor:
Floor 4: 2.6307068549538224
Floor 5: 3.458948801648859
Floor 3: 3.266458799130911
Average: 3.1187048185778643


### n_estimators = 50

In [295]:
n_estimators = 50

In [251]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

rf_model = RandomForestClassifier(n_estimators=n_estimators, max_depth=None,
                                      min_samples_split=2, random_state=42, n_jobs=-1)
rf_model.fit(X_train, y_train_encode)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"train time: {elapsed_time}")

train time: 4.61186957359314


In [252]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

y_pred = rf_model.predict(X_test)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"test time: {elapsed_time}")

yp = le.inverse_transform(y_pred)
yp = pd.Series(yp)

test time: 1.1763358116149902


In [253]:
generate_classification_report(yt, yp)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1-score,precision,recall,support
L3-1-13,0.571429,1.0,0.4,5.0
L3-1-14,1.0,1.0,1.0,2.0
L3-1-15,0.0,0.0,0.0,2.0
L3-1-16,0.5,0.333333,1.0,1.0
L3-1-17,0.4,0.25,1.0,1.0
L3-10-10,0.0,0.0,0.0,1.0
L3-10-11,1.0,1.0,1.0,1.0
L3-10-12,1.0,1.0,1.0,1.0
L3-10-13,0.0,0.0,0.0,2.0
L3-10-14,0.5,0.333333,1.0,1.0


In [254]:
f1 = f1_score(yt, yp, average='weighted')
recall = recall_score(yt, yp, average='weighted')
precision = precision_score(yt, yp, average='weighted')
accuracy = accuracy_score(yt, yp)

print("F1-score:", f1*100)
print("Recall:", recall*100)
print("Precision:", precision*100)
print("Accuracy:", accuracy*100)

F1-score: 95.84488520403114
Recall: 95.80130100532229
Precision: 97.19588896111665
Accuracy: 95.80130100532229


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [255]:
# Calculate average error for each floor
calculate_avg_error(yt, yp)
calculate_mse(yt, yp)
calculate_rmse(yt, yp)

Average errors for each floor:
Floor 4: 0.06318544050798892
Floor 5: 0.13666605903040663
Floor 3: 0.37784018589619744
Average: 0.19256389514486436

MSE for each floor:
Floor 4: 0.2556701030927835
Floor 5: 2.054085155350978
Floor 3: 1.0864197530864197
Average: 1.132058337176727

RMSE for each floor:
Floor 4: 0.5056383125246577
Floor 5: 1.4332079944484604
Floor 3: 1.0423146132940955
Average: 0.9937203067557379


### n_estimators = 100

In [243]:
n_estimators = 100

In [245]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

rf_model = RandomForestClassifier(n_estimators=n_estimators, max_depth=None,
                                      min_samples_split=2, random_state=42, n_jobs=-1)
rf_model.fit(X_train, y_train_encode)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"train time: {elapsed_time}")

train time: 10.720368146896362


In [246]:
# Make predictions on the test set
start_time = time.time()  # Record the start time

y_pred = rf_model.predict(X_test)

end_time = time.time()  # Record the end time
elapsed_time = end_time - start_time  # Calculate elapsed time
print(f"test time: {elapsed_time}")

yp = le.inverse_transform(y_pred)
yp = pd.Series(yp)

test time: 3.0022952556610107


In [247]:
generate_classification_report(yt, yp)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1-score,precision,recall,support
L3-1-13,0.333333,0.5,0.25,4.0
L3-1-14,0.8,1.0,0.666667,3.0
L3-1-15,0.0,0.0,0.0,2.0
L3-1-16,0.5,0.333333,1.0,1.0
L3-1-17,0.4,0.25,1.0,1.0
L3-10-11,1.0,1.0,1.0,1.0
L3-10-12,1.0,1.0,1.0,1.0
L3-10-13,0.0,0.0,0.0,2.0
L3-10-14,0.5,0.333333,1.0,1.0
L3-10-15,1.0,1.0,1.0,3.0


In [248]:
f1 = f1_score(yt, yp, average='weighted')
recall = recall_score(yt, yp, average='weighted')
precision = precision_score(yt, yp, average='weighted')
accuracy = accuracy_score(yt, yp)

print("F1-score:", f1*100)
print("Recall:", recall*100)
print("Precision:", precision*100)
print("Accuracy:", accuracy*100)

F1-score: 95.80988402237118
Recall: 95.76187660161641
Precision: 97.171866944191
Accuracy: 95.76187660161641


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [159]:
# Calculate average error for each floor
calculate_avg_error(yt, yp)
calculate_mse(yt, yp)
calculate_rmse(yt, yp)

Average errors for each floor:
Floor 4: 0.09772725521501421
Floor 5: 0.16035252987883128
Floor 3: 1.5836993619849264
Average: 0.6139263823595906

MSE for each floor:
Floor 4: 0.4713917525773196
Floor 5: 1.1841196777905638
Floor 3: 10.391975308641975
Average: 4.015828913003286

RMSE for each floor:
Floor 4: 0.6865797496120313
Floor 5: 1.088172632347719
Floor 3: 3.223658683645335
Average: 1.6661370218683615
