# Import Dataset

In [1]:
!rm -rf *
!git clone "https://github.com/hmda77/IndooePositioning/"
!mv -f /content/IndooePositioning/JUIndoorLoc/JUIndoorLoc-Test-data.csv /content/
!mv -f /content/IndooePositioning/JUIndoorLoc/JUIndoorLoc-Training-data.csv /content/
!rm -rf /content/IndooePositioning/

Cloning into 'IndooePositioning'...
remote: Enumerating objects: 31, done.[K
remote: Counting objects: 100% (31/31), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 31 (delta 5), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (31/31), 562.24 KiB | 1.23 MiB/s, done.
Resolving deltas: 100% (5/5), done.


# Import Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt


from sklearn import preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve, classification_report, auc
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score, cross_val_predict

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Dataset Processing

## Load Dataset

In [3]:
# Read the training CSV fetched above and preview its first five rows.
data_train = pd.read_csv(filepath_or_buffer="/content/JUIndoorLoc-Training-data.csv")
data_train.head(n=5)

Unnamed: 0,Cid,AP001,AP002,AP003,AP004,AP005,AP006,AP007,AP008,AP009,...,AP167,AP168,AP169,AP170,AP171,AP172,Rs,Hpr,Did,Ts
0,L4-40-1,-84,-80,-71,-58,-110,-72,-71,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,D4,1469870570949
1,L4-40-1,-84,-79,-71,-58,-110,-72,-71,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,D4,1470047205646
2,L4-40-1,-110,-110,-70,-56,-110,-69,-68,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,D4,1469870932338
3,L4-40-1,-110,-110,-70,-53,-110,-69,-68,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,D4,1470047629440
4,L4-37-2,-84,-82,-75,-65,-110,-73,-75,-110,-110,...,-110,-110,-110,-110,-110,-110,0,1,D4,1469876622694


In [4]:
# Convert device labels such as "D4" to their integer id.
# The digit run is extracted (rather than taking only the second character),
# so multi-digit ids are not truncated and the cell is idempotent: re-running
# it on the already-numeric column yields the same values instead of NaN.
data_train['Did'] = pd.to_numeric(
    data_train['Did'].astype(str).str.extract(r'(\d+)', expand=False)
)

In [5]:
# Read the test CSV fetched above and preview its first five rows.
data_test = pd.read_csv(filepath_or_buffer="/content/JUIndoorLoc-Test-data.csv")
data_test.head(n=5)

Unnamed: 0,Cid,AP001,AP002,AP003,AP004,AP005,AP006,AP007,AP008,AP009,...,AP167,AP168,AP169,AP170,AP171,AP172,Rs,Hpr,Did,Ts
0,L4-33-13,-77,-58,-66,-64,-92,-66,-66,-93,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489813137748
1,L4-33-13,-90,-58,-78,-56,-92,-74,-74,-87,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489813179138
2,L4-33-13,-80,-64,-78,-56,-92,-74,-74,-87,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489812948443
3,L4-33-13,-72,-60,-74,-58,-93,-75,-76,-95,-93,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489812959103
4,L4-33-13,-82,-56,-74,-56,-93,-71,-76,-89,-110,...,-110,-110,-110,-110,-110,-110,0,0,D2,1489813079167


In [6]:
# Convert device labels such as "D2" to their integer id.
# The digit run is extracted (rather than taking only the second character),
# so multi-digit ids are not truncated and the cell is idempotent: re-running
# it on the already-numeric column yields the same values instead of NaN.
data_test['Did'] = pd.to_numeric(
    data_test['Did'].astype(str).str.extract(r'(\d+)', expand=False)
)

# Training Base Model

In [7]:
def TrainingBaseModel(clf, DATA_TRAIN, f):
  """Cross-validate `clf` on every pair of device subsets of the training data.

  For each pair (g, h) with 1 <= g < h <= f, the rows of DATA_TRAIN whose
  'Did' equals g or h form one training set. Every column except 'Ts' and
  'Cid' is used as a feature and 'Cid' is the label. Out-of-fold predictions
  come from a shuffled 10-fold split (random_state=1), and the resulting
  accuracy is printed per pair.

  Args:
    clf: estimator handed to `cross_val_predict`.
    DATA_TRAIN: DataFrame with at least the columns 'Did' and 'Cid'.
    f: highest device id; pairs are drawn from 1..f.

  Returns:
    (OPCV, b): OPCV maps the 1-based pair number to a dict with keys
    'actual' (the 'Cid' labels of that subset) and 'predict' (the
    cross-validated predictions); b is the number of pairs processed.
  """
  fold_outputs = {}  # pair number -> {'actual', 'predict'}
  pair_count = 0
  non_feature_cols = ['Ts', 'Cid']

  for g in range(1, f + 1):
    # Starting at g + 1 visits each unordered pair exactly once.
    for h in range(g + 1, f + 1):
      # Training subset: samples recorded by device g or device h.
      pair_rows = DATA_TRAIN.loc[DATA_TRAIN['Did'].isin([g, h])]

      # Features are everything except the timestamp and the label.
      features = pair_rows.loc[:, ~pair_rows.columns.isin(non_feature_cols)]
      labels = pair_rows['Cid']

      splitter = KFold(n_splits=10, random_state=1, shuffle=True)
      pair_count += 1

      held_out_pred = cross_val_predict(clf, features, labels, cv=splitter, n_jobs=1, method='predict')

      fold_outputs[pair_count] = {'actual': labels, 'predict': held_out_pred}

      print("Tr({},{}):".format(g,h))
      print("validation accuracy: {:.2%}".format(accuracy_score(labels, held_out_pred)))
      print("----------------------------\n")
  return fold_outputs, pair_count


# Weight Determination (Algorithm 2)

In [8]:
def WeightDetermine(opcv, b, sigma=0.2):
  """Turn per-model cross-validation outcomes into normalised voting weights.

  For each model q in 1..b the number of correct and incorrect
  cross-validated predictions is counted. Starting from alpha = beta = 1,
  alpha grows by `sigma` per correct prediction and beta by `sigma` per
  incorrect one. The score of the model is

      un = 12*alpha*beta / ((alpha+beta)**2 * (1+alpha+beta))
      bl = alpha*(1-un) / (alpha+beta)
      T  = bl + 0.5*un

  and the returned weights are the scores divided by their sum.

  Args:
    opcv: dict keyed 1..b; each value has 'actual' and 'predict' entries of
      equal length (any array-like, e.g. a pandas Series and a numpy array).
    b: number of models.
    sigma: increment applied per sample (default 0.2, the value that was
      previously hard-coded).

  Returns:
    numpy array of length b holding the normalised weights.

  Raises:
    ValueError: if 'actual' and 'predict' of a model differ in shape.
  """
  T = np.empty([b])
  for q in range(1, b + 1):
    actual = np.asarray(opcv[q]['actual'])
    predicted = np.asarray(opcv[q]['predict'])
    if actual.shape != predicted.shape:
      raise ValueError(
          "model {}: 'actual' has shape {} but 'predict' has shape {}".format(
              q, actual.shape, predicted.shape))

    # One vectorised comparison replaces the per-element Python loop.
    hits = int(np.count_nonzero(actual == predicted))
    misses = actual.size - hits

    alpha = 1 + sigma * hits
    beta = 1 + sigma * misses

    un = (12 * alpha * beta) / (((alpha + beta) ** 2) * (1 + alpha + beta))
    bl = (alpha * (1 - un)) / (alpha + beta)
    T[q - 1] = bl + 0.5 * un

  return T / np.sum(T)

# Classification

In [9]:
def Classification(clf, DATA_TRAIN, DATA_TEST, f):
  """Fit `clf` on every device-pair subset and predict the whole test set.

  For each pair (g, h) with 1 <= g < h <= f, `clf` is fitted on the rows of
  DATA_TRAIN whose 'Did' equals g or h, and then used to predict every row
  of DATA_TEST. Features are all columns except 'Ts' and 'Cid'; the label
  is 'Cid'. The test accuracy of each pair is printed.

  The same `clf` object is refitted in place for every pair, so after the
  call it holds the fit of the last pair.

  Args:
    clf: estimator exposing `fit(X, y)` and `predict(X)`.
    DATA_TRAIN: training DataFrame with columns 'Did' and 'Cid'.
    DATA_TEST: test DataFrame with column 'Cid'.
    f: highest device id; pairs are drawn from 1..f.

  Returns:
    dict mapping the 1-based pair number to {'actual': test labels,
    'pred': predictions for the test set}.
  """
  non_feature_cols = ['Ts', 'Cid']

  # The test features and labels are the same for every pair.
  X_test = DATA_TEST.loc[:, ~DATA_TEST.columns.isin(non_feature_cols)]
  Y_test = DATA_TEST['Cid']

  test_outputs = {}
  pair_no = 0
  for g in range(1, f + 1):
    # Starting at g + 1 visits each unordered pair exactly once.
    for h in range(g + 1, f + 1):
      # Training subset: samples recorded by device g or device h.
      pair_rows = DATA_TRAIN.loc[DATA_TRAIN['Did'].isin([g, h])]
      features = pair_rows.loc[:, ~pair_rows.columns.isin(non_feature_cols)]
      labels = pair_rows['Cid']

      clf.fit(features, labels)
      test_pred = clf.predict(X_test)

      pair_no += 1
      test_outputs[pair_no] = {'actual': Y_test, 'pred': test_pred}

      print("Tr({},{}):".format(g,h))
      print("Test accuracy: {:.2%}".format(accuracy_score(Y_test, test_pred)))
      print("----------------------------\n")

  return test_outputs

# Weight Voting (Algorithm 3)

In [10]:
def WeightVoting(opte,DATA_TRAIN, DATA_TEST, w, b):
  """Combine the base-model predictions by weighted majority vote.

  For every test sample, each model q in 1..b adds its weight w[q-1] to the
  score of the label it predicted. The label with the highest total score
  wins; on a tie the label that appears first in DATA_TRAIN.Cid.unique()
  is chosen (first maximum of `np.argmax`). Predictions that are not among
  the training labels contribute to no score.

  Args:
    opte: dict keyed 1..b; opte[q]['pred'] is indexable by test-row position.
    DATA_TRAIN: DataFrame whose 'Cid' column defines the candidate labels.
    DATA_TEST: DataFrame; only its row count is used.
    w: sequence of b model weights.
    b: number of models.

  Returns:
    numpy object array with one voted label per test row.
  """
  lk = DATA_TRAIN.Cid.unique()
  o = len(lk)

  # Label -> score slot, built once. This replaces a linear scan over all
  # labels for every (sample, model) pair with a constant-time lookup.
  slot_of = {label: k for k, label in enumerate(lk)}

  kapa_buf = []
  for i in range(DATA_TEST.shape[0]):
    P = np.zeros(o)
    for q in range(1, b + 1):
      k = slot_of.get(opte[q]['pred'][i])
      if k is not None:
        P[k] += w[q - 1]
    kapa_buf.append(lk[np.argmax(P)])

  return np.array(kapa_buf, dtype=object)


## Weighted Ensemble Classifier (Algorithm 1)

In [11]:
def WeightedEnsembleClassifier(clf, DATA_TRAIN, DATA_TEST, f=4):
  """Run the full weighted-ensemble pipeline and print its test accuracy.

  Steps, each announced by a printed banner:
    1. TrainingBaseModel: cross-validate `clf` on every device pair.
    2. WeightDetermine: derive one weight per pair from those results.
    3. Classification: fit on every pair and predict the test set.
    4. WeightVoting: combine the predictions with the weights.
  Finally the accuracy of the voted labels against DATA_TEST.Cid is printed.

  Args:
    clf: estimator passed to TrainingBaseModel and Classification.
    DATA_TRAIN: training DataFrame.
    DATA_TEST: test DataFrame with a 'Cid' column.
    f: highest device id used to build the device pairs (default 4, the
      value that was previously hard-coded).
  """
  # The former `W = None; if W == None:` guard was always true and has been
  # removed; the steps below run unconditionally, exactly as before.
  print("<----------- Training base model ----------->")
  OPCV, b = TrainingBaseModel(clf, DATA_TRAIN, f)

  print("\n\n<----------- Weight Determination ----------->")
  W = WeightDetermine(OPCV, b)
  print("W is: {}".format(W))
  print("\n\n<--------------- Classification --------------->")
  OPTE = Classification(clf, DATA_TRAIN, DATA_TEST, f)

  print("\n\n<--------------- Weight Voting --------------->")
  KAPA = WeightVoting(OPTE,DATA_TRAIN, DATA_TEST, W, b)
  print("weighted ensemble accuracy: {:.2%}".format(accuracy_score(DATA_TEST.Cid, KAPA)))

In [12]:
# Run the ensemble with a linear-kernel SVM as the base classifier.
clf = SVC(gamma='scale', kernel='linear')
WeightedEnsembleClassifier(clf, DATA_TRAIN=data_train, DATA_TEST=data_test)

<----------- Training base model ----------->
Tr(1,2):
validation accuracy: 93.05%
----------------------------

Tr(1,3):
validation accuracy: 96.06%
----------------------------

Tr(1,4):
validation accuracy: 95.71%
----------------------------

Tr(2,3):
validation accuracy: 94.71%
----------------------------

Tr(2,4):
validation accuracy: 95.41%
----------------------------

Tr(3,4):
validation accuracy: 99.20%
----------------------------



<----------- Weight Determination ----------->
W is: [0.1620667  0.16729219 0.16668107 0.16496976 0.16619587 0.1727944 ]


<--------------- Classification --------------->
Tr(1,2):
Test accuracy: 93.63%
----------------------------

Tr(1,3):
Test accuracy: 24.73%
----------------------------

Tr(1,4):
Test accuracy: 25.34%
----------------------------

Tr(2,3):
Test accuracy: 90.75%
----------------------------

Tr(2,4):
Test accuracy: 95.82%
----------------------------

Tr(3,4):
Test accuracy: 25.68%
----------------------------



<---------

In [13]:
# Run the ensemble with a 5-nearest-neighbours base classifier.
clf = KNeighborsClassifier(n_neighbors=5)
WeightedEnsembleClassifier(clf, DATA_TRAIN=data_train, DATA_TEST=data_test)

<----------- Training base model ----------->
Tr(1,2):
validation accuracy: 63.19%
----------------------------

Tr(1,3):
validation accuracy: 59.53%
----------------------------

Tr(1,4):
validation accuracy: 60.78%
----------------------------

Tr(2,3):
validation accuracy: 77.16%
----------------------------

Tr(2,4):
validation accuracy: 77.70%
----------------------------

Tr(3,4):
validation accuracy: 93.77%
----------------------------



<----------- Weight Determination ----------->
W is: [0.14623092 0.13777427 0.14066384 0.17854094 0.17980523 0.21698481]


<--------------- Classification --------------->
Tr(1,2):
Test accuracy: 92.88%
----------------------------

Tr(1,3):
Test accuracy: 27.33%
----------------------------

Tr(1,4):
Test accuracy: 27.40%
----------------------------

Tr(2,3):
Test accuracy: 94.79%
----------------------------

Tr(2,4):
Test accuracy: 98.77%
----------------------------

Tr(3,4):
Test accuracy: 28.08%
----------------------------



<---------