<a href="https://colab.research.google.com/github/hajay180505/LocateMe/blob/main/NuevoLocateMe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Cloning github repo for the dataset source files

In [57]:
!git clone https://github.com/hajay180505/LocateMe.git

fatal: destination path 'LocateMe' already exists and is not an empty directory.



##Imports:
> * **csv** - for creating a csv data file from the read data \\
> * **pandas** - to create a dataframe for the dataset in order to train it
>*  **numpy** - for data handling processes
>* **train_test_split** - for splitting testing and training data
>* **K NeighborsClassifier** - the ML model used
>* **accuracy_score** - for measuring the accuracy of the model
>* **joblib** - for exporting the model
>* **stats** - for using mode of a _NumPy_ array

In [58]:
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

from joblib import Parallel, delayed
import joblib

from scipy import stats as st


##Room mapping for the text files in the data

In [59]:

room = {
    "2011.txt" : 201,
    "2012.txt" : 201,
    "2021.txt" : 202,
    "2022.txt" : 202,
    "61.txt" : 6,
    "62.txt" : 6,
    "71.txt" : 7,
    '72.txt' : 7,
    '81.txt' : 8,
    '82.txt' : 8
}

## Parsing the data:
  The collected data from RPI is parsed to make it into a single csv file.

In [60]:


file_paths = ["2011.txt", "2012.txt", "2021.txt", "2022.txt", "61.txt", "62.txt", "71.txt",'72.txt','81.txt','82.txt']
data = []
final_file = input("Enter final data file name with extension(.csv) :")
for k in file_paths:

  with open('LocateMe/Wifi_data/' + k,'r') as f:
    reader = csv.reader(f)
    block = []
    tup = []
    for row in reader:
      if row == []:
        block.append(tup)
        tup = []
      else:
        if row[0]!='':
          row.append(k)
          tup.append(list(row))
    clean_b = []
    for b in block:
      new_b = [ x for x in b if x[2] in ['PSG','AMCS'] ]
      if new_b!=[]:
        clean_b.append(new_b)
    data.append(list(clean_b ))


mac = []
for round in data:
  for reading in round:
    for signal in reading:
      mac.append(signal[0])
mac = set(mac)


header = list(mac)

header.append('Room')



dict_list = []

for k in file_paths:
  with open(k,'r') as f:
    reader = csv.reader(f)
    block = []
    tup = []
    for row in reader:
      if row == []:
        block.append(tup)
        tup = []
      else:
        if row[0]!='':
          row.append(room[k])
          tup.append(list(row))
    clean_b = []
    for b in block:
      new_b = [ x for x in b if x[2] in ['PSG','AMCS'] ]
      if new_b!=[]:
        clean_b.append(new_b)

    for record in clean_b:
      base_dict = {
          '00:FC:BA:32:99:40'  :  0,
          '78:72:5D:DE:99:20'  :  0,
          '00:FC:BA:32:91:40'  :  0,
          '78:72:5D:F5:5F:10' :  0,
          '00:FC:BA:32:77:E0'  :  0,
          '00:35:1A:08:44:60'  :  0,
          '00:35:1A:08:46:10'  :  0,
          '00:FC:BA:32:9A:E0'  :  0,
          '00:24:B2:81:A9:A0'  :  0,
          '00:FC:BA:32:9A:80'  :  0,
          '00:FC:BA:32:98:E0'  :  0,
          'Room': ''
      }
      for reading in record:
        base_dict[reading[0]] = reading[1]
        base_dict['Room'] = reading[-1]
      dict_list.append(base_dict)


with open(final_file,'w') as op:
  w = csv.DictWriter(op,fieldnames=header)
  w.writeheader()
  w.writerows(dict_list)




###Creating K Neighbors Classifier with `n_neighbors = 5` and testing its accuracy

In [61]:
d = pd.read_csv(final_file)

X = d.drop(['Room'], axis='columns').values
y = d.Room.values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, random_state=42)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

print(knn.score(X_test, y_test))

y_pred = knn.predict(X_test)

print(accuracy_score(y_test, y_pred))


0.9391634980988594
0.9391634980988594


In [62]:
model_file = input("Enter the model name with .pkl extension :")

joblib.dump(knn, model_file)

# # Load the model from the file
# knn_from_joblib = joblib.load('model.pkl')

# # Use the loaded model to make predictions
# knn_from_joblib.predict(X_test)
# print(knn_from_joblib.score(X_test, y_test))

['model.pkl']

###Using  the created knn model to predict a location from a new data source

In [63]:
import csv
file_paths = ["201.txt"]
#data = []

knn_from_joblib = joblib.load(model_file)


for k in file_paths:

  with open('LocateMe/Wifi_data/'+ k,'r') as f:
    reader = csv.reader(f)
    block = []
    tup = []
    for row in reader:
      if row == []:
        block.append(tup)
        tup = []
      else:
        if row[0]!='':
          row.append(k)
          tup.append(list(row))
    clean_b = []
    for b in block:
      new_b = [ x for x in b if x[2] in ['PSG','AMCS'] ]
      if new_b!=[]:
        clean_b.append(new_b)


mac = []
for round in data:
  for reading in round:
    for signal in reading:
      mac.append(signal[0])


#print(set(mac))

mac = set(mac)


header = list(mac)


dict_list = []

for k in file_paths:
  with open(k,'r') as f:
    reader = csv.reader(f)
    block = []
    tup = []
    for row in reader:
      if row == []:
        block.append(tup)
        tup = []
      else:
        if row[0]!='':
          tup.append(list(row))
    clean_b = []
    for b in block:
      new_b = [ x for x in b if x[2] in ['PSG','AMCS'] ]
      if new_b!=[]:
        clean_b.append(new_b)

    for record in clean_b:
      base_dict = {
          '00:FC:BA:32:99:40'  :  0,
          '78:72:5D:DE:99:20'  :  0,
          '00:FC:BA:32:91:40'  :  0,
          '78:72:5D:F5:5F:10'  :  0,
          '00:FC:BA:32:77:E0'  :  0,
          '00:35:1A:08:44:60'  :  0,
          '00:35:1A:08:46:10'  :  0,
          '00:FC:BA:32:9A:E0'  :  0,
          '00:24:B2:81:A9:A0'  :  0,
          '00:FC:BA:32:9A:80'  :  0,
          '00:FC:BA:32:98:E0'  :  0,
          #'Room': ''
      }
      for reading in record:
        base_dict[reading[0]] = reading[1]
      dict_list.append(base_dict)

with open("data_test.csv",'w') as op:
  w = csv.DictWriter(op,fieldnames=header)
  w.writeheader()
  w.writerows(dict_list)

nd = pd.read_csv('data_test.csv')

whole = knn.predict(np.array(nd))

print("Whole :",whole)

# for k,v in nd.iterrows():
#   #print(v)
#   array = np.array(v)
#   #print(array)
#   ans = knn_from_joblib.predict(array.reshape(1,-1))
#   print(ans)

print("Location most likely is", st.mode(whole).mode)

Whole : [201 202 202 202 202 201 201 201 201 201 201 201 201 201 201]
Location most likely is 201
