## Étude de modèles à utiliser pour le projet LocalPositioningSystem

In [1]:
import os

import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split

### Récupération des données

In [2]:
### COMMON ###

def enlever_extension(nom):
    """Return the file name ``nom`` without its last extension.

    Only the final ``.suffix`` is removed, so ``"a.b.txt"`` becomes
    ``"a.b"``. A name that contains no dot at all is returned unchanged
    rather than collapsing to an empty string.
    """
    base, dot, _ = nom.rpartition(".")
    # rpartition yields an empty separator when the name has no dot.
    return base if dot else nom

In [3]:
class Wifi:
    """One access point seen during a scan.

    The constructor stores its eleven arguments unchanged under attributes
    of the same names.
    """

    def __init__(self, BSSID, capabilities, centerFreq0, centerFreq1,
                 channelWidth, frequency, level, operatorFriendlyName, SSID,
                 timestamp, venueName):
        # Attributes are bound in the same order as the parameters.
        self.BSSID, self.capabilities = BSSID, capabilities
        self.centerFreq0, self.centerFreq1 = centerFreq0, centerFreq1
        self.channelWidth, self.frequency = channelWidth, frequency
        self.level = level
        self.operatorFriendlyName, self.SSID = operatorFriendlyName, SSID
        self.timestamp, self.venueName = timestamp, venueName

class Scan:
    """One scan: its date, its info text, its room and the Wifi objects seen."""

    def __init__(self, date, info, piece, lWifi):
        # Values are stored exactly as received.
        self.date, self.info = date, info
        self.piece, self.lWifi = piece, lWifi

class ScanManager:
    """Read scan files and keep the parsed scans plus integer id tables."""

    def __init__(self):
        # Every Scan parsed so far, in reading order.
        self.lScan = []
        # Room name (file name without extension) -> integer room id.
        self.pieceOrder = {}
        # Integer room id -> room name (inverse of pieceOrder).
        self.pieceInverse = {}
        # Raw BSSID string -> integer id, numbered by first appearance.
        self.BSSIDOrder = {}

    @staticmethod
    def _value(line):
        """Return everything after the first ':' of a ``key:value`` line."""
        # Splitting only once keeps values that themselves contain ':'
        # (an SSID such as "my:net") intact instead of truncating them.
        return line.split(":", 1)[1]

    def _parse_wifi(self, fields):
        """Build a Wifi from the 11 ``key:value`` lines of one access point."""
        raw_bssid = fields[0].split("BSSID:")[1]
        # Ids continue from the table size so a second read() never reuses
        # an id that is already attributed to another BSSID.
        bssid_id = self.BSSIDOrder.setdefault(raw_bssid, len(self.BSSIDOrder))
        value = self._value
        return Wifi(
            bssid_id,
            value(fields[1]),        # capabilities
            int(value(fields[2])),   # centerFreq0
            int(value(fields[3])),   # centerFreq1
            int(value(fields[4])),   # channelWidth
            int(value(fields[5])),   # frequency
            int(value(fields[6])),   # level
            value(fields[7]),        # operatorFriendlyName
            value(fields[8]),        # SSID
            int(value(fields[9])),   # timestamp
            value(fields[10]),       # venueName
        )

    def read(self, directory="./Scan/Train/"):
        """Parse every regular file of ``directory`` and append its scans.

        The room of a file is its name without extension. Inside a file,
        the text following each ``scan`` marker is one scan: the first line
        carries the date, the second the info text, and the remaining lines
        are read as 12-line records whose first 11 lines describe one
        access point.

        Files are visited in sorted name order so that room ids are
        reproducible. Calling ``read`` again keeps the ids that were already
        assigned to known rooms and BSSIDs.
        """
        # os.listdir returns entries in arbitrary order; sort for stable ids.
        for fname in sorted(os.listdir(directory)):
            path = os.path.join(directory, fname)
            if not os.path.isfile(path):
                # Sub-directories cannot be opened as scan files.
                continue

            # The room is given by the file name.
            room_name = enlever_extension(fname)
            if room_name not in self.pieceOrder:
                new_id = len(self.pieceOrder)
                self.pieceOrder[room_name] = new_id
                self.pieceInverse[new_id] = room_name
            piece = self.pieceOrder[room_name]

            with open(path) as fichier:
                content = fichier.read()

            # Split the file into scans; text before the first marker is ignored.
            for scanData in content.split("scan")[1:]:
                scan_lines = scanData.splitlines()
                scanDate = self._value(scan_lines[0])
                scanInfo = self._value(scan_lines[1])
                wifis_content = scan_lines[2:]

                # Split the scan into access-point records.
                # NOTE(review): the final chunk is always discarded -
                # presumably it is trailing content rather than an access
                # point; confirm against the scan file format.
                records = [wifis_content[x:x + 11]
                           for x in range(0, len(wifis_content), 12)][:-1]

                scanWifis = [self._parse_wifi(record) for record in records]
                self.lScan.append(Scan(scanDate, scanInfo, piece, scanWifis))


### Corrélations

In [4]:
def get_data_csv(scanMan):
    """Flatten the scans held by ``scanMan`` into one DataFrame.

    The result has one row per (scan, wifi) pair. Its columns are the wifi
    attributes listed in ``wifi_fields`` followed by ``piece``,
    ``zone_info`` (the scan's ``info``) and ``date``.

    ``piece`` is the string ``"<scan.piece>:<index>"``. The index is
    incremented after every scan and restarts at 0 when a scan holding at
    least one wifi belongs to a different room than the previous rows.

    An empty ``scanMan.lScan`` yields an empty DataFrame with the same
    columns instead of raising IndexError.
    """
    # "capabilities" and "venueName" are not exported.
    wifi_fields = ("BSSID", "centerFreq0", "centerFreq1", "channelWidth",
                   "frequency", "level", "operatorFriendlyName", "SSID",
                   "timestamp")
    columns = wifi_fields + ("piece", "zone_info", "date")
    dico = {name: [] for name in columns}

    scan_index = 0
    prev_piece = scanMan.lScan[0].piece if scanMan.lScan else None
    for scan in scanMan.lScan:
        # The room change is only acknowledged by a scan that produces rows.
        if scan.lWifi and scan.piece != prev_piece:
            prev_piece = scan.piece
            scan_index = 0
        label = str(scan.piece) + ":" + str(scan_index)

        for wifi in scan.lWifi:
            dico["piece"].append(label)
            dico["zone_info"].append(scan.info)
            dico["date"].append(scan.date)
            for name in wifi_fields:
                dico[name].append(getattr(wifi, name))
        scan_index += 1

    return pd.DataFrame.from_dict(dico)
    


In [5]:
def data_organizer(scanMan, car='level'):
    """Pivot one wifi characteristic into a BSSID x piece table.

    Rows are BSSID ids, columns are a two-level index ``(car, piece)`` and
    each cell is the mean of ``car`` for that pair. Pairs that were never
    observed are filled with 0, and the table is cast to ``int``.
    """
    frame = get_data_csv(scanMan)[['BSSID', 'piece', car]]

    # `values` is given as a list so the ``car`` level stays in the columns
    # (callers index the result with ``[car]``); the aggregation is named by
    # string because passing the numpy callable is deprecated in pandas.
    # NOTE(review): with car='level' the filler 0 is greater than every
    # observed (negative) level - confirm this is the intended placeholder.
    table = frame.pivot_table(values=[car],
                              index=['BSSID'],
                              columns=['piece'],
                              aggfunc={car: 'mean'},
                              fill_value=0)

    # astype(int) drops the fractional part of the means.
    return table.astype(int)

def all_data_organizer(scanMan):
    """Pivot ``level`` and ``frequency`` into a BSSID x piece table.

    Rows are BSSID ids, columns are a two-level index
    ``(characteristic, piece)`` and each cell is the mean of the
    characteristic for that pair. Unobserved pairs are filled with 0 and
    the table is cast to ``int``.
    """
    frame = get_data_csv(scanMan)

    # Only the two aggregated columns are passed as values; aggregations are
    # named by string because passing the numpy callable is deprecated.
    table = frame.pivot_table(values=['level', 'frequency'],
                              index=['BSSID'],
                              columns=['piece'],
                              aggfunc={'level': 'mean',
                                       'frequency': 'mean'},
                              fill_value=0)

    # astype(int) drops the fractional part of the means.
    return table.astype(int)

def data_organizer2(scanMan, car='level'):
    """Return the ``piece`` and ``car`` columns indexed by BSSID (no pivot).

    Only ``car`` is cast to ``int``: ``piece`` holds ``"<room>:<scan>"``
    strings, so casting the whole frame would raise ValueError.
    """
    frame = get_data_csv(scanMan)[['BSSID', 'piece', car]]
    frame = frame.set_index(['BSSID'])
    return frame.astype({car: int})

In [6]:
# Parse the scans of the default directory and preview the flattened table.
scanMan = ScanManager()
scanMan.read()
all_data_csv = get_data_csv(scanMan)
# Last expression of the cell: the notebook displays the first rows.
all_data_csv.head()

Unnamed: 0,BSSID,centerFreq0,centerFreq1,channelWidth,frequency,level,operatorFriendlyName,SSID,timestamp,piece,zone_info,date
0,0,0,0,0,2437,-61,,eduroam,1595678334712,0:0,entree,2021-04-16_10-10-42
1,1,0,0,0,2437,-61,,visiteurs,1595678334731,0:0,entree,2021-04-16_10-10-42
2,2,0,0,0,2412,-67,,UnivToulon,1595678334808,0:0,entree,2021-04-16_10-10-42
3,3,0,0,0,2412,-68,,eduroam,1595678334819,0:0,entree,2021-04-16_10-10-42
4,4,0,0,0,2412,-68,,visiteurs,1595678334744,0:0,entree,2021-04-16_10-10-42


In [7]:
# Re-create the manager, read the default directory again and build the
# BSSID x piece table of mean levels.
scanMan = ScanManager()
scanMan.read()
data_csv = data_organizer(scanMan)
# data_csv['BSSID'] = int(data_csv['BSSID'].replace(":", ""),  16)

print(data_csv)

      level                                       ...                        \
piece   0:0 0:1 0:2 0:3 0:4 0:5 0:6 0:7 0:8 10:0  ... 9:10 9:11 9:2 9:3 9:4   
BSSID                                             ...                         
0       -61 -61   0   0   0 -52   0 -73 -49    0  ...    0    0   0   0   0   
1       -61 -64   0   0   0 -54   0 -74 -49    0  ...    0    0   0   0   0   
2       -67 -66 -70 -82 -76 -77 -78 -89 -90    0  ...    0    0   0   0   0   
3       -68 -67 -71 -80 -76 -77 -80 -72 -88    0  ...    0    0   0   0   0   
4       -68 -67 -70   0 -78 -77 -79 -75 -89    0  ...    0    0   0   0   0   
...     ...  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...  ...  ...  ..  ..  ..   
203       0   0   0   0   0   0   0   0   0    0  ...  -92    0   0   0   0   
204       0   0   0   0   0   0   0   0   0  -80  ...    0    0   0   0   0   
205       0   0   0   0   0   0   0   0   0    0  ...    0    0   0   0   0   
206       0   0   0   0   0   0   0   0   0    0  ..

In [8]:
# Pivot both 'level' and 'frequency' and display the resulting table.
all_org_data_csv = all_data_organizer(scanMan)
all_org_data_csv  

Unnamed: 0_level_0,frequency,frequency,frequency,frequency,frequency,frequency,frequency,frequency,frequency,frequency,...,level,level,level,level,level,level,level,level,level,level
piece,0:0,0:1,0:2,0:3,0:4,0:5,0:6,0:7,0:8,10:0,...,9:10,9:11,9:2,9:3,9:4,9:5,9:6,9:7,9:8,9:9
BSSID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,2437,2437,0,0,0,2437,0,2437,2437,0,...,0,0,0,0,0,0,0,0,0,0
1,2437,2437,0,0,0,2437,0,2437,2437,0,...,0,0,0,0,0,0,0,0,0,0
2,2412,2412,2412,2412,2412,2412,2412,2412,2412,0,...,0,0,0,0,0,0,0,0,0,0
3,2412,2412,2412,2412,2412,2412,2412,2412,2412,0,...,0,0,0,0,0,0,0,0,0,0
4,2412,2412,2412,0,2412,2412,2412,2412,2412,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,0,0,0,0,0,0,0,0,0,0,...,-92,0,0,0,0,0,0,0,0,0
204,0,0,0,0,0,0,0,0,0,2437,...,0,0,0,0,0,0,0,0,0,0
205,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
206,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
def model_data_analyzer(scanMan):
    """Return the BSSID, level and piece columns of the flattened scan table."""
    wanted_columns = ['BSSID', 'level', 'piece']
    full_table = get_data_csv(scanMan)
    return full_table[wanted_columns]
    
# Build the three-column table and display its first rows.
model_data = model_data_analyzer(scanMan)

model_data.head()

Unnamed: 0,BSSID,level,piece
0,0,-61,0:0
1,1,-61,0:0
2,2,-67,0:0
3,3,-68,0:0
4,4,-68,0:0


In [10]:
def calcul_correlation(scanMan):
    """
    Return the pairwise correlation between the columns of the table built
    by all_data_organizer.
    """
    return all_data_organizer(scanMan).corr()


In [11]:
#lvl_data_csv = data_organizer(scanMan, 'level')['level']
#sns.heatmap(lvl_data_csv.corr(), annot=True, cbar=True, cmap="RdYlGn")

In [12]:
#freq_data_csv = data_organizer(scanMan, 'frequency')['frequency']
#sns.heatmap(freq_data_csv.corr(), annot=True, cbar=True, cmap="RdYlGn")

## Création de modèles

In [13]:
# Split the data into training and test sets
def data_separator(model_data_csv):
    """Split the columns of ``model_data_csv`` into train and test samples.

    Every column is one sample, labelled by the integer before the ':' of
    its name. The first column met for each label is always kept for
    training; the remaining columns are split 80/20 by train_test_split
    with random_state=42.

    Returns (x_train, x_test, y_train, y_test) as lists.
    """
    # Shuffle the rows.
    # NOTE(review): sample() reorders the rows, while the loop below walks
    # the columns, so the order in which samples are visited is unchanged -
    # confirm whether shuffling the columns was intended.
    shuffled = model_data_csv.sample(frac=1)

    kept_x, kept_y = [], []   # one sample per room, always in the training set
    pool_x, pool_y = [], []   # every other sample, split below
    for column in shuffled:
        room = int(column.split(':')[0])
        if room in kept_y:
            pool_y.append(room)
            pool_x.append(shuffled[column])
        else:
            kept_y.append(room)
            kept_x.append(shuffled[column])

    # Randomly split the pooled samples into training and test parts.
    train_x, test_x, train_y, test_y = train_test_split(
        pool_x, pool_y, test_size=0.2, random_state=42)

    return train_x + kept_x, test_x, train_y + kept_y, test_y

In [14]:
# Build a KNeighborsClassifier model
def create_KNeighborsClassifier(x_train, x_test, y_train, y_test):
    """Fit a KNeighborsClassifier with default settings and return it.

    Only x_train and y_train are used; x_test and y_test are accepted but
    ignored.
    """
    # fit() returns the fitted estimator itself.
    return KNeighborsClassifier().fit(x_train, y_train)

# Build a RandomForestClassifier model
def create_RandomForestClassifier(x_train, x_test, y_train, y_test):
    """Fit a RandomForestClassifier with default settings and return it.

    Only x_train and y_train are used; x_test and y_test are accepted but
    ignored.
    """
    # fit() returns the fitted estimator itself.
    return RandomForestClassifier().fit(x_train, y_train)

In [15]:
# Mesure de l'efficacite du modele

def get_efficiency(model, x=None, y=None):
    """Print and return the mean of ``model.score`` over 10 evaluations.

    Args:
        model: fitted object exposing ``score(x, y)``.
        x, y: evaluation samples and labels. When omitted, the module-level
            ``x_test`` / ``y_test`` are used, as before.

    Returns:
        The mean score formatted with two decimals, as a string.
    """
    # Fall back to the notebook-level test split when no data is supplied.
    if x is None:
        x = x_test
    if y is None:
        y = y_test

    nb = 10
    # NOTE(review): neither the model nor the data changes between
    # iterations, so the rounds presumably all return the same score -
    # confirm whether re-training per round was intended.
    somme = sum(model.score(x, y) for _ in range(nb))

    precision = "{:.2f}".format(somme / nb)
    print("Précisions sur " + str(nb) + " essais pour le modèle = " + str(precision))
    return precision

In [16]:
#model_data_csv = get_data_csv(scanMan)[['piece', 'level', 'frequency', 'BSSID']]
# Keep the 'level' part of the pivot: rows are BSSIDs, columns are pieces.
model_data_csv = data_organizer(scanMan)['level']
#print(model_data_csv)
# These module-level names are the ones get_efficiency reads by default.
x_train, x_test, y_train, y_test = data_separator(model_data_csv)
# Displayed by the notebook: the size of each part of the split.
len(x_train), len(x_test), len(y_train), len(y_test)

(84, 17, 84, 17)

### Test des modèles

In [17]:
# Fit the k-nearest-neighbours model and report its score on the test split.
model = create_KNeighborsClassifier(x_train, x_test, y_train, y_test)
efficiency = get_efficiency(model)

Précisions sur 10 essais pour le modèle = 0.76


In [18]:
# Fit the random-forest model and report its score on the test split.
model = create_RandomForestClassifier(x_train, x_test, y_train, y_test)
efficiency = get_efficiency(model)

Précisions sur 10 essais pour le modèle = 0.82
