In [1]:
import math
import pickle
import re

import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

In [10]:
# Load the capture and keep only the rows whose "mac" column matches the single
# device address this notebook works with.
packets = pd.read_csv("packets_0.csv").loc[
    lambda frame: frame["mac"] == "48:a3:80:0b:7c:fa"
]

In [11]:
# Bucket the packets by the node that reported them, then pull out one
# de-duplicated frame for each of the node ids 1, 2 and 3.
node_groups = packets.groupby("node_id")
node1_packets, node2_packets, node3_packets = (
    node_groups.get_group(node_id).drop_duplicates() for node_id in (1, 2, 3)
)


In [12]:
# Align the three nodes' rows on their shared "ts" value. pandas' default merge
# suffixes tag the colliding columns of the first merge with _x (node 1) and
# _y (node 2); node 3's columns then keep their bare names because nothing is
# left to collide with. Only the three RSSI columns and "location" are kept,
# and the RSSI columns are renamed to rssi_b / rssi_m / rssi_n.
merged_data = (
    node1_packets
    .merge(node2_packets, on="ts")
    .merge(node3_packets, on="ts")
    .filter(items=["rssi_x", "rssi_y", "rssi", "location"])
    .rename(columns={"rssi_x": "rssi_b", "rssi_y": "rssi_m", "rssi": "rssi_n"})
)

In [13]:
# One group per "location" label; the split helper pulls groups from here by key.
grouped_data = merged_data.groupby(by="location")

In [14]:
def split_to_train_and_test(location, train_fraction=0.75):
    """Split one location's samples into a leading train part and a trailing test part.

    The rows are taken from the module-level ``grouped_data`` group-by object
    and are split in their existing order (no shuffling): the first
    ``floor(train_fraction * n)`` rows become the train part, the rest the
    test part.

    Args:
        location: Group key to look up in ``grouped_data``.
        train_fraction: Share of the rows assigned to the train part, between
            0 and 1 inclusive. Defaults to 0.75, the ratio this function
            originally hard-coded.

    Returns:
        A ``(train, test)`` tuple of DataFrames.

    Raises:
        ValueError: If ``train_fraction`` is outside ``[0, 1]``.
        KeyError: If ``location`` is not a group in ``grouped_data``.
    """
    if not 0 <= train_fraction <= 1:
        raise ValueError(
            "train_fraction must be between 0 and 1, got {f}".format(f=train_fraction)
        )

    samples = grouped_data.get_group(location)
    train_size = math.floor(train_fraction * len(samples))
    # .iloc makes it explicit that the cut is positional, independent of the
    # frame's index labels.
    return samples.iloc[:train_size], samples.iloc[train_size:]

In [15]:
# Start both accumulators as separate, empty frames.
train_data, test_data = pd.DataFrame(), pd.DataFrame()

In [16]:
# Split each location on its own so every location contributes rows to both
# the train and the test set.
train_parts, test_parts = [], []
for loc in range(1, 7):
    print("Adding location {l} to train data".format(l=loc))
    train, test = split_to_train_and_test(loc)
    train_parts.append(train)
    test_parts.append(test)

# Concatenate once at the end: growing a frame with pd.concat inside the loop
# re-copies all earlier rows on every pass, and seeding it with an empty
# DataFrame triggers pandas' deprecation warning about empty concat entries.
# reset_index() (without drop=True) keeps the previous row labels in an
# "index" column, exactly as before.
train_data = pd.concat(train_parts).reset_index()
test_data = pd.concat(test_parts).reset_index()


Adding location 1 to train data
Adding location 2 to train data
Adding location 3 to train data
Adding location 4 to train data
Adding location 5 to train data
Adding location 6 to train data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [9]:
# Per-location mean of every column in the training set.
train_data.groupby(by="location").mean()

Unnamed: 0_level_0,index,rssi_b,rssi_m,rssi_n
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,104.106918,-59.735849,-62.314465,-52.748428
2,381.5,-64.216312,-68.382979,-51.992908
3,725.0,-65.72093,-72.897674,-49.888372
4,1034.5,-67.303846,-70.307692,-50.503846
5,1610.178282,-66.844408,-73.419773,-49.058347


In [226]:
def input_fn_train():
    """Return the training set as a ``(features, labels)`` pair of arrays.

    Features are the ``rssi_m``, ``rssi_n`` and ``rssi_b`` columns of the
    module-level ``train_data`` frame, in that order; labels are its
    ``location`` column.
    """
    feature_columns = ["rssi_m", "rssi_n", "rssi_b"]
    return train_data.filter(feature_columns).values, train_data["location"].values

In [235]:
# Show the feature columns of the test set, in the order the model receives them.
test_data.filter(items=["rssi_m", "rssi_n", "rssi_b"])

Unnamed: 0,rssi_m,rssi_n,rssi_b
0,-70,-48,-60
1,-70,-50,-62
2,-64,-52,-62
3,-65,-60,-61
4,-65,-52,-61
5,-65,-53,-61
6,-65,-62,-61
7,-65,-52,-62
8,-69,-50,-60
9,-68,-49,-60


In [227]:
def input_fn_eval():
    """Return the held-out set as a ``(features, labels)`` pair of arrays.

    Features are the ``rssi_m``, ``rssi_n`` and ``rssi_b`` columns of the
    module-level ``test_data`` frame, in that order; labels are its
    ``location`` column.
    """
    feature_columns = ["rssi_m", "rssi_n", "rssi_b"]
    return test_data.filter(feature_columns).values, test_data["location"].values

In [228]:
# Fetch the training arrays first, then build and fit the classifier.
x, y = input_fn_train()
knn = KNeighborsClassifier(leaf_size=50, n_neighbors=11)
# Left as the cell's final expression so the fitted estimator is displayed.
knn.fit(x, y)

KNeighborsClassifier(algorithm='auto', leaf_size=50, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=11, p=2,
                     weights='uniform')

In [229]:
# Score the fitted classifier on the held-out rows; the value is shown as the
# cell's output.
test_x, test_y = input_fn_eval()
knn.score(X=test_x, y=test_y)

0.43106796116504853

In [230]:
# Pair every prediction with its true label:
# column 0 = predicted location, column 1 = actual location.
predicted = pd.DataFrame(data=[*zip(knn.predict(test_x), test_y)])

In [232]:
def get_loc_precision(loc):
    """Return the share of test samples truly at ``loc`` that were predicted as ``loc``.

    Reads the module-level ``predicted`` frame, where column 0 holds the
    predicted label and column 1 the true label of each test sample.

    Note: despite the name, hits / (samples whose true label is ``loc``) is
    the per-class recall (hit rate) rather than precision. The computation is
    kept as it was.

    Args:
        loc: Location label to evaluate.

    Returns:
        A float in ``[0, 1]``, or NaN when no sample has true label ``loc``
        (previously this case raised ``ZeroDivisionError``).
    """
    is_actual = predicted[1] == loc
    actual_count = int(is_actual.sum())
    if actual_count == 0:
        return float("nan")

    # A single combined mask replaces the chained df[mask1][mask2] lookup,
    # which applied a full-length mask to an already-filtered frame and made
    # pandas warn that the boolean key had to be reindexed.
    hit_count = int((is_actual & (predicted[0] == loc)).sum())
    return hit_count / actual_count

# Report the per-location score for locations 1 through 5, one per line.
for loc in range(1, 6):
    print(get_loc_precision(loc))


0.3333333333333333
0.24210526315789474
0.4305555555555556
0.27586206896551724
0.6086956521739131


  


In [136]:
# Inspect what the model predicted for the samples whose true location is 4.
predicted.loc[predicted[1] == 4]

Unnamed: 0,0,1
221,4,4
222,4,4
223,4,4
224,4,4
225,4,4
226,3,4
227,2,4
228,3,4
229,2,4
230,5,4


In [234]:
# Persist the fitted classifier to disk. `pickle` is imported in the notebook's
# import cell rather than here, so all dependencies are visible in one place.
# NOTE: unpickling can execute arbitrary code, so only load model.pickle from
# a source you trust.
with open("model.pickle", "wb") as f:
    pickle.dump(knn, f)