In [1]:
import psycopg2
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV

# connect to db 

In [2]:
host = 'localhost'
port = 5432
user = 'postgres'
password = ''
db_name = 'ble_rssi'
conn = psycopg2.connect(host=host, port=port, user=user, password=password, database=db_name)

# create df


In [3]:
query = "select * from rssi_log where (card_x=1 and card_y=1) or (card_x=4.25 and card_y=2.45)"
card_pos = {3131: "1_1", 2292: "4.25_2.45"}
classes = ["1_1", "4.25_2.45"]
data = {"msi-gt70": [], "raspberry-10": [], "erhan-e570": [], "class": []}
with conn.cursor() as cur:
    cur.execute(query)
    i = 0
    for row in cur:
        data[row[2]].append(row[3])
        #data["rssi"].append(row[3])
        if i % 3 == 0:
            data["class"].append(card_pos[row[1]])
        i+=1

conn.commit()

In [4]:
for k,v in data.items():
    print(f'{k}: {len(v)}')

msi-gt70: 976
raspberry-10: 976
erhan-e570: 976
class: 976


In [5]:
df = pd.DataFrame(data=data , columns=["msi-gt70", "raspberry-10", "erhan-e570", "class"])
df

Unnamed: 0,msi-gt70,raspberry-10,erhan-e570,class
0,-69.0,-57.5,-64.0,1_1
1,-65.5,-42.0,-48.5,1_1
2,-71.0,-59.0,-58.0,1_1
3,-67.5,-43.0,-60.0,1_1
4,-68.0,-41.0,-60.0,1_1
...,...,...,...,...
971,-63.0,-49.0,-51.0,4.25_2.45
972,-64.0,-56.0,-49.0,4.25_2.45
973,-64.0,-51.5,-48.5,4.25_2.45
974,-64.0,-60.0,-45.0,4.25_2.45


# x,y train&test 

In [6]:
features = ["msi-gt70", "raspberry-10", "erhan-e570"]
# get features
x = df.loc[:,features].values
# get target
y = df.loc[:,['class']].values

In [7]:
# 80% train, 20% test 
x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=0.2, random_state=0)

# Dimension of Train and Test set 
print("Dimension of Train set",x_train.shape)
print("Dimension of Test set",x_test.shape,"\n")

Dimension of Train set (780, 3)
Dimension of Test set (196, 3) 



In [8]:
x_train = pd.DataFrame(data=x_train[0:,0:],
            index=[i for i in range(x_train.shape[0])],
            columns=features)

In [9]:
y_train_label = pd.DataFrame(data=y_train[0:,0:],
            index=[i for i in range(y_train.shape[0])],
            columns=['class'])
y_train_label = y_train_label['class'].values.astype(object)

In [10]:
y_test_label = pd.DataFrame(data=y_test[0:,0:],
            index=[i for i in range(y_test.shape[0])],
            columns=['class'])
y_test_label = y_test_label['class'].values.astype(object)

In [11]:
# Transforming non numerical labels into numerical labels
from sklearn import preprocessing
encoder = preprocessing.LabelEncoder()

# encoding train labels 
encoder.fit(y_train)
y_train = encoder.transform(y_train_label)

# encoding test labels 
encoder.fit(classes)
y_test = encoder.transform(y_test_label)

#Total Number of Continous and Categorical features in the training set
num_cols = x_train._get_numeric_data().columns
print("Number of numeric features:",num_cols.size)
#list(set(X_train.columns) - set(num_cols))


names_of_predictors = list(x_train.columns.values)

# Scaling the Train and Test feature set 
#from sklearn.preprocessing import StandardScaler
#scaler = StandardScaler()
#x_train_scaled = scaler.fit_transform(x_train)
#x_test_scaled = scaler.transform(x_test)

Number of numeric features: 3


  y = column_or_1d(y, warn=True)


# Hyperparameter tuning using grid search and cross validation

In [12]:
#Libraries to Build Ensemble Model : Random Forest Classifier 
# Create the parameter grid based on the results of random search 
params_grid = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 2, 5, 10]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
# Performing CV to tune parameters for best SVM fit 
svm_model = GridSearchCV(SVC(probability=True), params_grid, cv=5)
svm_model.fit(x_train, y_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=True, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid=[{'C': [1, 2, 5, 10], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']},
                         {'C': [1, 10, 100, 1000], 'kernel': ['linear']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

# confusion matrix and accuracy score 

In [13]:
# View the accuracy score
print('Best score for training data:', svm_model.best_score_,"\n") 

# View the best parameters for the model found using grid search
print('Best C:',svm_model.best_estimator_.C,"\n") 
print('Best Kernel:',svm_model.best_estimator_.kernel,"\n")
print('Best Gamma:',svm_model.best_estimator_.gamma,"\n")

final_model = svm_model.best_estimator_
y_pred = final_model.predict(x_test)
y_pred_label = list(encoder.inverse_transform(y_pred))

Best score for training data: 0.9371794871794872 

Best C: 10 

Best Kernel: rbf 

Best Gamma: 0.001 



In [16]:
# Making the Confusion Matrix
print(confusion_matrix(y_test_label, y_pred_label))
print("\n")
print(classification_report(y_test_label, y_pred_label))

print("Training set score for SVM: %f" % final_model.score(x_train, y_train))
print("Testing set score for SVM: %f" % final_model.score(x_test, y_test))

svm_model.score

[[ 26   6]
 [  5 159]]


              precision    recall  f1-score   support

         1_1       0.84      0.81      0.83        32
   4.25_2.45       0.96      0.97      0.97       164

    accuracy                           0.94       196
   macro avg       0.90      0.89      0.90       196
weighted avg       0.94      0.94      0.94       196

Training set score for SVM: 0.942308
Testing set score for SVM: 0.943878


<bound method BaseSearchCV.score of GridSearchCV(cv=5, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=True, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid=[{'C': [1, 2, 5, 10], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']},
                         {'C': [1, 10, 100, 1000], 'kernel': ['linear']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)>

In [17]:
# single prediction
vals = [-65, -54, -55]
temp = np.array(vals).reshape(1,3)
temp = pd.DataFrame(data=temp,
            index=[i for i in range(temp.shape[0])],
            columns=features)
prob = final_model.predict_proba(temp)
prob

array([[0.54891892, 0.45108108]])

# save the model - pickle 

In [18]:
import pickle
# save the model to disk
filename = 'svm_final_model.sav'
pickle.dump(final_model, open(filename, 'wb'))