In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split

## Support Vector Machine

## Reading [Smoke Detection](https://www.kaggle.com/datasets/deepcontractor/smoke-detection-dataset/) dataset

[Real-time Smoke Detection with AI-based Sensor Fusion](https://www.hackster.io/stefanblattmann/real-time-smoke-detection-with-ai-based-sensor-fusion-1086e6)

In [17]:
# Load the raw sensor readings from the CSV that sits next to the notebook.
smoke_train = pd.read_csv("./smoke_detection_iot.csv")

# Remove the columns that are not used as model inputs. An earlier variant
# dropped only 'Index', 'UTC' and 'CNT'; the current feature set additionally
# excludes 'PM1.0' and 'eCO2[ppm]'.
smoke_edited = smoke_train.drop(
    columns=['Index', 'UTC', 'CNT', 'PM1.0', 'eCO2[ppm]'],
)

# Preview the first rows of the reduced frame.
smoke_edited.head()

Unnamed: 0,Temperature[C],Humidity[%],TVOC[ppb],Raw H2,Raw Ethanol,Pressure[hPa],PM2.5,NC0.5,NC1.0,NC2.5,Fire Alarm
0,20.0,57.36,0,12306,18520,939.735,0.0,0.0,0.0,0.0,0
1,20.015,56.67,0,12345,18651,939.744,0.0,0.0,0.0,0.0,0
2,20.029,55.96,0,12374,18764,939.738,0.0,0.0,0.0,0.0,0
3,20.044,55.28,0,12390,18849,939.736,0.0,0.0,0.0,0.0,0
4,20.059,54.69,0,12403,18921,939.744,0.0,0.0,0.0,0.0,0


In [18]:
# Summarise the remaining columns: dtype and non-null count per column.
smoke_edited.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62630 entries, 0 to 62629
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Temperature[C]  62630 non-null  float64
 1   Humidity[%]     62630 non-null  float64
 2   TVOC[ppb]       62630 non-null  int64  
 3   Raw H2          62630 non-null  int64  
 4   Raw Ethanol     62630 non-null  int64  
 5   Pressure[hPa]   62630 non-null  float64
 6   PM2.5           62630 non-null  float64
 7   NC0.5           62630 non-null  float64
 8   NC1.0           62630 non-null  float64
 9   NC2.5           62630 non-null  float64
 10  Fire Alarm      62630 non-null  int64  
dtypes: float64(7), int64(4)
memory usage: 5.3 MB


## Processing Smoke detection

In [19]:
# Count missing values per column (isna is the pandas alias of isnull).
smoke_edited.isna().sum()

Temperature[C]    0
Humidity[%]       0
TVOC[ppb]         0
Raw H2            0
Raw Ethanol       0
Pressure[hPa]     0
PM2.5             0
NC0.5             0
NC1.0             0
NC2.5             0
Fire Alarm        0
dtype: int64

## Split data

In [20]:
# Separate the feature matrix from the 'Fire Alarm' target column.
X = smoke_edited.drop(['Fire Alarm'], axis = 1)
y = smoke_edited['Fire Alarm']

# Hold out 20% of the rows for testing. The seed is fixed so that the split --
# and therefore every metric computed from it -- is the same after a kernel
# restart; without it each run evaluates on a different random test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

# Standardization: fit the scaler on the training split only, so that no
# statistics from the test split leak into the preprocessing step.
scaler = StandardScaler()
scaler.fit(X_train)

In [21]:
# Apply the scaling learned from the training split to both splits.
scaled_X_train, scaled_X_test = scaler.transform(X_train), scaler.transform(X_test)

## SVM model

In [22]:
# Baseline RBF-kernel SVM with hand-picked hyperparameters.
model = SVC(
    kernel='rbf',
    C=1.0,
    gamma=1.0,
    cache_size=7000,  # kernel cache size, in MB per the scikit-learn docs
)

# Train on the standardized training split.
model.fit(scaled_X_train, y_train)

In [23]:
# Predict labels for the standardized hold-out split with the baseline model.
y_pred = model.predict(X=scaled_X_test)

## Evaluation

In [24]:
# Confusion matrix of the baseline model: rows are true classes, columns are
# predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

[[3416   65]
 [  13 9032]]


In [25]:
# Per-class precision, recall and F1 of the baseline model on the test split.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99      3481
           1       0.99      1.00      1.00      9045

    accuracy                           0.99     12526
   macro avg       0.99      0.99      0.99     12526
weighted avg       0.99      0.99      0.99     12526


## Gridsearch

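- `C` -- regularization parameter. The regularization strength is inversely proportional to `C`: a small `C` tolerates more misclassified training points in exchange for a wider margin, while a large `C` penalizes them heavily and fits the training data more tightly.
- `gamma` -- RBF kernel coefficient. It controls how far the influence of a single training example reaches: a small `gamma` gives a smooth decision boundary, while a large `gamma` gives a more complex, local one.
- kernel "rbf" -- the radial basis function (Gaussian) kernel, $K(x, x') = \exp(-\gamma \lVert x - x' \rVert^2)$.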

In [26]:
# Hyperparameter candidates for the search: 5 values of C x 5 values of gamma,
# all with the RBF kernel.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

In [27]:
# Cross-validated search over param_grid.
# refit=True retrains the best candidate on the whole training set, so `grid`
# can be used directly for prediction afterwards.
# n_jobs=-1 runs the independent candidate/fold fits on all available cores;
# the search is otherwise executed serially. The selected parameters are
# unaffected by the degree of parallelism.
grid = GridSearchCV(SVC(), param_grid, refit=True, n_jobs=-1)

In [28]:
# Run the grid search on the standardized training split.
grid.fit(X=scaled_X_train, y=y_train)

In [29]:
# Hyperparameter combination selected by the grid search.
grid.best_params_

{'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}

In [30]:
# Estimator built from the selected hyperparameters.
grid.best_estimator_

In [31]:
# Predict on the standardized hold-out split with the tuned model and show its
# confusion matrix (rows are true classes, columns are predicted classes).
grid_predictions = grid.predict(X=scaled_X_test)
print(confusion_matrix(y_true=y_test, y_pred=grid_predictions))

[[3478    3]
 [   3 9042]]


In [32]:
# Per-class precision, recall and F1 of the tuned model on the test split.
print(classification_report(y_true=y_test, y_pred=grid_predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3481
           1       1.00      1.00      1.00      9045

    accuracy                           1.00     12526
   macro avg       1.00      1.00      1.00     12526
weighted avg       1.00      1.00      1.00     12526
