# Preprocessing and Training Data for Arduino

## Preprocessing

In [10]:
import pandas as pd
# Read dataset.csv into a DataFrame for inspection in the cells below.
Water_Dataset = pd.read_csv('dataset.csv')

In [11]:
# Display the DataFrame; a bare last expression is rendered as the cell output.
Water_Dataset

Unnamed: 0,Suhu,pH,Turbidity,target,target_name
0,28.62,6.82,127.88,0,Bersih
1,28.19,6.77,219.75,0,Bersih
2,27.94,6.88,588.09,0,Bersih
3,28.31,6.90,546.58,0,Bersih
4,27.25,6.56,462.89,0,Bersih
...,...,...,...,...,...
85,29.91,8.69,1216.80,2,Perlu Dikuras
86,29.12,8.91,1661.67,2,Perlu Dikuras
87,29.47,8.44,1325.52,2,Perlu Dikuras
88,29.77,8.38,1627.69,2,Perlu Dikuras


In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
Water_Dataset.describe()

Unnamed: 0,Suhu,pH,Turbidity,target
count,90.0,90.0,90.0,90.0
mean,28.609222,8.133556,987.026333,1.0
std,0.953811,0.970424,652.99546,0.821071
min,25.93,6.56,127.88,0.0
25%,27.9775,6.965,397.685,0.0
50%,28.655,8.455,875.285,1.0
75%,29.43,8.95,1411.415,2.0
max,29.94,9.65,2851.47,2.0


In [13]:
# List the column labels of the DataFrame.
Water_Dataset.columns

Index(['Suhu', 'pH', 'Turbidity', 'target', 'target_name'], dtype='object')

In [14]:
# Print the index range, per-column non-null counts and dtypes, and memory usage.
Water_Dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Suhu         90 non-null     float64
 1   pH           90 non-null     float64
 2   Turbidity    90 non-null     float64
 3   target       90 non-null     int64  
 4   target_name  90 non-null     object 
dtypes: float64(3), int64(1), object(1)
memory usage: 3.6+ KB


In [15]:
# (rows, columns) of the DataFrame.
Water_Dataset.shape

(90, 5)

In [16]:
# Distinct values found in the target_name column.
Water_Dataset['target_name'].unique()

array(['Bersih', 'Perlu Diencerkan', 'Perlu Dikuras'], dtype=object)

In [17]:
# NOTE(review): same describe() call as the earlier In [12] cell; the output is identical.
Water_Dataset.describe()

Unnamed: 0,Suhu,pH,Turbidity,target
count,90.0,90.0,90.0,90.0
mean,28.609222,8.133556,987.026333,1.0
std,0.953811,0.970424,652.99546,0.821071
min,25.93,6.56,127.88,0.0
25%,27.9775,6.965,397.685,0.0
50%,28.655,8.455,875.285,1.0
75%,29.43,8.95,1411.415,2.0
max,29.94,9.65,2851.47,2.0


# Train Model

## SVC

In [18]:
import pandas as pd
# Rebind Water_Dataset to the contents of dataset_final.csv, the file used for training.
Water_Dataset = pd.read_csv('dataset_final.csv')

In [19]:
# Labels: the 'target' column.
y = Water_Dataset['target']
# Features: every column except 'target'.
x = Water_Dataset.drop(columns=['target'])

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [21]:
# Hold out 20% of the rows for testing; random_state=0 makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)
# One shape per line: train features, test features, train labels, test labels.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep="\n")

(72, 3)
(18, 3)
(72,)
(18,)


In [22]:
# Train an RBF-kernel SVC on the training split and evaluate it on the held-out split.
model = SVC(C=1000, kernel='rbf', gamma=0.001, decision_function_shape='ovr')
model.fit(x_train, y_train)
pred = model.predict(x_test)

# The report and the matrix span several lines, so each starts on its own line
# (sep="\n"); printed after the label on the same line, their first row is
# pushed out of column alignment.
print("accuracy score is : ", accuracy_score(y_test, pred))
print("classification report is :", classification_report(y_test, pred), sep="\n")
print("confusion matrix is :", confusion_matrix(y_test, pred), sep="\n")

accuarcy score is :  1.0
classification report is :                precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00         4

    accuracy                           1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18

confusion matirx is :  [[7 0 0]
 [0 7 0]
 [0 0 4]]


# Train on the Full Dataset and Port to C Code

In [174]:
import pandas as pd
# Load dataset_final.csv again so this section has its own copy of the data.
Water_Dataset = pd.read_csv('dataset_final.csv')

## SVC

In [179]:
from micromlgen import port
from sklearn.svm import SVC


# Fit the SVC on every row of Water_Dataset, convert it with micromlgen's port(),
# and write the returned code to classifier_svm.h.
if __name__ == '__main__':
    # Labels are the 'target' column; every other column is a feature.
    y = Water_Dataset['target']
    X = Water_Dataset.drop(columns=['target'])

    # No train/test split here: the classifier is fitted on all of X, y.
    clf = SVC(kernel='rbf', C=1000, gamma=0.001, decision_function_shape='ovr')
    clf.fit(X, y)

    # Name for each numeric class id (0, 1, 2), handed to port() as classmap.
    classmap = dict(enumerate(['Bersih', 'Perlu Diencerkan', 'Perlu Dikuras']))

    c_code = port(clf, classmap=classmap)

    with open('classifier_svm.h', 'w') as header_file:
        header_file.write(c_code)