In [26]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC

In [27]:
# Load the dataset; the path is resolved relative to the current working directory.
df = pd.read_csv(filepath_or_buffer='smoking.csv')

In [28]:
# Summarize the frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   ID                    10 non-null     int64 
 1   Umur                  10 non-null     int64 
 2   Jenis_Kelamin         10 non-null     object
 3   Batuk_Berkepanjangan  10 non-null     object
 4   Sesak_Napas           10 non-null     object
 5   Merokok               10 non-null     object
 6   Riwayat_Keluarga      10 non-null     object
 7   Hasil_Rontgen         10 non-null     object
 8   Diagnosa              10 non-null     object
dtypes: int64(2), object(7)
memory usage: 852.0+ bytes


In [29]:
# List the column labels of the loaded frame.
df.columns

Index(['ID', 'Umur', 'Jenis_Kelamin', 'Batuk_Berkepanjangan', 'Sesak_Napas',
       'Merokok', 'Riwayat_Keluarga', 'Hasil_Rontgen', 'Diagnosa'],
      dtype='object')

In [30]:
# Preview the first rows of the frame (head() shows five rows here).
df.head()

Unnamed: 0,ID,Umur,Jenis_Kelamin,Batuk_Berkepanjangan,Sesak_Napas,Merokok,Riwayat_Keluarga,Hasil_Rontgen,Diagnosa
0,1,45,Pria,Ya,Ya,Ya,Tidak,Tidak_Normal,Ya
1,2,30,Wanita,Tidak,Tidak,Tidak,Tidak,Normal,Tidak
2,3,60,Pria,Ya,Ya,Ya,Ya,Tidak_Normal,Ya
3,4,38,Pria,Ya,Tidak,Tidak,Tidak,Normal,Tidak
4,5,52,Wanita,Tidak,Ya,Tidak,Ya,Tidak_Normal,Ya


In [31]:
# Split the frame into the feature matrix X and the target y.
# "ID" looks like a sequential row identifier (1, 2, 3, ... in the preview), not
# a measurement, so it is dropped together with the target; otherwise the
# scaler, PCA and SVM would treat an arbitrary counter as a real feature.
X = df.drop(columns=["ID", "Diagnosa"])
y = df["Diagnosa"]

In [32]:
# 3. Encode the categorical (object-dtype) feature columns as integer codes.
# Every column gets its own encoder, stored in `feature_encoders`, so each
# category <-> code mapping stays available after the cell has run.
# The values are always read from the untouched `df`, which makes the cell
# safe to re-run: a second run never tries to re-encode integer codes.
feature_encoders = {}
for col in df[X.columns].select_dtypes(include=['object']).columns:
    feature_encoders[col] = LabelEncoder()
    X[col] = feature_encoders[col].fit_transform(df[col])

# 4. Encode the target with a dedicated encoder. `le` keeps the label mapping,
# so `le.inverse_transform(...)` turns predicted codes back into the
# original "Diagnosa" labels.
le = LabelEncoder()
y = le.fit_transform(df["Diagnosa"])

In [33]:
# Print the first rows of the original frame. The encoding step was applied to
# X (and y), so df itself still holds the raw string categories.
print(df.head())

   ID  Umur Jenis_Kelamin Batuk_Berkepanjangan Sesak_Napas Merokok  \
0   1    45          Pria                   Ya          Ya      Ya   
1   2    30        Wanita                Tidak       Tidak   Tidak   
2   3    60          Pria                   Ya          Ya      Ya   
3   4    38          Pria                   Ya       Tidak   Tidak   
4   5    52        Wanita                Tidak          Ya   Tidak   

  Riwayat_Keluarga Hasil_Rontgen Diagnosa  
0            Tidak  Tidak_Normal       Ya  
1            Tidak        Normal    Tidak  
2               Ya  Tidak_Normal       Ya  
3            Tidak        Normal    Tidak  
4               Ya  Tidak_Normal       Ya  


In [34]:
# Hold out 20% of the rows as a test split; random_state pins the shuffle so
# the same rows are selected on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [35]:
# Re-check the frame summary; df still reports its original object-dtype columns here.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   ID                    10 non-null     int64 
 1   Umur                  10 non-null     int64 
 2   Jenis_Kelamin         10 non-null     object
 3   Batuk_Berkepanjangan  10 non-null     object
 4   Sesak_Napas           10 non-null     object
 5   Merokok               10 non-null     object
 6   Riwayat_Keluarga      10 non-null     object
 7   Hasil_Rontgen         10 non-null     object
 8   Diagnosa              10 non-null     object
dtypes: int64(2), object(7)
memory usage: 852.0+ bytes


In [42]:
# Split the dataset into training and test data (80% / 20%), with a fixed seed
# for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)

In [43]:
# Standardize the features. The scaler learns its statistics from the training
# split only and then applies them unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [44]:
# Reduce the feature space to 2 dimensions with PCA.
# The projection is fitted on the training split only; the test split is then
# transformed with that same fitted projection.
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

In [45]:
# Create a linear-kernel SVM and train it on the 2-D PCA projection of the
# training split. fit() returns the estimator itself, so it can be chained.
model = SVC(kernel='linear', decision_function_shape='ovr').fit(X_train_pca, y_train)
model  # keep the fitted estimator as the cell's displayed output

In [46]:
# Evaluate the model on the held-out, PCA-projected test split.
# With 10 rows and test_size=0.2 the test split is tiny, so read this score
# with caution.
accuracy = accuracy_score(y_true=y_test, y_pred=model.predict(X_test_pca))
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00
