In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# 1. Data preparation
# Ten hand-entered weather observations, stored column by column. The values
# mirror the table supplied with the exercise.
# NOTE(review): one 'Precipitation (%)' entry is 107.0, i.e. above 100 % --
# confirm against the source table before relying on that column.
print("1. Persiapan Data")
data = {
    "Temperature": [14.0, 39.0, 30.0, 38.0, 27.0, 32.0, -2.0, 3.0, 3.0, 28.0],
    "Humidity": [73, 96, 64, 83, 74, 55, 97, 85, 83, 74],
    "Wind Speed": [9.5, 8.5, 7.0, 1.5, 17.0, 3.5, 8.0, 6.0, 6.0, 8.5],
    "Precipitation (%)": [
        82.0, 71.0, 16.0, 82.0, 66.0, 26.0, 86.0, 96.0, 66.0, 107.0,
    ],
    "Cloud Cover": [
        "partly cloudy", "partly cloudy", "clear", "clear", "overcast",
        "overcast", "overcast", "partly cloudy", "overcast", "clear",
    ],
    "Atmospheric Pressure": [
        1010.82, 1011.43, 1018.72, 1026.25, 990.67,
        1010.03, 990.87, 984.46, 999.44, 1012.13,
    ],
    "UV Index": [2, 7, 5, 7, 1, 2, 1, 1, 0, 8],
    "Season": [
        "Winter", "Spring", "Spring", "Spring", "Winter",
        "Summer", "Winter", "Winter", "Winter", "Winter",
    ],
    "Visibility (km)": [3.5, 10.0, 5.5, 1.0, 2.5, 5.0, 4.0, 3.5, 1.0, 7.5],
    "Location": [
        "inland", "inland", "mountain", "coastal", "mountain",
        "inland", "inland", "inland", "mountain", "coastal",
    ],
    # Target column: the class the model has to predict.
    "Weather Type": [
        "Rainy", "Cloudy", "Sunny", "Sunny", "Rainy",
        "Cloudy", "Snowy", "Snowy", "Snowy", "Sunny",
    ],
}
df = pd.DataFrame(data)
# Show the heading and the first five rows of the raw table.
print("Data asli:", df.head(), sep="\n")

# 2. Pre-processing (turn categorical columns into numeric ones)
print("\n2. Pre-processing Data")

# Label-encode the target column ('Weather Type'). LabelEncoder assigns the
# integer codes in sorted order of the class names, so here:
# 0=Cloudy, 1=Rainy, 2=Snowy, 3=Sunny.
le_target = LabelEncoder()
df['Weather Type_encoded'] = le_target.fit_transform(df['Weather Type'])

# One-hot encode the remaining categorical features. drop_first=True removes
# one dummy per feature to avoid perfectly collinear columns.
# dtype=int is required: from pandas 2.0 on, get_dummies emits bool columns by
# default, and the numeric-only filter below would then silently discard every
# one-hot feature.
categorical_features = ['Cloud Cover', 'Season', 'Location']
df_encoded = pd.get_dummies(
    df, columns=categorical_features, drop_first=True, dtype=int
)

# Split into features (X) and target (y): X is everything except the original
# string target and its encoded copy.
X = df_encoded.drop(columns=['Weather Type', 'Weather Type_encoded'])
y = df_encoded['Weather Type_encoded']

# Safety net: keep numeric columns only, in case a string column slipped
# through the encoding step.
X = X.select_dtypes(include=np.number)

print("Fitur setelah encoding dan pembersihan:")
print(X.head())

# 3. Feature scaling (important for KNN, which is distance based)
print("\n3. Scaling Fitur Numerik")

# Decide the train/test membership BEFORE fitting the scaler, so that the test
# rows do not leak into the mean / standard deviation used for scaling.
# The split depends only on the number of rows, y and random_state, so the
# rows selected are the same as when splitting the already-scaled frame.
# With only 10 rows this split is for demonstration purposes only.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42, stratify=y
)

# Fit on the training rows only, then apply the same transform to every row.
scaler = StandardScaler()
scaler.fit(X_train_raw)
X_scaled = scaler.transform(X)
X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)

# Mean 0 / standard deviation 1 now holds exactly for the training rows; the
# full table shown here is expressed in those training-set units.
print("Fitur setelah scaling (Standar Deviasi = 1, Mean = 0):")
print(X_scaled_df.head())

# 4. Train / test split (60 % / 40 %, stratified on the target)
print("\n4. Membagi Data (Training: 60%, Testing: 40%)")
# Pick the scaled rows that belong to each side of the split computed above;
# .loc with the split's index keeps the shuffled row order.
X_train = X_scaled_df.loc[X_train_raw.index]
X_test = X_scaled_df.loc[X_test_raw.index]

print(f"Jumlah data Training: {len(X_train)} baris")
print(f"Jumlah data Testing: {len(X_test)} baris")

# 5. K-nearest-neighbours classifier
# NOTE: the heading below hard-codes "K=3"; keep it in sync with k_value.
print("\n5. Implementasi Model KNN (K=3)")

# Number of neighbours that vote on each prediction.
k_value = 3

# Build the classifier and train it in one step (fit returns the estimator).
knn_model = KNeighborsClassifier(n_neighbors=k_value).fit(X_train, y_train)

# Predict the encoded weather type for the held-out rows.
y_pred = knn_model.predict(X_test)

# 6. Model evaluation
print("\n6. Evaluasi Model")

# Overall accuracy: share of test rows whose predicted class is correct.
accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi Model (K={k_value}): {accuracy:.4f}")

# Per-class precision / recall / F1.
# The test set is tiny (40 % of 10 rows = 4 rows), so these figures are not
# statistically meaningful. Passing `labels` explicitly keeps the report
# aligned with `target_names` even when a class is absent from both y_test and
# y_pred; without it classification_report raises on the length mismatch.
# LabelEncoder codes are 0..n_classes-1 in the order of le_target.classes_.
class_labels = np.arange(len(le_target.classes_))
print("\nClassification Report (Metrik per Kelas):")
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_labels,
        target_names=le_target.classes_,
        zero_division=0,
    )
)

1. Persiapan Data
Data asli:
   Temperature  Humidity  Wind Speed  Precipitation (%)    Cloud Cover  \
0         14.0        73         9.5               82.0  partly cloudy   
1         39.0        96         8.5               71.0  partly cloudy   
2         30.0        64         7.0               16.0          clear   
3         38.0        83         1.5               82.0          clear   
4         27.0        74        17.0               66.0       overcast   

   Atmospheric Pressure  UV Index  Season  Visibility (km)  Location  \
0               1010.82         2  Winter              3.5    inland   
1               1011.43         7  Spring             10.0    inland   
2               1018.72         5  Spring              5.5  mountain   
3               1026.25         7  Spring              1.0   coastal   
4                990.67         1  Winter              2.5  mountain   

  Weather Type  
0        Rainy  
1       Cloudy  
2        Sunny  
3        Sunny  
4       