<a href="https://colab.research.google.com/github/hafidzpro/ml/blob/main/knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
# Read the weather dataset from a CSV file located next to the notebook.
file_path = 'weather_classification_data.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Columns that are one-hot encoded by the preprocessing step.
categorical_features = [
    'Cloud Cover',
    'Season',
    'Location',
]

# Columns that are treated as numbers (missing values are imputed).
numerical_features = [
    'Temperature',
    'Humidity',
    'Wind Speed',
    'Precipitation (%)',
    'Atmospheric Pressure',
    'UV Index',
    'Visibility (km)',
]


In [4]:
# Per-column preprocessing:
#   * numerical columns  -> missing values replaced by the column mean
#   * categorical columns -> one-hot encoded
# handle_unknown='ignore' makes a category that was not seen during fit encode
# as all zeros instead of raising at predict time (for example a label that
# only occurs in the test split, or free-form input to predict_weather).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', SimpleImputer(strategy='mean'), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

In [5]:
# Chain the column preprocessing with a 5-nearest-neighbours classifier so
# that both are fitted and applied together.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', KNeighborsClassifier(n_neighbors=5)),
    ]
)

In [6]:
# 'Weather Type' is the label to predict; every other column is a feature.
y = data['Weather Type']
X = data.drop(columns=['Weather Type'])


In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


In [8]:
# Fit the preprocessing steps and the classifier on the training split.
pipeline.fit(X=X_train, y=y_train)

In [9]:
# Predict a label for every row of the held-out split.
y_pred = pipeline.predict(X=X_test)


In [10]:
# Share of held-out rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8946969696969697


In [11]:
# Per-class precision, recall, F1 and support on the held-out split.
print(classification_report(y_true=y_test, y_pred=y_pred))


              precision    recall  f1-score   support

      Cloudy       0.87      0.85      0.86       659
       Rainy       0.86      0.90      0.88       682
       Snowy       0.93      0.93      0.93       664
       Sunny       0.93      0.89      0.91       635

    accuracy                           0.89      2640
   macro avg       0.90      0.89      0.90      2640
weighted avg       0.90      0.89      0.89      2640



In [19]:
def predict_weather(temperature, humidity, wind_speed, precipitation, pressure, uv_index, visibility, cloud_cover, season, location, model=None):
    """Predict the weather type for a single observation.

    The ten readings are assembled into a one-row DataFrame whose column
    names are the ones listed in ``numerical_features`` and
    ``categorical_features``, and that frame is handed to ``model.predict``.
    The prediction is printed and also returned.

    Parameters
    ----------
    temperature, humidity, wind_speed, precipitation, pressure, uv_index, visibility
        Numerical readings. Each one is converted with ``float()``, so numeric
        strings such as ``"3.5"`` are accepted.
    cloud_cover, season, location
        Categorical values; passed through unchanged.
    model : object with a ``predict(DataFrame)`` method, optional
        Estimator used for the prediction. When omitted, the notebook-level
        ``pipeline`` is used, which must already have been fitted.

    Returns
    -------
    The value returned by ``model.predict`` for the one-row frame.

    Raises
    ------
    ValueError, TypeError
        If a numerical reading cannot be converted by ``float()``.
    """
    # Fall back to the notebook's fitted pipeline only when no model is given,
    # so existing ten-argument calls keep working unchanged.
    if model is None:
        model = pipeline

    # Explicit column -> value mapping keeps each reading next to its column
    # name; dict insertion order fixes the column order of the frame.
    row = {
        'Temperature': float(temperature),
        'Humidity': float(humidity),
        'Wind Speed': float(wind_speed),
        'Precipitation (%)': float(precipitation),
        'Atmospheric Pressure': float(pressure),
        'UV Index': float(uv_index),
        'Visibility (km)': float(visibility),
        'Cloud Cover': cloud_cover,
        'Season': season,
        'Location': location,
    }
    input_data = pd.DataFrame([row])

    predictions = model.predict(input_data)
    print(predictions)
    return predictions

In [25]:
# Example: one cold, overcast winter reading from an inland location.
predict_weather(
    temperature=0.0,
    humidity=7,
    wind_speed=9.5,
    precipitation=82.0,
    pressure=1010.82,
    uv_index=2,
    visibility=3.5,
    cloud_cover="overcast",
    season="Winter",
    location="inland",
)

['Cloudy']


array(['Cloudy'], dtype=object)