In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('https://raw.githubusercontent.com/DataThinkers/Datasets/refs/heads/main/DS/water_potability.csv')

### Check Null Values In The Dataset¶¶

In [3]:
data.isnull().sum()

ph                 491
Hardness             0
Solids               0
Chloramines          0
Sulfate            781
Conductivity         0
Organic_carbon       0
Trihalomethanes    162
Turbidity            0
Potability           0
dtype: int64

### Splitting The Dataset Into The Training Set And Test Set

In [4]:
from sklearn.model_selection import train_test_split
train_data,test_data = train_test_split(data,test_size=0.20,random_state=42)

In [5]:
def fill_missing_with_median(df):
    for column in df.columns:
        if df[column].isnull().any():
            median_value = df[column].median()
            df[column].fillna(median_value,inplace=True)
    return df


# Fill missing values with median
train_processed_data = fill_missing_with_median(train_data)
test_processed_data = fill_missing_with_median(test_data)

In [12]:
from sklearn.ensemble import RandomForestClassifier
import pickle
X_train = train_processed_data.iloc[:,0:-1].values
y_train = train_processed_data.iloc[:,-1].values

n_estimators = 500

clf = RandomForestClassifier(n_estimators=n_estimators)
clf.fit(X_train,y_train)

# save 
pickle.dump(clf,open("rf_model.pkl","wb"))

In [13]:
X_test = test_processed_data.iloc[:,0:-1].values
y_test = test_processed_data.iloc[:,-1].values

In [14]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

model = pickle.load(open('model.pkl',"rb"))

y_pred = model.predict(X_test)

acc = accuracy_score(y_test,y_pred)
precision = precision_score(y_test,y_pred)
recall = recall_score(y_test,y_pred)
f1_score = f1_score(y_test,y_pred)

print("acc",acc)
print("precision", precision)
print("recall", recall)
print("f1-score",f1_score)

acc 0.6890243902439024
precision 0.6538461538461539
recall 0.3483606557377049
f1-score 0.45454545454545453
