# SmartWater â€“ ML Modellerinin EÄŸitimi ve DeÄŸerlendirilmesi
Bu defterde iki farklÄ± tahmin modeli eÄŸitilecek ve performans metrikleri karÅŸÄ±laÅŸtÄ±rÄ±larak en iyi model seÃ§ilecektir.


In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [3]:
df = pd.read_csv("data/temiz_su_tuketimi_ml_ready.csv")
print("Temiz veri seti yÃ¼klendi.")
df.head()


Temiz veri seti yÃ¼klendi.


Unnamed: 0,YIL,AY,ORTALAMA_TUKETIM_MEAN,ABONE_ADEDI_SUM,KAYIT_SAYISI,ORT_DOLULUK,TOPLAM_SU,BARAJ_SAYISI,TUKETIM
0,2024,1,170.5,7.0,1.0,35.19,280086000.0,6.0,76.62
1,2022,10,170.5,7.0,1.0,40.355,303945000.0,6.0,148.1
2,2021,8,170.5,7.0,1.0,48.658333,348099000.0,6.0,132.88
3,2022,1,58.0,1.0,1.0,52.893333,324869000.0,6.0,141.36
4,2021,3,1457.0,56.0,1.0,69.538333,440241000.0,6.0,111.44


BaÄŸÄ±mlÄ± ve BaÄŸÄ±msÄ±z DeÄŸiÅŸkenlerin AyrÄ±lmasÄ±

In [4]:
X = df.drop("TUKETIM", axis=1)   # â†’ Ã–zellikler
y = df["TUKETIM"]                # â†’ Tahmin edilecek deÄŸer

print("X boyutu:", X.shape)
print("y boyutu:", y.shape)


X boyutu: (1350, 8)
y boyutu: (1350,)


Train-Test Split Ä°ÅŸlemi

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.20,   # â†’ verinin %20â€™si test iÃ§in ayrÄ±ldÄ±
    random_state = 42   # â†’ tekrarlanabilirlik
)

print("EÄŸitim seti:", X_train.shape)
print("Test seti   :", X_test.shape)


EÄŸitim seti: (1080, 8)
Test seti   : (270, 8)


MODEL 1: Lineer Regresyonun EÄŸitilmesi

In [6]:
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

y_pred_lr = linear_model.predict(X_test)

# Metrikler
mae_lr  = mean_absolute_error(y_test, y_pred_lr)
rmse_lr = mean_squared_error(y_test, y_pred_lr) ** 0.5
r2_lr   = r2_score(y_test, y_pred_lr)

print("----- Lineer Regresyon SonuÃ§larÄ± -----")
print("MAE :", round(mae_lr, 2))
print("RMSE:", round(rmse_lr, 2))
print("RÂ²  :", round(r2_lr, 4))


----- Lineer Regresyon SonuÃ§larÄ± -----
MAE : 34.03
RMSE: 43.34
RÂ²  : -0.0063


MODEL 2: Random Forest Regressorâ€™Ä±n EÄŸitilmesi

In [7]:
rf_model = RandomForestRegressor(
    n_estimators = 200,
    random_state = 42
)

rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# Metrikler
mae_rf  = mean_absolute_error(y_test, y_pred_rf)
rmse_rf = mean_squared_error(y_test, y_pred_rf) ** 0.5
r2_rf   = r2_score(y_test, y_pred_rf)

print("----- Random Forest SonuÃ§larÄ± -----")
print("MAE :", round(mae_rf, 2))
print("RMSE:", round(rmse_rf, 2))
print("RÂ²  :", round(r2_rf, 4))


----- Random Forest SonuÃ§larÄ± -----
MAE : 36.8
RMSE: 47.88
RÂ²  : -0.2278


MODEL 3: Support Vector Regression (SVR)

In [8]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

# SVR Ã¶lÃ§eklendirme ister â†’ X verisini scale edelim
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svr_model = SVR(kernel="rbf", C=100, gamma=0.1)

svr_model.fit(X_train_scaled, y_train)
y_pred_svr = svr_model.predict(X_test_scaled)

mae_svr  = mean_absolute_error(y_test, y_pred_svr)
rmse_svr = mean_squared_error(y_test, y_pred_svr) ** 0.5
r2_svr   = r2_score(y_test, y_pred_svr)

print("----- SVR SonuÃ§larÄ± -----")
print("MAE :", round(mae_svr, 2))
print("RMSE:", round(rmse_svr, 2))
print("RÂ²  :", round(r2_svr, 4))


----- SVR SonuÃ§larÄ± -----
MAE : 35.19
RMSE: 44.77
RÂ²  : -0.0734


MODEL 4: KNN Regressor

In [9]:
from sklearn.neighbors import KNeighborsRegressor

knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)

y_pred_knn = knn_model.predict(X_test)

mae_knn  = mean_absolute_error(y_test, y_pred_knn)
rmse_knn = mean_squared_error(y_test, y_pred_knn) ** 0.5
r2_knn   = r2_score(y_test, y_pred_knn)

print("----- KNN Regrasyon SonuÃ§larÄ± -----")
print("MAE :", round(mae_knn, 2))
print("RMSE:", round(rmse_knn, 2))
print("RÂ²  :", round(r2_knn, 4))


----- KNN Regrasyon SonuÃ§larÄ± -----
MAE : 37.13
RMSE: 48.13
RÂ²  : -0.2406


MODEL 5: Decision Tree Regressor

In [10]:
from sklearn.tree import DecisionTreeRegressor

dt_model = DecisionTreeRegressor(
    max_depth=None,
    random_state=42
)

dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

mae_dt  = mean_absolute_error(y_test, y_pred_dt)
rmse_dt = mean_squared_error(y_test, y_pred_dt) ** 0.5
r2_dt   = r2_score(y_test, y_pred_dt)

print("----- Decision Tree SonuÃ§larÄ± -----")
print("MAE :", round(mae_dt, 2))
print("RMSE:", round(rmse_dt, 2))
print("RÂ²  :", round(r2_dt, 4))


----- Decision Tree SonuÃ§larÄ± -----
MAE : 41.38
RMSE: 53.92
RÂ²  : -0.5572


TÃœM MODELLERÄ°N KARÅžILAÅžTIRMA TABLOSU

In [11]:
results = pd.DataFrame({
    "Model": [
        "Lineer Regresyon",
        "Random Forest",
        "SVR",
        "KNN Regressor",
        "Decision Tree"
    ],
    "MAE":  [mae_lr, mae_rf, mae_svr, mae_knn, mae_dt],
    "RMSE": [rmse_lr, rmse_rf, rmse_svr, rmse_knn, rmse_dt],
    "RÂ²":   [r2_lr, r2_rf, r2_svr, r2_knn, r2_dt]
})

results


Unnamed: 0,Model,MAE,RMSE,RÂ²
0,Lineer Regresyon,34.032925,43.344515,-0.006264
1,Random Forest,36.804624,47.879336,-0.227835
2,SVR,35.190398,44.767313,-0.07341
3,KNN Regressor,37.125578,48.128271,-0.240635
4,Decision Tree,41.382335,53.919914,-0.557192


En iyi modelin seÃ§ilmesi

In [12]:
best_model = results.loc[results["RMSE"].idxmin()]
print("ðŸ“Œ En iyi model:")
print(best_model)


ðŸ“Œ En iyi model:
Model    Lineer Regresyon
MAE             34.032925
RMSE            43.344515
RÂ²              -0.006264
Name: 0, dtype: object


In [13]:
import os
import json
import joblib
from sklearn.linear_model import LinearRegression

linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

os.makedirs("ml_service/artifacts", exist_ok=True)

joblib.dump(linear_model, "ml_service/artifacts/best_model.joblib")

feature_cols = list(X.columns)
with open("ml_service/artifacts/feature_cols.json", "w", encoding="utf-8") as f:
    json.dump(feature_cols, f, ensure_ascii=False)

print("âœ… Model ve feature listesi kaydedildi")
print("â†’ ml_service/artifacts/best_model.joblib")
print("â†’ ml_service/artifacts/feature_cols.json")


âœ… Model ve feature listesi kaydedildi
â†’ ml_service/artifacts/best_model.joblib
â†’ ml_service/artifacts/feature_cols.json


In [14]:
import os
os.getcwd()



'C:\\Users\\DELL\\PycharmProjects\\SmartWaterML'

In [15]:
import joblib

# En iyi model olan linear_model'i kaydediyoruz
joblib.dump(linear_model, 'ml_service/artifacts/su_tahmin_modeli.pkl')
print("âœ… Model 'su_tahmin_modeli.pkl' olarak kaydedildi!")

âœ… Model 'su_tahmin_modeli.pkl' olarak kaydedildi!
