In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [14]:
df=pd.read_excel("rainfall-and-daily-consumption-data-on-istanbul-dams.xlsx")

In [15]:
df = df[['Tarih', 'İstanbul günlük tüketim(m³/gün)']]
df = df.set_index("Tarih")
df.index = pd.to_datetime(df.index)
df['İstanbul günlük tüketim(m³/gün)'] = df['İstanbul günlük tüketim(m³/gün)'] // 100
df['İstanbul günlük tüketim(m³/gün)'] = df['İstanbul günlük tüketim(m³/gün)'].astype(float)
df = np.log(df)

In [16]:
train_size = int(len(df) * 0.80)
test_size = len(df) - train_size
train, test = df[0:train_size], df[train_size:len(df)]

In [17]:
def create_features(df):
    df=df.copy()
    df['dayofweek']= df.index.dayofweek
    df['quarter']= df.index.quarter
    df['month']= df.index.month
    df['year']= df.index.year
    df['dayofyear']= df.index.dayofyear
    return df
df=create_features(df)

In [18]:
train=create_features(train)
test=create_features(test)
FEATURES=['dayofweek','quarter','month','year','dayofyear']
TARGET='İstanbul günlük tüketim(m³/gün)'

In [19]:
X_train=train[FEATURES]
y_train=train[TARGET]

X_test=test[FEATURES]
y_test=test[TARGET]

In [20]:
!pip install lightgbm



In [21]:
from lightgbm import LGBMRegressor

In [22]:
lgbm = LGBMRegressor()
lgbm_model = lgbm.fit(X_train, y_train)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000145 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 292
[LightGBM] [Info] Number of data points in the train set: 3771, number of used features: 5
[LightGBM] [Info] Start training from score 10.181615


In [23]:
y_pred = lgbm_model.predict(X_test, 
                            num_iteration = lgbm_model.best_iteration_)

In [24]:
# Tahminlerin gerçek değerlerle karşılaştırılması
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("Mean Squared Error (MSE): {:.4f}".format(mse))
print("Root Mean Squared Error (RMSE): {:.4f}".format(rmse))

#r2_knn = r2_score(y_test, y_pred)
#print("KNN R-kare değeri:", r2_knn)


# Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (MAE) of Light GBM model:", mae)

# Mean Absolute Percentage Error (MAPE)
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

mape = mean_absolute_percentage_error(y_test, y_pred)
print("Mean Absolute Percentage Error (MAPE) of Light GBM model:", mape)

Mean Squared Error (MSE): 0.0036
Root Mean Squared Error (RMSE): 0.0599
Mean Absolute Error (MAE) of Light GBM model: 0.05187660030983401
Mean Absolute Percentage Error (MAPE) of Light GBM model: 0.5019117271041387


In [25]:
y_pred

array([10.24600775, 10.2461239 , 10.25010584, 10.25672249, 10.2519553 ,
       10.23894231, 10.23225381, 10.23421871, 10.23629516, 10.24575241,
       10.25755142, 10.24905657, 10.24708921, 10.24500212, 10.26602982,
       10.26692185, 10.28268436, 10.28801086, 10.29094507, 10.28016376,
       10.27931366, 10.27633492, 10.27844744, 10.28268436, 10.27746177,
       10.26637598, 10.26646782, 10.26438073, 10.26602202, 10.26676538,
       10.27584684, 10.2809195 , 10.27693968, 10.26156622, 10.26603958,
       10.28115198, 10.2672816 , 10.27316943, 10.29050012, 10.28933467,
       10.30265295, 10.30181764, 10.31276046, 10.30980978, 10.31374918,
       10.32092947, 10.32111272, 10.31573646, 10.31117471, 10.30956626,
       10.30424898, 10.30787203, 10.30379096, 10.30645028, 10.31460078,
       10.3164857 , 10.31805372, 10.31797977, 10.31911373, 10.30841526,
       10.31399412, 10.31605541, 10.33629593, 10.34015405, 10.3405654 ,
       10.34252085, 10.33622866, 10.33567832, 10.33148306, 10.32

In [26]:
len(y_pred)

943