In [1]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import datetime
import random
from sklearn.model_selection import train_test_split
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error,mean_absolute_percentage_error,mean_squared_error,r2_score
import wandb
from wandb.lightgbm import wandb_callback

In [2]:
os.environ["WANDB_NOTEBOOK_NAME"] = "hyperparameter_tuning_lightgbm_no_external_data.ipynb"

In [3]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mcemalicoskunirmak[0m ([33mthebiasbusters[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
df = pd.read_excel("C:\\Users\\cemal\\Desktop\\Dersler\\Data Informatics\\DI502\\Github\\Electricity_Consmption_EPIAS_data.xlsx")

dates_dt = pd.to_datetime(df.Tarih, format='%d.%m.%Y')
    
df.Tarih = dates_dt.copy()
df = df.sort_values("Tarih")

In [5]:
df['year']=df['Tarih'].dt.year 
df['month']=df['Tarih'].dt.month
df['day']=df['Tarih'].dt.day
df['season']=df['Tarih'].dt.quarter
df['week']=df['Tarih'].dt.week
df['dayofweek']=df['Tarih'].dt.dayofweek
df['hour']= df.Saat.astype("str").apply(lambda x: x[:2]).astype("int")

df = df.sort_values(["year","month","day","hour"])

target = "Tüketim Miktarı (MWh)"

df.loc[:,target] =  pd.Series([item.replace(".", "").replace(",",".") for item in df.loc[:,target]]).astype("float")

  df['week']=df['Tarih'].dt.week
  df.loc[:,target] =  pd.Series([item.replace(".", "").replace(",",".") for item in df.loc[:,target]]).astype("float")


In [6]:
train = df[df.Tarih <= "2023-09-30"].reset_index(drop=True).copy()
test = df[df.Tarih > "2023-09-30"].reset_index(drop=True).copy()

In [7]:
x_train = train.iloc[:,-7:]
x_test = test.iloc[:,-7:]
y_train = train.loc[:,target]
y_test = test.loc[:,target]

eval_set = [(x_test,y_test)]

In [8]:
sweep_config = {
    
    'method': 'bayes',  # Örneğin bayes, grid, random
    'metric': {
        
      'name': 'r2',
      'goal': 'maximize'
        
    },
    
    'parameters': {
        
        'learning_rate': {
            'min': 0.005,
            'max': 0.2
            
        },
        
        'max_depth': {
            'values': [4,5,6, 7]
            
        },
        
        'num_leaves': {
            'min': 20,
            'max': 50
        },
        
        'n_estimators': {
            'min':100,
            'max':1000
        },
        
        'subsample' : {
            'values' : [0.8,0.9,1]
        },
        
        'reg_alpha' : {
            
            'min': 0,
            'max':100
        },
        
        'reg_lambda' : {
            
            'min':0,
            'max':100
        }
    }
}

In [9]:
# sweep_id = wandb.sweep(sweep_config, project="ElectricConsumption", entity="thebiasbusters")

def train():
    # W&B entegrasyonunu başlat
    with wandb.init() as run:
        config = run.config

        # Modelinizi konfigürasyona göre oluşturun
        model = LGBMRegressor(
            learning_rate=config.learning_rate,
            max_depth=int(config.max_depth),
            num_leaves=int(config.num_leaves),
            n_estimators=int(config.n_estimators),
            subsample= config.subsample,
            reg_alpha= config.reg_alpha,
            reg_lambda= config.reg_lambda,
            random_state=42
        )

        # Modeli eğitin
        model.fit(x_train, y_train, eval_set=eval_set)
        preds = model.predict(x_test)

        # Calculate R-squared on the test set
        r2 = r2_score(y_test, preds)

        # Log the R-squared score
        wandb.log({'r2': r2})

Create sweep with ID: imyaimnq
Sweep URL: https://wandb.ai/thebiasbusters/ElectricConsumption/sweeps/imyaimnq


In [10]:
# Sweep agent'larını başlatın ve sweep ID'si ile ilişkilendirin
wandb.agent("imyaimnq", train, count=100)

[34m[1mwandb[0m: Agent Starting Run: 12qzzqeb with config:
[34m[1mwandb[0m: 	learning_rate: 0.12366309393238084
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 477
[34m[1mwandb[0m: 	num_leaves: 36
[34m[1mwandb[0m: 	reg_alpha: 73
[34m[1mwandb[0m: 	reg_lambda: 97
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.93305


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: orjyhnbf with config:
[34m[1mwandb[0m: 	learning_rate: 0.0055539285987642115
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 467
[34m[1mwandb[0m: 	num_leaves: 32
[34m[1mwandb[0m: 	reg_alpha: 6
[34m[1mwandb[0m: 	reg_lambda: 45
[34m[1mwandb[0m: 	subsample: 1


0,1
r2,▁

0,1
r2,0.88964


[34m[1mwandb[0m: Agent Starting Run: 1cg2d49t with config:
[34m[1mwandb[0m: 	learning_rate: 0.044719404156379934
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 920
[34m[1mwandb[0m: 	num_leaves: 48
[34m[1mwandb[0m: 	reg_alpha: 97
[34m[1mwandb[0m: 	reg_lambda: 72
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.93501


[34m[1mwandb[0m: Agent Starting Run: my2gj5iw with config:
[34m[1mwandb[0m: 	learning_rate: 0.00943093290131122
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 844
[34m[1mwandb[0m: 	num_leaves: 46
[34m[1mwandb[0m: 	reg_alpha: 92
[34m[1mwandb[0m: 	reg_lambda: 85
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.92998


[34m[1mwandb[0m: Agent Starting Run: jdux5sxs with config:
[34m[1mwandb[0m: 	learning_rate: 0.17263852257373366
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 879
[34m[1mwandb[0m: 	num_leaves: 45
[34m[1mwandb[0m: 	reg_alpha: 78
[34m[1mwandb[0m: 	reg_lambda: 90
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.94249


[34m[1mwandb[0m: Agent Starting Run: 85zmn5hx with config:
[34m[1mwandb[0m: 	learning_rate: 0.19563074987436957
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 808
[34m[1mwandb[0m: 	num_leaves: 50
[34m[1mwandb[0m: 	reg_alpha: 100
[34m[1mwandb[0m: 	reg_lambda: 95
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.91973


[34m[1mwandb[0m: Agent Starting Run: 8gtmrpus with config:
[34m[1mwandb[0m: 	learning_rate: 0.17181146536435135
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 950
[34m[1mwandb[0m: 	num_leaves: 34
[34m[1mwandb[0m: 	reg_alpha: 88
[34m[1mwandb[0m: 	reg_lambda: 66
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.93819


[34m[1mwandb[0m: Agent Starting Run: dgeypzsy with config:
[34m[1mwandb[0m: 	learning_rate: 0.189405340337235
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 559
[34m[1mwandb[0m: 	num_leaves: 41
[34m[1mwandb[0m: 	reg_alpha: 74
[34m[1mwandb[0m: 	reg_lambda: 94
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.92502


[34m[1mwandb[0m: Agent Starting Run: k7o4vq10 with config:
[34m[1mwandb[0m: 	learning_rate: 0.18261919110176353
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 984
[34m[1mwandb[0m: 	num_leaves: 35
[34m[1mwandb[0m: 	reg_alpha: 96
[34m[1mwandb[0m: 	reg_lambda: 77
[34m[1mwandb[0m: 	subsample: 1


0,1
r2,▁

0,1
r2,0.91676


[34m[1mwandb[0m: Agent Starting Run: m1g7tsg1 with config:
[34m[1mwandb[0m: 	learning_rate: 0.11732599029565892
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 841
[34m[1mwandb[0m: 	num_leaves: 40
[34m[1mwandb[0m: 	reg_alpha: 75
[34m[1mwandb[0m: 	reg_lambda: 74
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.92116


[34m[1mwandb[0m: Agent Starting Run: 63gxtxlt with config:
[34m[1mwandb[0m: 	learning_rate: 0.13911435100017552
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 905
[34m[1mwandb[0m: 	num_leaves: 27
[34m[1mwandb[0m: 	reg_alpha: 85
[34m[1mwandb[0m: 	reg_lambda: 77
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.90664


[34m[1mwandb[0m: Agent Starting Run: y6ntconx with config:
[34m[1mwandb[0m: 	learning_rate: 0.1403055389258876
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 777
[34m[1mwandb[0m: 	num_leaves: 42
[34m[1mwandb[0m: 	reg_alpha: 81
[34m[1mwandb[0m: 	reg_lambda: 95
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.93275


[34m[1mwandb[0m: Agent Starting Run: fyjyvoac with config:
[34m[1mwandb[0m: 	learning_rate: 0.1959467993677957
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 971
[34m[1mwandb[0m: 	num_leaves: 45
[34m[1mwandb[0m: 	reg_alpha: 54
[34m[1mwandb[0m: 	reg_lambda: 99
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.91528


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: iwn1ihgh with config:
[34m[1mwandb[0m: 	learning_rate: 0.01303321548959269
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 926
[34m[1mwandb[0m: 	num_leaves: 50
[34m[1mwandb[0m: 	reg_alpha: 93
[34m[1mwandb[0m: 	reg_lambda: 68
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.91136


[34m[1mwandb[0m: Agent Starting Run: afmfm211 with config:
[34m[1mwandb[0m: 	learning_rate: 0.14963502328824668
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 750
[34m[1mwandb[0m: 	num_leaves: 42
[34m[1mwandb[0m: 	reg_alpha: 71
[34m[1mwandb[0m: 	reg_lambda: 81
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.90955


[34m[1mwandb[0m: Agent Starting Run: pp16knhq with config:
[34m[1mwandb[0m: 	learning_rate: 0.010168836669947102
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 789
[34m[1mwandb[0m: 	num_leaves: 42
[34m[1mwandb[0m: 	reg_alpha: 25
[34m[1mwandb[0m: 	reg_lambda: 85
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.93768


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ujfj3yqy with config:
[34m[1mwandb[0m: 	learning_rate: 0.1217468255861644
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 195
[34m[1mwandb[0m: 	num_leaves: 21
[34m[1mwandb[0m: 	reg_alpha: 65
[34m[1mwandb[0m: 	reg_lambda: 68
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.93985


[34m[1mwandb[0m: Agent Starting Run: dv8rexfv with config:
[34m[1mwandb[0m: 	learning_rate: 0.1858881336543904
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 669
[34m[1mwandb[0m: 	num_leaves: 28
[34m[1mwandb[0m: 	reg_alpha: 57
[34m[1mwandb[0m: 	reg_lambda: 60
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.91879


[34m[1mwandb[0m: Agent Starting Run: pdomdm7g with config:
[34m[1mwandb[0m: 	learning_rate: 0.02270042986541898
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 194
[34m[1mwandb[0m: 	num_leaves: 40
[34m[1mwandb[0m: 	reg_alpha: 25
[34m[1mwandb[0m: 	reg_lambda: 32
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.93589


[34m[1mwandb[0m: Agent Starting Run: 0a1j1kvc with config:
[34m[1mwandb[0m: 	learning_rate: 0.013279880621739532
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 244
[34m[1mwandb[0m: 	num_leaves: 22
[34m[1mwandb[0m: 	reg_alpha: 51
[34m[1mwandb[0m: 	reg_lambda: 5
[34m[1mwandb[0m: 	subsample: 1


0,1
r2,▁

0,1
r2,0.89597


[34m[1mwandb[0m: Agent Starting Run: 4gxl1js9 with config:
[34m[1mwandb[0m: 	learning_rate: 0.06533767263228625
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 674
[34m[1mwandb[0m: 	num_leaves: 26
[34m[1mwandb[0m: 	reg_alpha: 72
[34m[1mwandb[0m: 	reg_lambda: 53
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.93035


[34m[1mwandb[0m: Agent Starting Run: 2lk7cqes with config:
[34m[1mwandb[0m: 	learning_rate: 0.023243798290626515
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	n_estimators: 799
[34m[1mwandb[0m: 	num_leaves: 25
[34m[1mwandb[0m: 	reg_alpha: 14
[34m[1mwandb[0m: 	reg_lambda: 51
[34m[1mwandb[0m: 	subsample: 1


0,1
r2,▁

0,1
r2,0.90514


[34m[1mwandb[0m: Agent Starting Run: uucdg8hm with config:
[34m[1mwandb[0m: 	learning_rate: 0.047613852254222
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 305
[34m[1mwandb[0m: 	num_leaves: 28
[34m[1mwandb[0m: 	reg_alpha: 78
[34m[1mwandb[0m: 	reg_lambda: 85
[34m[1mwandb[0m: 	subsample: 0.8


0,1
r2,▁

0,1
r2,0.92491


[34m[1mwandb[0m: Agent Starting Run: qlc6bted with config:
[34m[1mwandb[0m: 	learning_rate: 0.024824728680292396
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 938
[34m[1mwandb[0m: 	num_leaves: 34
[34m[1mwandb[0m: 	reg_alpha: 52
[34m[1mwandb[0m: 	reg_lambda: 26
[34m[1mwandb[0m: 	subsample: 1


0,1
r2,▁

0,1
r2,0.89731


[34m[1mwandb[0m: Agent Starting Run: waqeqcfq with config:
[34m[1mwandb[0m: 	learning_rate: 0.12536521427838637
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 296
[34m[1mwandb[0m: 	num_leaves: 39
[34m[1mwandb[0m: 	reg_alpha: 71
[34m[1mwandb[0m: 	reg_lambda: 77
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.84093


[34m[1mwandb[0m: Agent Starting Run: w4evry6e with config:
[34m[1mwandb[0m: 	learning_rate: 0.07846364249409718
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 487
[34m[1mwandb[0m: 	num_leaves: 46
[34m[1mwandb[0m: 	reg_alpha: 10
[34m[1mwandb[0m: 	reg_lambda: 7
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.88837


[34m[1mwandb[0m: Agent Starting Run: nqd1jou6 with config:
[34m[1mwandb[0m: 	learning_rate: 0.05540834643699604
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 622
[34m[1mwandb[0m: 	num_leaves: 22
[34m[1mwandb[0m: 	reg_alpha: 59
[34m[1mwandb[0m: 	reg_lambda: 62
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.94939


[34m[1mwandb[0m: Agent Starting Run: hiiz7jx3 with config:
[34m[1mwandb[0m: 	learning_rate: 0.091106260501861
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 658
[34m[1mwandb[0m: 	num_leaves: 26
[34m[1mwandb[0m: 	reg_alpha: 53
[34m[1mwandb[0m: 	reg_lambda: 61
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.93751


[34m[1mwandb[0m: Agent Starting Run: z7jfzp5c with config:
[34m[1mwandb[0m: 	learning_rate: 0.06098838572283356
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 656
[34m[1mwandb[0m: 	num_leaves: 25
[34m[1mwandb[0m: 	reg_alpha: 77
[34m[1mwandb[0m: 	reg_lambda: 92
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.91655


[34m[1mwandb[0m: Agent Starting Run: 5dz7emps with config:
[34m[1mwandb[0m: 	learning_rate: 0.1389413524058735
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	n_estimators: 121
[34m[1mwandb[0m: 	num_leaves: 23
[34m[1mwandb[0m: 	reg_alpha: 75
[34m[1mwandb[0m: 	reg_lambda: 62
[34m[1mwandb[0m: 	subsample: 1


0,1
r2,▁

0,1
r2,0.8963


[34m[1mwandb[0m: Agent Starting Run: aia6n6iu with config:
[34m[1mwandb[0m: 	learning_rate: 0.17607320676790433
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 920
[34m[1mwandb[0m: 	num_leaves: 44
[34m[1mwandb[0m: 	reg_alpha: 100
[34m[1mwandb[0m: 	reg_lambda: 86
[34m[1mwandb[0m: 	subsample: 0.9


0,1
r2,▁

0,1
r2,0.92672


[34m[1mwandb[0m: Agent Starting Run: 0rx7pths with config:
[34m[1mwandb[0m: 	learning_rate: 0.05975797090916945
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 487
[34m[1mwandb[0m: 	num_leaves: 30
[34m[1mwandb[0m: 	reg_alpha: 52
[34m[1mwandb[0m: 	reg_lambda: 70
[34m[1mwandb[0m: 	subsample: 1


0,1
r2,▁

0,1
r2,0.90497


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
