In [106]:
import pandas as pd
import os
import numpy as np
import glob
import lightgbm as lgb
import pathlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

Reading and Processing CSV File

In [107]:
df = pd.read_csv ('Catalogue.csv', encoding='latin-1')
dfProcessed = pd.DataFrame(df, columns = ['Zaman (UTC)', 'Enlem', 'Boylam', 'Derinlik', 'Büyüklük'])
dfProcessed['Ay'] = pd.DatetimeIndex(df['Zaman (UTC)']).month
dfProcessed = dfProcessed.drop(columns="Zaman (UTC)")
dfProcessed = dfProcessed[['Enlem', 'Boylam', 'Ay', 'Derinlik', 'Büyüklük']]
print(dfProcessed)


         Enlem   Boylam  Ay  Derinlik  Büyüklük
0      35.7380  24.8870   6     37.50       4.0
1      36.4261  27.0918   6     18.97       4.1
2      36.4461  27.1643   6     11.09       4.1
3      37.5625  36.1321   6     14.37       4.2
4      35.3220  32.8156   5     25.63       4.3
...        ...      ...  ..       ...       ...
12196  38.1200  31.2900   2     10.00       5.7
12197  37.8300  27.7100   1     10.00       4.4
12198  37.6300  37.3700   1     10.00       4.8
12199  39.5400  26.1400   1     10.00       5.2
12200  37.7900  28.2100   1     10.00       4.4

[12201 rows x 5 columns]


Dividing Data into 3 pieces as Train, Validation and Test data

In [108]:
r, c = dfProcessed.shape
print(r)
trainSize = int(0.7 * r)
validSize = int(0.1 * r)
trainLastIndex = trainSize
validLastIndex = trainLastIndex + validSize
dfTrain = dfProcessed.iloc[:trainLastIndex]
dfValid = dfProcessed.iloc[trainLastIndex+1:validLastIndex]
dfTest = dfProcessed.iloc[validLastIndex+1:]

12201


In [109]:
dfTrainFeature = dfTrain.drop(columns="Büyüklük")
dfTrainTarget = dfTrain[['Büyüklük']]

dfValidFeature = dfValid.drop(columns="Büyüklük")
dfValidTarget = dfValid[['Büyüklük']]

dfTestFeature = dfTest.drop(columns="Büyüklük")
dfTestTarget = dfTest[['Büyüklük']]

Setting Parameters of Model

In [110]:
train_data = lgb.Dataset(dfTrainFeature, label=dfTrainTarget)
valid_data = lgb.Dataset(dfValidFeature, label=dfValidTarget)

parameters = {'objective': 'regression',
              'metric': 'root_mean_squared_error',
              'boosting': 'gbdt',
              'num_leaves': 63,
              'bagging_freq': 20,
              'learning_rate': 0.01,
              'verbose': -1
             }

Creating Model ve Training

In [111]:
model_lgbm = lgb.train(parameters,
                            train_data,
                            valid_sets=valid_data,
                            num_boost_round=1000,
                            early_stopping_rounds=50)

[1]	valid_0's rmse: 0.395667
Training until validation scores don't improve for 50 rounds.
[2]	valid_0's rmse: 0.395405
[3]	valid_0's rmse: 0.395146
[4]	valid_0's rmse: 0.394905
[5]	valid_0's rmse: 0.394663
[6]	valid_0's rmse: 0.394433
[7]	valid_0's rmse: 0.394215
[8]	valid_0's rmse: 0.394043
[9]	valid_0's rmse: 0.393858
[10]	valid_0's rmse: 0.393694
[11]	valid_0's rmse: 0.393539
[12]	valid_0's rmse: 0.393349
[13]	valid_0's rmse: 0.3932
[14]	valid_0's rmse: 0.393059
[15]	valid_0's rmse: 0.392932
[16]	valid_0's rmse: 0.392753
[17]	valid_0's rmse: 0.392633
[18]	valid_0's rmse: 0.392451
[19]	valid_0's rmse: 0.392316
[20]	valid_0's rmse: 0.392177
[21]	valid_0's rmse: 0.392019
[22]	valid_0's rmse: 0.391889
[23]	valid_0's rmse: 0.391746
[24]	valid_0's rmse: 0.391625
[25]	valid_0's rmse: 0.391507
[26]	valid_0's rmse: 0.391361
[27]	valid_0's rmse: 0.391247
[28]	valid_0's rmse: 0.391109
[29]	valid_0's rmse: 0.391026
[30]	valid_0's rmse: 0.390928
[31]	valid_0's rmse: 0.39078
[32]	valid_0's rmse:

Saving and Loading Model

In [112]:
model_lgbm.save_model('lgbmModel.txt', num_iteration=model_lgbm.best_iteration)

loadedModel = lgb.Booster(model_file='lgbmModel.txt')

In [113]:
pred = loadedModel.predict(dfTestFeature,  num_iteration = loadedModel.best_iteration)

In [114]:
npTestVal = dfTestTarget.to_numpy().flatten()

Calculating RMS Error

In [115]:
from sklearn.metrics import mean_squared_error

rms = mean_squared_error(npTestVal, pred, squared=False)
print(rms)

0.793554760418947
