In [11]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.preprocessing import StandardScaler
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.multioutput import MultiOutputRegressor
import warnings
warnings.filterwarnings("ignore")


SEED = 42
n_splits = 8
n_estimators=5000
early_stopping_rounds = 100

In [3]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
eq_df = pd.read_csv("eq.csv")
sample_submission = pd.read_csv("sample_submission.csv")



# test_df.describe()
# test_df.describe()
# test_df.info()
train_df.info()
train_df = train_df.drop(columns=["Unnamed: 0"])

print(f"Training data shape: {train_df.shape}")
print(f"Earthquake data shape: {eq_df.shape}")
print(f"Test data shape: {test_df.shape}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1250 entries, 0 to 1249
Data columns (total 21 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  1250 non-null   int64  
 1   year        1250 non-null   int64  
 2   month       1250 non-null   int64  
 3   day         1250 non-null   int64  
 4   hour        1250 non-null   int64  
 5   min         1250 non-null   int64  
 6   sec         1250 non-null   int64  
 7   lat         1250 non-null   float64
 8   lon         1250 non-null   float64
 9   depth       1250 non-null   int64  
 10  class       1250 non-null   float64
 11  year_as     1250 non-null   int64  
 12  month_as    1250 non-null   int64  
 13  day_as      1250 non-null   int64  
 14  hour_as     1250 non-null   int64  
 15  min_as      1250 non-null   int64  
 16  sec_as      1250 non-null   int64  
 17  lat_as      1250 non-null   float64
 18  lon_as      1250 non-null   float64
 19  depth_as    1250 non-null  

In [None]:
target_cols = ["year_as", "month_as", "day_as", "hour_as", "min_as", "sec_as",
                  "lat_as", "lon_as", "depth_as", "class_as"]

X = train_df.drop(columns=target_cols)
y = train_df[target_cols]

X_test = test_df[X.columns].copy()

In [10]:
nFolds = 7
cv = KFold(n_splits=nFolds, random_state=SEED, shuffle=True)

predictions = {col: np.zeros(len(X_test)) for col in target_cols}

for col in target_cols:
    fold_preds = np.zeros(len(X_test))
    print(f"Training for target: {col}")
    for train_idx, valid_idx in cv.split(X, y):
        X_train, y_train = X.iloc[train_idx].copy(), y.iloc[train_idx][col]
        X_valid, y_valid = X.iloc[valid_idx].copy(), y.iloc[valid_idx][col]
        
        model = lgb.LGBMRegressor(
            n_estimators=1000,
            max_depth=-1,
            num_leaves=1024,
            colsample_bytree=0.7,
            learning_rate=0.03,
            objective='regression',
            metric='mae',
            verbosity=-1,  
            device='gpu',  
            random_state=87
        )
        
        model.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            eval_metric='mae',
            callbacks=[lgb.early_stopping(stopping_rounds=early_stopping_rounds, verbose=True)]
        )


        
        fold_preds += model.predict(X_test) / nFolds
    predictions[col] = fold_preds

Training for target: year_as
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[995]	valid_0's l1: 0.468006
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[809]	valid_0's l1: 0.43077
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[996]	valid_0's l1: 0.472861
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[611]	valid_0's l1: 0.398632
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[947]	valid_0's l1: 0.392085
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[902]	valid_0's l1: 0.369031
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[885]	valid_0's l1: 0.402725
Training for target: month_as
Training until validation scores don't im

In [14]:
submission = pd.DataFrame({
    'id_eq': test_df['id_eq'],
    'year_as': predictions['year_as'],
    'month_as': predictions['month_as'],
    'day_as': predictions['day_as'],
    'hour_as': predictions['hour_as'],
    'min_as': predictions['min_as'],
    'sec_as': predictions['sec_as'],
    'lat_as': predictions['lat_as'],
    'lon_as': predictions['lon_as'],
    'depth_as': predictions['depth_as'],
    'class_as': predictions['class_as']
})

submission.to_csv('submission.csv', index=False)
print('Submission file saved as submission.csv')


Submission file saved as submission.csv
