In [None]:
import pandas as pd
import numpy as np
import NUTILS as nutils
import lightgbm as lgb
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from lightgbm import LGBMRegressor

In [None]:
# Load the full training and test tables from CSV.
# NOTE(review): absolute, machine-specific paths — the notebook only runs
# where this directory layout exists.
df_t = pd.read_csv('/home/agi/Desktop/NOMAD/ensembling/train_full.csv')
df_s = pd.read_csv('/home/agi/Desktop/NOMAD/ensembling/test_full.csv')

# Feature matrices come from project helpers in NUTILS; presumably these drop
# id/target/unused columns (train and test use separate helpers) — confirm
# against the NUTILS module.
X_train = nutils.drop_features(df_t)
X_submit = nutils.drop_features_s(df_s)
# The two regression targets, read from the training table.
y_form = df_t["formation_energy_ev_natom"]
y_band = df_t["bandgap_energy_ev"]

In [None]:
# LightGBM settings for the formation-energy model.
params1 = dict(
    num_leaves=7,
    objective='regression',
    min_data_in_leaf=18,
    learning_rate=0.04,
    feature_fraction=0.93,
    bagging_fraction=0.93,
    bagging_freq=1,
    metric='l2',
    num_threads=1,
    verbose=0,
)

# The band-gap model uses the same settings except for one extra leaf per tree.
params2 = {**params1, 'num_leaves': 8}

# Number of boosting iterations for each target.
boost_rounds_form = 230
boost_rounds_band = 721

# scikit-learn style estimators (used for cross-validation); the round count
# is forwarded as an extra keyword alongside the shared parameter dicts.
model_form = LGBMRegressor(num_boost_round=boost_rounds_form, **params1)
model_band = LGBMRegressor(num_boost_round=boost_rounds_band, **params2)

In [None]:
# 10-fold cross-validation of both models.
# `random_state` only has an effect when `shuffle=True`; without it older
# scikit-learn silently ignored the seed and current releases raise a
# ValueError. Shuffling is enabled so the seed actually yields reproducible,
# randomized folds.
# NOTE(review): this changes fold membership relative to the old unshuffled
# split; drop both `shuffle` and `random_state` instead if contiguous folds
# are required.
k_fold = KFold(n_splits=10, shuffle=True, random_state=7)

# Wrap the project metric as a scorer. With greater_is_better=False the scorer
# returns the negated metric, so the signs are flipped back below.
# (nutils.rmsle is presumably root-mean-squared-log-error — confirm in NUTILS.)
objective = make_scorer(nutils.rmsle, greater_is_better=False)

scores_form = cross_val_score(model_form, X_train, y_form, scoring=objective, cv=k_fold)
scores_band = cross_val_score(model_band, X_train, y_band, scoring=objective, cv=k_fold)

# Per-fold sum of the two (positive) metric values; the mean over the two
# targets is what gets displayed. The name says "rmse" but the values come
# from nutils.rmsle.
rmse_scores = -scores_form - scores_band
nutils.display_scores(rmse_scores / 2)

In [None]:
# --- Formation energy: fit on the full training set ------------------------
lgb_train_form = lgb.Dataset(data=X_train, label=y_form)
gbm_form = lgb.train(
    params=params1,
    train_set=lgb_train_form,
    num_boost_round=boost_rounds_form,
)

# --- Band gap: fit on the full training set --------------------------------
lgb_train_band = lgb.Dataset(data=X_train, label=y_band)
gbm_band = lgb.train(
    params=params2,
    train_set=lgb_train_band,
    num_boost_round=boost_rounds_band,
)

# Predict both targets for the submission features.
submit_pred_form = gbm_form.predict(X_submit)
submit_pred_band = gbm_band.predict(X_submit)

# Persist the fitted boosters to disk.
gbm_form.save_model('/home/agi/Desktop/NOMAD/ensembling/lgbm_models/lgbm_form_model1')
gbm_band.save_model('/home/agi/Desktop/NOMAD/ensembling/lgbm_models/lgbm_band_model1')

In [None]:
# Assemble the two prediction vectors into a two-column array. The row count
# is taken from the predictions themselves instead of a hard-coded 600, so the
# cell keeps working if the test set size changes; column_stack raises if the
# two vectors disagree in length.
submission = np.column_stack((np.ravel(submit_pred_form), np.ravel(submit_pred_band)))
submit_df = pd.DataFrame(submission, columns=['formation_energy_ev_natom', "bandgap_energy_ev"])

# Negative predictions are replaced by zero (NaNs, if any, are left as-is).
submit_df = submit_df.clip(lower=0)

# Ids are sequential and 1-based, one per prediction row.
# NOTE(review): assumes the test rows are in id order starting at 1 — confirm
# against the test file.
submit_df.insert(0, 'id', range(1, len(submit_df) + 1))

submit_df.to_csv("/home/agi/Desktop/NOMAD/ensembling/subs/lgbm_1.csv", index=False)