<a href="https://colab.research.google.com/github/anik199/House_Prices_Advanced_Regression/blob/main/Autogluon_AutoML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

In [None]:
## define configuration
# Input data locations.
PATH_TRAIN, PATH_TEST = '/content/train.csv', '/content/test.csv'

# One submission file name per AutoML framework.
PATH_AUTOGLUON_SUBMISSION = 'submission_autogluon.csv'
PATH_EVALML_SUBMISSION = 'submission_evalml.csv'
PATH_FLAML_SUBMISSION = 'submission_flaml.csv'
PATH_H2OAML_SUBMISSION = 'submission_h2oaml.csv'
PATH_LAML_SUBMISSION = 'submission_laml.csv'

# Training time budget, expressed in minutes and derived in seconds.
MAX_MODEL_RUNTIME_MINS = 15
MAX_MODEL_RUNTIME_SECS = 60 * MAX_MODEL_RUNTIME_MINS

In [None]:
pip install datatable

In [None]:
## prepare data
import gc
import os
import shutil
import datatable as dt
from pathlib import Path
import warnings

# Silence library warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

# Read both splits; the training frame is capped at its first 100000 rows.
train = dt.fread(PATH_TRAIN)[:100000, :]
test = dt.fread(PATH_TEST)

# Keep the test ids for the submissions and the label as a flattened NumPy array.
test_ids = test[:, 'Id']
target = train[:, 'SalePrice'].to_numpy().ravel()

# Remove the id and label from the training features, then select the same
# columns, in the same order, from the test frame.
del train[:, ['Id', 'SalePrice']]
test = test[:, train.names]

In [None]:
! pip install delayed

## 1. AutoGluon

In [None]:
## install packages
!python3 -m pip install -q "mxnet<2.0.0"
!python3 -m pip install -q autogluon
!python3 -m pip install -q -U graphviz
!python3 -m pip install -q scikit-learn==0.24.2

In [None]:
## import packages
from autogluon.tabular import TabularPredictor

In [None]:
## run model
# AutoGluon reads the label from the training table, so attach it temporarily.
train['SalePrice'] = dt.Frame(target)

autogluon_fit_options = dict(
    excluded_model_types=['KNN'],
    time_limit=MAX_MODEL_RUNTIME_SECS,
)
model_autogluon = TabularPredictor(label='SalePrice')
model_autogluon.fit(train_data=train.to_pandas(), **autogluon_fit_options)

# Detach the label again so `train` holds features only for the later sections.
del train['SalePrice']

In [None]:
## generate predictions
# SalePrice is a continuous target, so use predict(); predict_proba() is the
# class-probability API and is not meant for regression predictors.
preds_autogluon = model_autogluon.predict(test.to_pandas())

In [None]:
## create submission
# Pair each test Id with its predicted SalePrice and write the result to the
# AutoGluon submission path declared in the configuration cell, so the file
# is not confused with the other frameworks' submissions.
submission = dt.Frame(Id=test_ids, SalePrice=dt.Frame(preds_autogluon))
submission.to_csv(PATH_AUTOGLUON_SUBMISSION)
submission.head()

In [None]:
## clear memory
# Drop the reference to the fitted predictor, remove the model directory
# from disk, then run a garbage-collection pass.
del model_autogluon
shutil.rmtree('AutogluonModels')

gc.collect()

### AutoGluon score: 0.12939

## 2. H2O AutoML

In [None]:
!pip install h2o


In [None]:
# import packages
import h2o
from h2o.automl import H2OAutoML

In [None]:
## prepare data
h2o.init()

h2o_train = h2o.H2OFrame(train.to_pandas())
h2o_test = h2o.H2OFrame(test.to_pandas())

h2o_train['SalePrice'] = h2o.H2OFrame(target).asfactor()

In [None]:
#run model
features = [x for x in h2o_train.columns if x != 'SalePrice']

model_h2o = H2OAutoML(stopping_metric='AUC', max_runtime_secs=MAX_MODEL_RUNTIME_SECS)
model_h2o.train(x=features, y='SalePrice', training_frame=h2o_train)

In [None]:
#generate predictions
# Score the test frame with the leading AutoML model, then convert the
# resulting H2O frame to a data frame on the Python side.
h2o_predictions = model_h2o.leader.predict(h2o_test)
preds_h2o = h2o_predictions.as_data_frame()

In [None]:
#create submission
# Pair each test Id with the 'predict' column of the H2O output and write the
# result to the H2O submission path declared in the configuration cell.
submission = dt.Frame(Id=test_ids, SalePrice=dt.Frame(preds_h2o['predict']))
submission.to_csv(PATH_H2OAML_SUBMISSION)
submission.head()

### H2O AutoML score: 0.34716 (very bad)

## 3. LightAutoML

In [None]:
## install packages
!python3 -m pip install -q lightautoml
!python3 -m pip install -q -U torch
!python3 -m pip install -q -U torchvision

In [None]:
## import packages
from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

In [None]:
## run model
# LightAutoML reads the target from the training table, so attach it temporarily.
train['SalePrice'] = dt.Frame(target)

laml_task = Task('reg')
laml_roles = {'target': 'SalePrice'}
model_laml = TabularAutoML(task=laml_task, timeout=MAX_MODEL_RUNTIME_SECS)
model_laml.fit_predict(train_data=train.to_pandas(), roles=laml_roles)

# Detach the target again so `train` holds features only.
del train['SalePrice']

In [None]:
## generate predictions
# Predict on the test features and flatten the prediction array held in `.data`.
laml_output = model_laml.predict(test.to_pandas())
preds_laml = laml_output.data.ravel()

In [None]:
## create submission
# Pair each test Id with its predicted SalePrice and write the result to the
# LightAutoML submission path declared in the configuration cell.
submission = dt.Frame(Id=test_ids, SalePrice=preds_laml)
submission.to_csv(PATH_LAML_SUBMISSION)
submission.head()

### LightAutoML score: 0.12966