In [None]:
# Use the %pip magic (not !pip) so the package is installed into the environment
# of the running kernel rather than whatever `pip` the subshell resolves to.
# NOTE(review): consider pinning a version here so re-runs are reproducible.
%pip install pytorch-tabular


In [23]:
from pytorch_tabular import TabularModel
from pytorch_tabular.models import (
    FTTransformerConfig,
    TabNetModelConfig,
    TabTransformerConfig
)
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig

# Classification

## Download the dataset
The "Adult" dataset, also known as the "Census Income" or "adult.data" dataset, is widely used in machine learning for tasks that involve classifying two different categories. It was created by Barry Becker from data collected by the United States Census Bureau in 1994. The main goal with this data is to predict if a person's income is over $50,000 a year based on various other pieces of information.


In [2]:
import pandas as pd

# Raw Adult/Census Income data from the UCI repository, fetched over HTTPS.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income']

# The file has no header row, so the column names are supplied explicitly.
# Fields are separated by ", "; skipinitialspace=True drops the space after each
# comma so string values are read as "Private" / ">50K" instead of " Private" /
# " >50K" (which would otherwise leak into category labels and output columns).
data = pd.read_csv(url, names=column_names, skipinitialspace=True)

# Save the dataframe into a CSV file
data.to_csv('adult.csv', index=False)


## Create train, test split

In [15]:
# Reproducible 80/20 split: sample 80% of the rows for training, then keep
# every row whose index label was not sampled as the held-out test set.
train = data.sample(random_state=0, frac=0.8)
test = data.loc[~data.index.isin(train.index)]

# Column roles for the Adult dataset: categorical features, numeric features,
# and the target (kept as a one-element list).
cat_col_names = [
    "workclass",
    "education",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "native-country",
]
num_col_names = [
    "age",
    "fnlwgt",
    "education-num",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
]
target_col_name = ["income"]


## Set up the configurations
This is a critical step in the procedure. You'll need to supply four configurations (most of them come with sensible default values), which will guide the rest of the process.

1. DataConfig - This is where you specify the names of the target, categorical, and numerical columns, as well as any transformations that need to be done.

2. ModelConfig - Each model has its own specific configuration. This config not only determines the model we'll train but also allows you to set the model's hyperparameters.

3. TrainerConfig - This config allows you to tailor the training process by setting parameters such as batch size, number of epochs, early stopping criteria, etc. Most of these parameters are taken directly from PyTorch Lightning and are passed to the underlying Trainer object during the training process.

4. OptimizerConfig - This configuration lets you choose the optimizer and learning rate scheduler. Standard PyTorch optimizers and learning rate schedulers are supported. If you want to use a custom optimizer, you can override this configuration by passing the optimizer to the `fit` method through its `optimizer` parameter. The custom optimizer must be compatible with PyTorch.

In [16]:
# Data: which columns are the target / categorical / continuous features, and
# how the continuous ones are transformed before reaching the model.
data_config = DataConfig(
    target=target_col_name,
    categorical_cols=cat_col_names,
    continuous_cols=num_col_names,
    normalize_continuous_features=True,
    continuous_feature_transform="quantile_normal",
)

# Training loop: automatic learning-rate search, early stopping and
# checkpointing both keyed on "valid_loss", best checkpoint reloaded at the end.
trainer_config = TrainerConfig(
    batch_size=256,
    max_epochs=100,
    auto_lr_find=True,
    early_stopping="valid_loss",
    early_stopping_mode="min",
    early_stopping_patience=5,
    checkpoints="valid_loss",
    load_best=True,
)

# Optimizer: library defaults
optimizer_config = OptimizerConfig()

# Head settings, handed to the model configs as a plain dict of the
# LinearHeadConfig attributes.
head_config = vars(
    LinearHeadConfig(layers="", dropout=0.1, initialization="kaiming")
)

In the following sections, we will train our classifier with three models: TabTransformer, FT-Transformer, and TabNet.

# TabTransformer

In [None]:
# TabTransformer configured for classification, using the linear head
# settings prepared in head_config.
model_config = TabTransformerConfig(
    task="classification",
    learning_rate=1e-3,
    head="LinearHead",
    head_config=head_config,
)

In [17]:
# Bundle the four configurations into a single trainable TabularModel.
tabular_model = TabularModel(
    model_config=model_config,
    data_config=data_config,
    trainer_config=trainer_config,
    optimizer_config=optimizer_config,
)

2023-06-18 17:44:30,519 - {pytorch_tabular.tabular_model:105} - INFO - Experiment Tracking is turned off
INFO:pytorch_tabular.tabular_model:Experiment Tracking is turned off


In [18]:
# Train on the training split only; no explicit validation frame is passed here.
tabular_model.fit(train=train)
# Score the held-out rows. This is the cell's last expression, so the returned
# metrics are what the notebook displays.
tabular_model.evaluate(test)

INFO:lightning_fabric.utilities.seed:Global seed set to 42
2023-06-18 17:45:12,091 - {pytorch_tabular.tabular_model:473} - INFO - Preparing the DataLoaders
INFO:pytorch_tabular.tabular_model:Preparing the DataLoaders
2023-06-18 17:45:12,100 - {pytorch_tabular.tabular_datamodule:290} - INFO - Setting up the datamodule for classification task
INFO:pytorch_tabular.tabular_datamodule:Setting up the datamodule for classification task
2023-06-18 17:45:12,498 - {pytorch_tabular.tabular_model:521} - INFO - Preparing the Model: TabTransformerModel
INFO:pytorch_tabular.tabular_model:Preparing the Model: TabTransformerModel
2023-06-18 17:45:12,602 - {pytorch_tabular.tabular_model:268} - INFO - Preparing the Trainer
INFO:pytorch_tabular.tabular_model:Preparing the Trainer
  rank_zero_deprecation(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
INFO:pytorch_lightning.tuner.lr_finder:LR finder stopped early after 92 steps due to diverging loss.
INFO:pytorch_lightning.tuner.lr_finder:Learning rate set to 0.0005248074602497723
INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/.lr_find_60ef394a-f3f8-41ce-8ae2-5daad95a9006.ckpt
INFO:pytorch_lightning.utilities.rank_zero:Restored all states from the checkpoint file at /content/.lr_find_60ef394a-f3f8-41ce-8ae2-5daad95a9006.ckpt
2023-06-18 17:45:43,251 - {pytorch_tabular.tabular_model:575} - INFO - Suggested LR: 0.0005248074602497723. For plot and detailed analysis, use `find_learning_rate` method.
INFO:pytorch_tabular.tabular_model:Suggested LR: 0.0005248074602497723. For plot and detailed analysis, use `find_learning_rate` method.
2023-06-18 17:45:43,257 - {pytorch_tabular.tabular_model:582} - INFO - Training Started
INFO:pytorch_tabular.tabular_model:Training Started


Output()

2023-06-18 17:50:14,793 - {pytorch_tabular.tabular_model:584} - INFO - Training the model completed
INFO:pytorch_tabular.tabular_model:Training the model completed
2023-06-18 17:50:14,799 - {pytorch_tabular.tabular_model:1258} - INFO - Loading the best model
INFO:pytorch_tabular.tabular_model:Loading the best model
  rank_zero_deprecation(


Output()

[{'test_loss': 0.3315524160861969, 'test_accuracy': 0.8430589437484741}]

# FT Transformer

In [20]:
# FT-Transformer classifier: same data/trainer/optimizer configs as before,
# only the model configuration changes.
model_config = FTTransformerConfig(
    task="classification",
    head="LinearHead",
    head_config=head_config,
    learning_rate=1e-3,
)

tabular_model = TabularModel(
    model_config=model_config,
    data_config=data_config,
    trainer_config=trainer_config,
    optimizer_config=optimizer_config,
)

# Train, then display the metrics on the held-out rows.
tabular_model.fit(train=train)
tabular_model.evaluate(test)

2023-06-18 17:57:17,619 - {pytorch_tabular.tabular_model:105} - INFO - Experiment Tracking is turned off
INFO:pytorch_tabular.tabular_model:Experiment Tracking is turned off
INFO:lightning_fabric.utilities.seed:Global seed set to 42
2023-06-18 17:57:17,656 - {pytorch_tabular.tabular_model:473} - INFO - Preparing the DataLoaders
INFO:pytorch_tabular.tabular_model:Preparing the DataLoaders
2023-06-18 17:57:17,664 - {pytorch_tabular.tabular_datamodule:290} - INFO - Setting up the datamodule for classification task
INFO:pytorch_tabular.tabular_datamodule:Setting up the datamodule for classification task
2023-06-18 17:57:17,917 - {pytorch_tabular.tabular_model:521} - INFO - Preparing the Model: FTTransformerModel
INFO:pytorch_tabular.tabular_model:Preparing the Model: FTTransformerModel
2023-06-18 17:57:17,971 - {pytorch_tabular.tabular_model:268} - INFO - Preparing the Trainer
INFO:pytorch_tabular.tabular_model:Preparing the Trainer
  rank_zero_deprecation(
INFO:pytorch_lightning.utilities

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=100` reached.
INFO:pytorch_lightning.tuner.lr_finder:Learning rate set to 0.003981071705534969
INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/.lr_find_191ee530-884b-4a26-b932-56a47520f36d.ckpt
INFO:pytorch_lightning.utilities.rank_zero:Restored all states from the checkpoint file at /content/.lr_find_191ee530-884b-4a26-b932-56a47520f36d.ckpt
2023-06-18 17:58:12,582 - {pytorch_tabular.tabular_model:575} - INFO - Suggested LR: 0.003981071705534969. For plot and detailed analysis, use `find_learning_rate` method.
INFO:pytorch_tabular.tabular_model:Suggested LR: 0.003981071705534969. For plot and detailed analysis, use `find_learning_rate` method.
2023-06-18 17:58:12,587 - {pytorch_tabular.tabular_model:582} - INFO - Training Started
INFO:pytorch_tabular.tabular_model:Training Started


Output()

2023-06-18 18:07:11,777 - {pytorch_tabular.tabular_model:584} - INFO - Training the model completed
INFO:pytorch_tabular.tabular_model:Training the model completed
2023-06-18 18:07:11,782 - {pytorch_tabular.tabular_model:1258} - INFO - Loading the best model
INFO:pytorch_tabular.tabular_model:Loading the best model


Output()

  rank_zero_deprecation(


[{'test_loss': 0.32397058606147766, 'test_accuracy': 0.85012286901474}]

In [19]:
# TabNet

In [22]:
# TabNet classifier: same data/trainer/optimizer configs as before,
# only the model configuration changes.
model_config = TabNetModelConfig(
    task="classification",
    head="LinearHead",
    head_config=head_config,
    learning_rate=1e-3,
)

tabular_model = TabularModel(
    model_config=model_config,
    data_config=data_config,
    trainer_config=trainer_config,
    optimizer_config=optimizer_config,
)

# Train, then display the metrics on the held-out rows.
tabular_model.fit(train=train)
tabular_model.evaluate(test)

2023-06-18 18:19:25,919 - {pytorch_tabular.tabular_model:105} - INFO - Experiment Tracking is turned off
INFO:pytorch_tabular.tabular_model:Experiment Tracking is turned off
INFO:lightning_fabric.utilities.seed:Global seed set to 42
2023-06-18 18:19:25,950 - {pytorch_tabular.tabular_model:473} - INFO - Preparing the DataLoaders
INFO:pytorch_tabular.tabular_model:Preparing the DataLoaders
2023-06-18 18:19:25,955 - {pytorch_tabular.tabular_datamodule:290} - INFO - Setting up the datamodule for classification task
INFO:pytorch_tabular.tabular_datamodule:Setting up the datamodule for classification task
2023-06-18 18:19:26,202 - {pytorch_tabular.tabular_model:521} - INFO - Preparing the Model: TabNetModel
INFO:pytorch_tabular.tabular_model:Preparing the Model: TabNetModel
2023-06-18 18:19:26,257 - {pytorch_tabular.tabular_model:268} - INFO - Preparing the Trainer
INFO:pytorch_tabular.tabular_model:Preparing the Trainer
  rank_zero_deprecation(
INFO:pytorch_lightning.utilities.rank_zero:GPU

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=100` reached.
INFO:pytorch_lightning.tuner.lr_finder:Learning rate set to 0.02089296130854041
INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/.lr_find_7769f5ac-1225-4764-afd2-806d611c71ff.ckpt
INFO:pytorch_lightning.utilities.rank_zero:Restored all states from the checkpoint file at /content/.lr_find_7769f5ac-1225-4764-afd2-806d611c71ff.ckpt
2023-06-18 18:19:31,462 - {pytorch_tabular.tabular_model:575} - INFO - Suggested LR: 0.02089296130854041. For plot and detailed analysis, use `find_learning_rate` method.
INFO:pytorch_tabular.tabular_model:Suggested LR: 0.02089296130854041. For plot and detailed analysis, use `find_learning_rate` method.
2023-06-18 18:19:31,468 - {pytorch_tabular.tabular_model:582} - INFO - Training Started
INFO:pytorch_tabular.tabular_model:Training Started


Output()

2023-06-18 18:23:07,247 - {pytorch_tabular.tabular_model:584} - INFO - Training the model completed
INFO:pytorch_tabular.tabular_model:Training the model completed
2023-06-18 18:23:07,252 - {pytorch_tabular.tabular_model:1258} - INFO - Loading the best model
INFO:pytorch_tabular.tabular_model:Loading the best model


Output()

[{'test_loss': 0.3304014503955841, 'test_accuracy': 0.8453624248504639}]

# Saving The model

In [25]:
# Persist the current `tabular_model` under "income_prediction.model".
# When the notebook is run top to bottom this is the TabNet classifier, because
# each training section rebinds `tabular_model`.
tabular_model.save_model("income_prediction.model")

# Loading the model from file and making inference

In [27]:
# Restore the model that was saved to "income_prediction.model".
# NOTE(review): later pytorch-tabular releases appear to expose this loader as
# `TabularModel.load_model` -- confirm against the installed version.
loaded_model = TabularModel.load_from_checkpoint("income_prediction.model")

# Predict with the *reloaded* model (not the in-memory `tabular_model`), so this
# cell actually demonstrates that the saved artifact can be used for inference.
pred_df = loaded_model.predict(test)
pred_df.head()

2023-06-18 18:34:48,017 - {pytorch_tabular.tabular_model:129} - INFO - Experiment Tracking is turned off
INFO:pytorch_tabular.tabular_model:Experiment Tracking is turned off
2023-06-18 18:34:48,026 - {pytorch_tabular.tabular_model:268} - INFO - Preparing the Trainer
INFO:pytorch_tabular.tabular_model:Preparing the Trainer
  rank_zero_deprecation(
INFO:pytorch_lightning.utilities.rank_zero:Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.rich_model_summary.RichModelSummary'>]. Skipping setting a default `ModelSummary` callback.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income,<=50K_probability,>50K_probability,prediction
10,37,Private,280464,Some-college,10,Married-civ-spouse,Exec-managerial,Husband,Black,Male,0,0,80,United-States,>50K,0.404782,0.595218,>50K
13,32,Private,205019,Assoc-acdm,12,Never-married,Sales,Not-in-family,Black,Male,0,0,50,United-States,<=50K,0.917458,0.082542,<=50K
19,43,Self-emp-not-inc,292175,Masters,14,Divorced,Exec-managerial,Unmarried,White,Female,0,0,45,United-States,>50K,0.665317,0.334683,<=50K
28,39,Private,367260,HS-grad,9,Divorced,Exec-managerial,Not-in-family,White,Male,0,0,80,United-States,<=50K,0.659728,0.340272,<=50K
40,31,Private,507875,9th,5,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,43,United-States,<=50K,0.816894,0.183106,<=50K


# Regression Problem

The Ames Housing dataset describes the sale of individual residential properties in Ames, Iowa from 2006 to 2010. It contains a large number of explanatory variables (over 80) involved in assessing home values, offering a rich set of variables for predictive modeling.

The variables involved cover a wide range of aspects, including:

1. General characteristics of the property, such as the type of dwelling, the zone where it is located, its proximity to various amenities and roads, and the overall shape and layout of the property and lot.
2. Specific features of the house, such as the type of roof, exterior, masonry, and foundation.
3. The overall quality and condition of various aspects of the house, from the exterior to the heating.
4. Information about various areas of the house, like the basement, garage, and porch, and the presence of a pool.
5. The number and quality of rooms, bedrooms, kitchens, and bathrooms.
6. Information about the sale, such as the type of sale, the condition of sale, and the month and year of the sale.

The target variable is the final price at which the property was sold. This makes it a regression problem if we want to build a machine learning model to predict the sale price based on the rest of the variables.

## Download the dataset

In [60]:
import pandas as pd

# CSV copy of the Ames housing data hosted on GitHub. This rebinds `url`,
# which the classification section used for the Adult dataset.
url = "https://raw.githubusercontent.com/wblakecannon/ames/master/data/housing.csv"
# Read with pandas defaults; the header row of the file provides the column names.
# NOTE(review): the prediction table shown later in the notebook includes an
# "Unnamed: 0" column, which suggests the file stores an index column -- confirm
# and consider `index_col=0`.
ames_df = pd.read_csv(url)



## Specify the continuous and categorical variables
 Note: This assignment of columns to the two groups is a starting point; you could refine it further.

In [61]:
# Column roles for the Ames dataset. Everything in `cat_cols` is treated as
# categorical by the model, everything in `num_cols` as continuous.
cat_cols = [
    "Garage Yr Blt", "Mo Sold", "Yr Sold",
    "Open Porch SF", "Enclosed Porch", "3Ssn Porch", "Screen Porch",
    "Wood Deck SF", "Fireplaces", "Year Remod/Add", "Year Built",
    "Overall Cond", "Overall Qual", "MS SubClass", "MS Zoning",
    "Street", "Alley", "Lot Shape", "Land Contour", "Utilities",
    "Lot Config", "Land Slope", "Neighborhood", "Condition 1", "Condition 2",
    "Bldg Type", "House Style", "Roof Style", "Roof Matl",
    "Exterior 1st", "Exterior 2nd", "Mas Vnr Type", "Exter Qual", "Exter Cond",
    "Foundation", "Bsmt Qual", "Bsmt Cond", "Bsmt Exposure",
    "BsmtFin Type 1", "BsmtFin Type 2", "Heating", "Heating QC",
    "Central Air", "Electrical", "Kitchen Qual", "Functional", "Fireplace Qu",
    "Garage Type", "Garage Finish", "Garage Qual", "Garage Cond",
    "Paved Drive", "Pool QC", "Fence", "Misc Feature",
    "Sale Type", "Sale Condition",
]
num_cols = [
    "Lot Frontage", "Lot Area", "Mas Vnr Area",
    "BsmtFin SF 1", "BsmtFin SF 2", "Bsmt Unf SF", "Total Bsmt SF",
    "1st Flr SF", "2nd Flr SF", "Low Qual Fin SF", "Gr Liv Area",
    "Bsmt Full Bath", "Bsmt Half Bath", "Full Bath", "Half Bath",
    "Bedroom AbvGr", "Kitchen AbvGr", "TotRms AbvGrd",
    "Garage Cars", "Garage Area", "Pool Area", "Misc Val",
]
# Regression target, kept as a one-element list.
target_col = ["SalePrice"]

## Perform Null Value Imputation
1. Replace missing values with the mode for categorical variables
2. Replace missing values with the median for continuous variables

Note: You could further optimize this

> Indented block



In [None]:
# Fill missing categorical values with each column's most frequent value.
# The result is assigned back to the frame: calling fillna(inplace=True) on the
# selection `ames_df[col]` is chained in-place assignment, which recent pandas
# 2.x releases warn about and which does not modify `ames_df` at all once
# Copy-on-Write is enabled.
for col in cat_cols:
    ames_df[col] = ames_df[col].fillna(ames_df[col].mode()[0])

# Replace NaN in continuous columns (and the target) with the median
for col in num_cols + target_col:
    ames_df[col] = ames_df[col].fillna(ames_df[col].median())

# Drop any row that still has a missing value in a column not handled above.
ames_df = ames_df.dropna()

# Check the first few rows
print(ames_df.shape)
print(ames_df.head())

## Apply min-max scaling

In [63]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every continuous column, and the target, with a MinMaxScaler fitted
# on those same columns of `ames_df`.
# NOTE(review): the scaler is fitted on the full frame before the train/test
# split made later in the notebook, so held-out rows influence the scaling, and
# the reported losses are in scaled target units -- confirm this is intended.
cols_to_scale = num_cols + target_col
scaler = MinMaxScaler()
scaler.fit(ames_df[cols_to_scale])

# Write the scaled values back over the original columns.
scaled_values = scaler.transform(ames_df[cols_to_scale])
ames_df[cols_to_scale] = scaled_values

In [64]:
# Show the three column groups, one list per line.
print(cat_cols, num_cols, target_col, sep="\n")

['Garage Yr Blt', 'Mo Sold', 'Yr Sold', 'Open Porch SF', 'Enclosed Porch', '3Ssn Porch', 'Screen Porch', 'Wood Deck SF', 'Fireplaces', 'Year Remod/Add', 'Year Built', 'Overall Cond', 'Overall Qual', 'MS SubClass', 'MS Zoning', 'Street', 'Alley', 'Lot Shape', 'Land Contour', 'Utilities', 'Lot Config', 'Land Slope', 'Neighborhood', 'Condition 1', 'Condition 2', 'Bldg Type', 'House Style', 'Roof Style', 'Roof Matl', 'Exterior 1st', 'Exterior 2nd', 'Mas Vnr Type', 'Exter Qual', 'Exter Cond', 'Foundation', 'Bsmt Qual', 'Bsmt Cond', 'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2', 'Heating', 'Heating QC', 'Central Air', 'Electrical', 'Kitchen Qual', 'Functional', 'Fireplace Qu', 'Garage Type', 'Garage Finish', 'Garage Qual', 'Garage Cond', 'Paved Drive', 'Pool QC', 'Fence', 'Misc Feature', 'Sale Type', 'Sale Condition']
['Lot Frontage', 'Lot Area', 'Mas Vnr Area', 'BsmtFin SF 1', 'BsmtFin SF 2', 'Bsmt Unf SF', 'Total Bsmt SF', '1st Flr SF', '2nd Flr SF', 'Low Qual Fin SF', 'Gr Liv Area',

## Train, Test split

In [66]:
# Reproducible 80/20 split of the prepared Ames frame: sample 80% of the rows
# for training and keep every row whose index label was not sampled for testing.
train = ames_df.sample(random_state=0, frac=0.8)
test = ames_df.loc[~ames_df.index.isin(train.index)]

## Define Model Configuration

In [67]:
# Data: which Ames columns are the target / categorical / continuous features,
# and how the continuous ones are transformed before reaching the model.
data_config = DataConfig(
    target=target_col,
    categorical_cols=cat_cols,
    continuous_cols=num_cols,
    normalize_continuous_features=True,
    continuous_feature_transform="quantile_normal",
)

# Training loop: automatic learning-rate search, early stopping and
# checkpointing both keyed on "valid_loss", best checkpoint reloaded at the end.
trainer_config = TrainerConfig(
    batch_size=256,
    max_epochs=100,
    auto_lr_find=True,
    early_stopping="valid_loss",
    early_stopping_mode="min",
    early_stopping_patience=5,
    checkpoints="valid_loss",
    load_best=True,
)

# Optimizer: library defaults
optimizer_config = OptimizerConfig()

# Head settings, handed to the model config as a plain dict of the
# LinearHeadConfig attributes.
head_config = vars(
    LinearHeadConfig(layers="", dropout=0.1, initialization="kaiming")
)




In [68]:
# FT-Transformer set up as a regressor for SalePrice, using the linear head
# settings prepared in head_config.
model_config = FTTransformerConfig(
    task="regression",
    head="LinearHead",
    head_config=head_config,
    learning_rate=1e-3,
)

tabular_model = TabularModel(
    model_config=model_config,
    data_config=data_config,
    trainer_config=trainer_config,
    optimizer_config=optimizer_config,
)

# Train, then display the metrics on the held-out rows.
tabular_model.fit(train=train)
tabular_model.evaluate(test)

2023-06-19 12:56:27,987 - {pytorch_tabular.tabular_model:105} - INFO - Experiment Tracking is turned off
INFO:pytorch_tabular.tabular_model:Experiment Tracking is turned off
INFO:lightning_fabric.utilities.seed:Global seed set to 42
2023-06-19 12:56:28,031 - {pytorch_tabular.tabular_model:473} - INFO - Preparing the DataLoaders
INFO:pytorch_tabular.tabular_model:Preparing the DataLoaders
2023-06-19 12:56:28,041 - {pytorch_tabular.tabular_datamodule:290} - INFO - Setting up the datamodule for regression task
INFO:pytorch_tabular.tabular_datamodule:Setting up the datamodule for regression task
2023-06-19 12:56:28,557 - {pytorch_tabular.tabular_model:521} - INFO - Preparing the Model: FTTransformerModel
INFO:pytorch_tabular.tabular_model:Preparing the Model: FTTransformerModel
2023-06-19 12:56:28,730 - {pytorch_tabular.tabular_model:268} - INFO - Preparing the Trainer
INFO:pytorch_tabular.tabular_model:Preparing the Trainer
  rank_zero_deprecation(
INFO:pytorch_lightning.utilities.rank_ze

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
INFO:pytorch_lightning.tuner.lr_finder:LR finder stopped early after 83 steps due to diverging loss.
INFO:pytorch_lightning.tuner.lr_finder:Learning rate set to 0.000363078054770101
INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/.lr_find_d0e2a2e1-f9d7-42ec-a1cc-8bbb35951a17.ckpt
INFO:pytorch_lightning.utilities.rank_zero:Restored all states from the checkpoint file at /content/.lr_find_d0e2a2e1-f9d7-42ec-a1cc-8bbb35951a17.ckpt
2023-06-19 13:02:08,888 - {pytorch_tabular.tabular_model:575} - INFO - Suggested LR: 0.000363078054770101. For plot and detailed analysis, use `find_learning_rate` method.
INFO:pytorch_tabular.tabular_model:Suggested LR: 0.000363078054770101. For plot and detailed analysis, use `find_learning_rate` method.
2023-06-19 13:02:08,895 - {pytorch_tabular.tabular_model:582} - INFO - Training Started
INFO:pytorch_tabular.tabular_model:Training Started


Output()

2023-06-19 13:09:33,439 - {pytorch_tabular.tabular_model:584} - INFO - Training the model completed
INFO:pytorch_tabular.tabular_model:Training the model completed
2023-06-19 13:09:33,445 - {pytorch_tabular.tabular_model:1258} - INFO - Loading the best model
INFO:pytorch_tabular.tabular_model:Loading the best model
  rank_zero_deprecation(


Output()

[{'test_loss': 0.0030161135364323854,
  'test_mean_squared_error': 0.0030161135364323854}]

In [69]:
# Run the trained regressor over the held-out rows and preview the first five
# rows of the returned frame.
prediction = tabular_model.predict(test)
prediction.head(5)

Output()

Unnamed: 0.1,Unnamed: 0,Order,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,...,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type,Sale Condition,SalePrice,SalePrice_prediction
0,0,1,526301100,20,RL,0.410959,0.14242,Pave,Grvl,IR1,...,Ex,MnPrv,Shed,0.0,5,2010,WD,Normal,0.272444,0.28151
3,3,4,526353030,20,RL,0.246575,0.046087,Pave,Grvl,Reg,...,Ex,MnPrv,Shed,0.0,4,2010,WD,Normal,0.311517,0.344626
7,7,8,527145080,120,RL,0.075342,0.017318,Pave,Grvl,IR1,...,Ex,MnPrv,Shed,0.0,1,2010,WD,Normal,0.240782,0.226344
21,21,22,527358200,85,RL,0.219178,0.043586,Pave,Grvl,Reg,...,Ex,MnPrv,Shed,0.0,1,2010,WD,Family,0.211814,0.204646
24,24,25,527402250,20,RL,0.160959,0.052523,Pave,Grvl,IR1,...,Ex,MnPrv,Shed,0.0,4,2010,WD,Normal,0.184733,0.244087


In [70]:
from sklearn.metrics import r2_score

# Coefficient of determination between the actual SalePrice column and the
# model's SalePrice_prediction column of the prediction frame.
r2 = r2_score(
    y_true=prediction['SalePrice'],
    y_pred=prediction['SalePrice_prediction'],
)

print(f"R2 Score: {r2}")

R2 Score: 0.735613747041542
