# Model Training Example

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from traintrack.datasets import Dataset
from traintrack.model import Model, list_models
import pandas as pd

In [3]:
from sklearn.model_selection import train_test_split
data = {
    "Bedrooms": [2, 3, 3, 4, 2, 4, 3, 5, 2, 4],
    "Bathrooms": [1, 2, 1, 3, 1, 2, 2, 3, 1, 2],
    "Sqft": [900, 1500, 1200, 2000, 950, 1850, 1400, 2500, 1000, 2100],
    "Age": [30, 15, 20, 5, 40, 10, 12, 4, 35, 8],  # years
    "Price": [200_000, 340_000, 275_000, 500_000, 210_000, 480_000, 320_000, 600_000, 205_000, 520_000],
}

df = pd.DataFrame(data)

X = df[["Bedrooms", "Bathrooms", "Sqft", "Age"]]
y = df[["Price"]]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

dataset = Dataset(None, "training_data", "1.0.0", "Raw data")
dataset.set_artefact("input_features_train", X_train)
dataset.set_artefact("output_train", y_train)
dataset.set_artefact("input_features_test", X_test)
dataset.set_artefact("output_test", y_test)
dataset

<Dataset training_data:1.0.0>

In [4]:
def setup_model(dataset, config):
    from sklearn.ensemble import RandomForestClassifier
    return RandomForestClassifier(n_estimators=config['n_estimators'])

def train_model(model_obj, dataset):
    X, y = dataset.artefacts["input_features_train"], dataset.artefacts["output_train"]
    model_obj.fit(X, y)
    return model_obj

def eval_model(model_obj, dataset):
    from sklearn.metrics import mean_absolute_error, mean_squared_error, root_mean_squared_error, r2_score
    X = dataset.artefacts["input_features_test"]
    y_true = dataset.artefacts["output_test"]
    y_pred = model_obj.predict(X)
    # print("True values:", y_true)
    # print("Predicted values:", y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = root_mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return {
        "mae": mae,
        "mse": mse,
        "rmse": rmse,
        "r2": r2,
    }


In [5]:
model = Model(None, "house_prices", "0.0.1", "initial model", dataset=dataset, config={'n_estimators': 100})

model.setup(setup_model)

In [6]:
model.train(train_model)

In [7]:
model.eval(eval_model)
model.evaluation

{'mae': 12500.0,
 'mse': 212500000.0,
 'rmse': 14577.379737113251,
 'r2': 0.953360768175583}

In [8]:
model.dataset

<Dataset training_data:1.0.0>

In [9]:
def pretty_print_dict(d, indent=""):
    new_indent = indent + "\t"
    for key, value in d.items():
        print(f"{indent}{key}:")
        if isinstance(value, str) and '\n' in value:
            lines = value.rstrip().split('\n')
            for line in lines:
                print(f"{new_indent}{line}")
        elif isinstance(value, dict):
            pretty_print_dict(value, new_indent)
        else:
            print(f"{new_indent}{value}")
        print() 

In [10]:
print("## Config")
pretty_print_dict(model.config)

print("## Model")
print(model.model_obj)
print() 

print("## Trained Model")
print(model.trained_model)
print() 

print("## Metadata")
pretty_print_dict(model.metadata)

print("## Environment")
pretty_print_dict(model.environment)

## Config
n_estimators:
	100

## Model
RandomForestClassifier()

## Trained Model
RandomForestClassifier()

## Metadata
setup_fn_source:
	def setup_model(dataset, config):
	    from sklearn.ensemble import RandomForestClassifier
	    return RandomForestClassifier(n_estimators=config['n_estimators'])

model_class:
	RandomForestClassifier

init_params:
	bootstrap:
		True

	ccp_alpha:
		0.0

	class_weight:
		None

	criterion:
		gini

	max_depth:
		None

	max_features:
		sqrt

	max_leaf_nodes:
		None

	max_samples:
		None

	min_impurity_decrease:
		0.0

	min_samples_leaf:
		1

	min_samples_split:
		2

	min_weight_fraction_leaf:
		0.0

	monotonic_cst:
		None

	n_estimators:
		100

	n_jobs:
		None

	oob_score:
		False

	random_state:
		None

	verbose:
		0

	warm_start:
		False


train_fn_source:
	def train_model(model_obj, dataset):
	    X, y = dataset.artefacts["input_features_train"], dataset.artefacts["output_train"]
	    model_obj.fit(X, y)
	    return model_obj

eval_fn_source:
	def eva

In [11]:
model.save()
model

<traintrack.model.Model at 0x11273a6d0>

In [12]:
models = list_models()
models

<Models 2 items>

In [13]:
loaded_model = models.latest_version('house_prices')
loaded_model

<traintrack.model.Model at 0x14cf02ca0>

In [14]:
eval_model(loaded_model.trained_model, dataset)

{'mae': 12500.0,
 'mse': 212500000.0,
 'rmse': 14577.379737113251,
 'r2': 0.953360768175583}