In [1]:
import os
import warnings

import joblib
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

In [2]:
# Record the installed scikit-learn version; it is stored alongside the model
# in the checkpoint dictionaries built further down.
scikit_learn_version = sklearn.__version__
scikit_learn_version

'0.24.1'

In [3]:
# Load the first batch of training data and show its (rows, columns) shape.
auto_train = pd.read_csv('datasets/automobiles_file1.csv')
auto_train.shape

(82, 52)

In [4]:
# Load the evaluation data and show its (rows, columns) shape.
auto_test = pd.read_csv('datasets/automobiles_test.csv')
auto_test.shape

(41, 52)

In [5]:
# Separate the regression target ('price') from the feature columns.
y_train = auto_train['price']
X_train = auto_train.drop(columns='price')

In [6]:
# Same target/feature separation for the evaluation data.
y_test = auto_test['price']
X_test = auto_test.drop(columns='price')

In [7]:
# Start with a small 5-tree forest. warm_start=True lets a later fit() call
# add trees to this ensemble instead of rebuilding it from scratch.
# NOTE(review): no random_state is passed, so the fitted trees -- and every
# score reported below -- will vary from run to run.
regressor_model =  RandomForestRegressor(n_estimators=5, warm_start=True)
rfr_model = regressor_model.fit(X_train, y_train)

rfr_model

RandomForestRegressor(n_estimators=5, warm_start=True)

In [8]:
# Score the model on the data it was fitted on (R^2 for regressors).
train_score = rfr_model.score(X_train, y_train)
train_score

0.9664941543513106

In [9]:
# Predict prices for the held-out test features.
y_pred = rfr_model.predict(X_test)

In [10]:
# R^2 of the initial model on the test set.
test_score = r2_score(y_test, y_pred)
test_score

0.8183343211585542

In [11]:
# Bundle the fitted model with the metadata needed to interpret it later:
# the scikit-learn version it was trained under and its test-set R^2.
rfr_model_param = {
    'model': rfr_model,
    'sklearn_version': scikit_learn_version,
    'r2_score': test_score,
}

In [12]:
# Inspect the checkpoint dictionary before writing it to disk.
rfr_model_param

{'model': RandomForestRegressor(n_estimators=5, warm_start=True),
 'sklearn_version': '0.24.1',
 'r2_score': 0.8183343211585542}

In [13]:
import joblib

In [14]:
# Destination of the initial checkpoint.
# NOTE(review): `filename` is rebound further down for the retrained
# checkpoint, so cells that use it depend on execution order.
filename = 'models/rfr_model_checkpoint.joblib'

In [15]:
# Create the target directory on first use so the dump does not fail with
# FileNotFoundError on a fresh checkout ('.' covers a bare file name).
os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)

# Persist the model together with its metadata; joblib.dump returns the list
# of files written, which is shown as the cell output.
joblib.dump(rfr_model_param, filename)

['models/rfr_model_checkpoint.joblib']

In [16]:
# Restore the checkpoint dictionary (model plus metadata) from disk.
# NOTE: joblib.load unpickles arbitrary objects, so only load trusted files.
joblib_model = joblib.load(filename)

# The checkpoint records the scikit-learn version it was created with. Warn
# when the running version differs, because pickled estimators are not
# guaranteed to load or behave correctly across scikit-learn versions.
if joblib_model['sklearn_version'] != sklearn.__version__:
    warnings.warn(
        "Checkpoint was saved with scikit-learn "
        f"{joblib_model['sklearn_version']} but {sklearn.__version__} is "
        "installed; the restored model may be unreliable.",
        UserWarning,
    )

In [17]:
# Show the restored estimator and the hyperparameters it came back with.
joblib_model['model']

RandomForestRegressor(n_estimators=5, warm_start=True)

In [18]:
# scikit-learn version that was recorded when the checkpoint was saved.
joblib_model['sklearn_version']

'0.24.1'

In [19]:
# Raise the target ensemble size from 5 to 15 trees. Because the estimator was
# built with warm_start=True, the next fit() keeps the existing trees and only
# trains the additional ones. set_params returns the estimator, so the
# updated repr is displayed as the cell output.
joblib_model['model'].set_params(n_estimators=15)

RandomForestRegressor(n_estimators=15, warm_start=True)

In [20]:
# Load the second batch of training data and show its (rows, columns) shape.
auto_retrain = pd.read_csv('datasets/automobiles_file2.csv')
auto_retrain.shape

(82, 52)

In [21]:
# Target/feature separation for the second batch.
# NOTE(review): this rebinds X_train / y_train, which previously held the
# first batch; cells above are no longer reproducible after this point
# without re-running them in order.
y_train = auto_retrain['price']
X_train = auto_retrain.drop(columns='price')

In [22]:
# Continue training the restored model on the second batch. With
# warm_start=True and n_estimators raised to 15, this fit() keeps the trees
# restored from the checkpoint and trains only the additional ones.
rfr_retrain = joblib_model['model'].fit(X_train, y_train)

In [23]:
# Score the grown ensemble on the second training batch (R^2 for regressors).
rfr_retrain_train_score = rfr_retrain.score(X_train, y_train)
rfr_retrain_train_score

0.960269593487997

In [24]:
# Predict the same test features with the grown ensemble (overwrites y_pred).
y_pred = rfr_retrain.predict(X_test)

In [25]:
# Test-set R^2 after retraining.
rfr_retrain_test_score = r2_score(y_test, y_pred)
rfr_retrain_test_score

0.8954898118693329

In [26]:
# Test-set R^2 stored for the initial model, shown for comparison.
rfr_model_param['r2_score']

0.8183343211585542

In [28]:
# Checkpoint dictionary for the retrained model, using the same keys as the
# initial checkpoint: the estimator, the scikit-learn version, and its
# test-set R^2.
retrained_rfr_model_param = {
    'model': rfr_retrain,
    'sklearn_version': scikit_learn_version,
    'r2_score': rfr_retrain_test_score,
}

In [29]:
# Destination of the retrained checkpoint (rebinds `filename`, which earlier
# pointed at the initial checkpoint).
filename = 'models/retrained_rfr_model_checkpoint.joblib'

In [30]:
# Make sure the target directory exists so the dump does not fail with
# FileNotFoundError if the path is changed or the directory was removed
# ('.' covers a bare file name).
os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)

# Persist the retrained model and its metadata; joblib.dump returns the list
# of files written, which is shown as the cell output.
joblib.dump(retrained_rfr_model_param, filename)

['models/retrained_rfr_model_checkpoint.joblib']