This notebook demonstrates how to serialize and deserialize a simple Random Forest machine learning model in Python. 

We'll train a small Random Forest Classifier on synthetic data, then use the pickle module to save the trained model to a file and subsequently load it back into memory. 

This process is crucial for saving trained models for later use or deployment without needing to retrain them.

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification # For generating sample data
import pickle

## Demo model: Random Forest (RF)

In [3]:
# Build a small synthetic two-class dataset.
# X is the feature matrix, y holds the class labels.
X, y = make_classification(
    n_samples=100,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    random_state=15,
)
# Peek at the first five feature rows and their labels.
print(X[:5])
print(y[:5])

# A deliberately tiny forest (10 trees) keeps the demo quick. fit() returns
# the estimator itself, so construction and training are chained here.
model = RandomForestClassifier(n_estimators=10, random_state=15).fit(X, y)

# Summarise the fitted estimator; the same fields are printed again after the
# model is reloaded so the two can be compared.
print(f"Original Model Type: {type(model)}")
print(f"Original Model n_estimators: {model.n_estimators}")
print(f"Original Model classes: {model.classes_}")

# Sanity check: classify one hand-written sample (one row, four features).
sample_data = np.array([[0.5, 1.2, -0.3, 0.8]])
prediction = model.predict(sample_data)
print(f"Prediction with original model for {sample_data}: {prediction}")

[[-1.11178545 -0.94642978 -0.2414266   0.88093159]
 [-1.0712841   0.67208313 -0.11144277 -0.30342317]
 [ 1.45537816 -0.95408643  0.68775269  1.19964312]
 [ 1.57861845 -1.42768724 -0.61128957  1.21959898]
 [ 1.92879674  1.32640903  1.21800532 -0.43525432]]
[0 0 1 1 1]
Original Model Type: <class 'sklearn.ensemble._forest.RandomForestClassifier'>
Original Model n_estimators: 10
Original Model classes: [0 1]
Prediction with original model for [[ 0.5  1.2 -0.3  0.8]]: [1]


## Serialize (Pickle) the Model to a File

In [5]:
# Define the filename for our pickled model
model_filename = 'demo_rf_model.pkl'

try:
    with open(model_filename, 'wb') as file:
        pickle.dump(model, file)
    print(f"Random Forest model successfully serialized and saved to '{model_filename}'")
except Exception as e:
    print(f"Error during pickling: {e}")

Random Forest model successfully serialized and saved to 'demo_rf_model.pkl'


## Deserialize (Unpickle) the Model from a File

In [6]:
# Reset the name first so a model bound by an earlier run of this cell cannot
# survive a failed load.
loaded_model = None
try:
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # It is acceptable here only because this notebook wrote the file itself;
    # never unpickle data from an untrusted source.
    with open(model_filename, 'rb') as file:
        loaded_model = pickle.load(file)
except FileNotFoundError:
    print(f"Error: The model file '{model_filename}' was not found.")
    # Re-raise: continuing with loaded_model = None would only fail later
    # with an unrelated-looking AttributeError.
    raise
except Exception as e:
    print(f"Error during unpickling: {e}")
    raise

# Only the load itself is guarded above, so a failure in the checks below is
# reported as what it is rather than as an unpickling error.
print(f"Random Forest model successfully deserialized from '{model_filename}'")

# Verify that the loaded model is indeed a RandomForestClassifier
print(f"Loaded Model Type: {type(loaded_model)}")
print(f"Loaded Model n_estimators: {loaded_model.n_estimators}")
print(f"Loaded Model classes: {loaded_model.classes_}")

# Make a prediction with the loaded model to confirm it works
loaded_prediction = loaded_model.predict(sample_data)
print(f"Prediction with loaded model for {sample_data}: {loaded_prediction}")

# Check if the predictions are identical
print(f"Are predictions from original and loaded models identical? {np.array_equal(prediction, loaded_prediction)}")

Random Forest model successfully deserialized from 'demo_rf_model.pkl'
Loaded Model Type: <class 'sklearn.ensemble._forest.RandomForestClassifier'>
Loaded Model n_estimators: 10
Loaded Model classes: [0 1]
Prediction with loaded model for [[ 0.5  1.2 -0.3  0.8]]: [1]
Are predictions from original and loaded models identical? True


In [8]:
# Classify a second hand-written sample with loaded_model (the estimator
# restored from the pickle file, not the original `model`) to show it can
# still make new predictions after the round trip.
test_data = np.array([[-1.5, 1.8, -0.1, 0.3]])
test_prediction = loaded_model.predict(test_data)
print(f"Prediction with loaded model for {test_data}: {test_prediction}")

Prediction with loaded model for [[-1.5  1.8 -0.1  0.3]]: [0]
