# Incremental machine learning: training a regression model with `partial_fit`



In [1]:
## Importing libraries

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import SGDRegressor as sgd

In [2]:
# Build a synthetic regression problem (any other dataset could be substituted)
# and keep 90% of the samples for training, the rest for testing.

X, Y = datasets.make_regression(
    n_samples=100_000,
    random_state=42,
)
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    train_size=0.9,
    random_state=42,
)

# Cell output: the shape of every split, in (X_train, X_test, Y_train, Y_test) order.
tuple(split.shape for split in (X_train, X_test, Y_train, Y_test))

((90000, 100), (10000, 100), (90000,), (10000,))

In [3]:
# Shape of the first element of the feature array and of the target array.
tuple(first.shape for first in (X_train[0], Y_train[0]))

((100,), ())

In [4]:
# Group consecutive samples into fixed-size mini-batches so that each slice
# along the first axis can be handed to `partial_fit` / `predict` as one chunk.
# The feature count is read from the arrays instead of being hard-coded, which
# also keeps this cell safe to re-run on already-batched data.
# NOTE: `reshape` raises ValueError if a split's sample count is not a multiple
# of BATCH_SIZE.

BATCH_SIZE = 10  # samples per mini-batch
n_features = X_train.shape[-1]  # last axis holds the features before and after batching

X_train, X_test = (
    X_train.reshape(-1, BATCH_SIZE, n_features),
    X_test.reshape(-1, BATCH_SIZE, n_features),
)
Y_train, Y_test = Y_train.reshape(-1, BATCH_SIZE), Y_test.reshape(-1, BATCH_SIZE)

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((9000, 10, 100), (1000, 10, 100), (9000, 10), (1000, 10))

In [5]:
# Shape of the first mini-batch of features and of its matching targets.
tuple(batch.shape for batch in (X_train[0], Y_train[0]))

((10, 100), (10,))

# Model training

In [6]:
%%time

# SGDRegressor fits a linear model by stochastic gradient descent; each
# `partial_fit` call performs one pass over the mini-batch it is given, so the
# model is updated incrementally, one chunk of data at a time.
# A fixed random_state makes the run reproducible (the per-call shuffling of a
# mini-batch is otherwise seeded differently on every run) and matches the seed
# used for data generation and splitting.
regressor = sgd(random_state=42)
epochs = 5  # number of full passes over all training mini-batches
for _ in range(epochs):
    # Iterating over the first axis yields one (features, targets) mini-batch pair at a time.
    for X_batch, Y_batch in zip(X_train, Y_train):
        regressor.partial_fit(X_batch, Y_batch)

CPU times: user 11.8 s, sys: 42.1 ms, total: 11.9 s
Wall time: 14.8 s


# Model prediction

In [7]:
# `predict` scores each row independently, so the batched test array is
# flattened back to (n_samples, n_features) and scored in a single call rather
# than one call per mini-batch. Row order is preserved by the reshape, so the
# predictions stay aligned with the flattened targets.
X_test_flat = X_test.reshape(-1, X_test.shape[-1])
Y_test_flat = Y_test.reshape(-1)  # flatten targets once; used by both metrics
y_test_predictions = regressor.predict(X_test_flat).tolist()

print("Mean Squared Error",mean_squared_error(Y_test_flat, y_test_predictions))
print("R2 Score : ",r2_score(Y_test_flat, y_test_predictions))

Mean Squared Error 0.0002119382886700794
R2 Score :  0.9999999896602724


# From the test prediction results, we can conclude that `partial_fit` is also one of the viable incremental approaches to model training.