<a href="https://colab.research.google.com/github/gulabpatel/ML_Retraining_Approaches/blob/main/03_sklearn_partial_fit()_Incremental_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

sklearn partial_fit() documentation : https://coderzcolumn.com/tutorials/machine-learning/scikit-learn-incremental-learning-for-large-datasets

# Load Dataset

In [3]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Synthetic regression data (make_regression's default of 100 features).
# The same seed is used for generation and for the split.
X, Y = datasets.make_regression(n_samples=240000, random_state=123)

# 90% of the rows go to training, the remaining 10% to testing.
split_parts = train_test_split(X, Y, train_size=0.9, random_state=123)
X_train, X_test, Y_train, Y_test = split_parts

# Cell output: the shape of every split, in the order train-X, test-X, train-Y, test-Y.
tuple(part.shape for part in split_parts)

((216000, 100), (24000, 100), (216000,), (24000,))

In [5]:
# Group consecutive rows into mini-batches of 24 samples (100 features each),
# so that X_train[i] / Y_train[i] is one batch that can be fed to partial_fit().
feature_batches_shape = (-1, 24, 100)
target_batches_shape = (-1, 24)

X_train = X_train.reshape(feature_batches_shape)
X_test = X_test.reshape(feature_batches_shape)
Y_train = Y_train.reshape(target_batches_shape)
Y_test = Y_test.reshape(target_batches_shape)

# Cell output: shapes after batching.
tuple(arr.shape for arr in (X_train, X_test, Y_train, Y_test))

((9000, 24, 100), (1000, 24, 100), (9000, 24), (1000, 24))

In [7]:
# Shape of one mini-batch: features first, then targets.
tuple(batches[0].shape for batches in (X_train, Y_train))

((24, 100), (24,))

# Create and Train Model

In [8]:
from sklearn.linear_model import SGDRegressor

# Fixed seed: SGD shuffles the samples it is given, so without a seed the fitted
# weights (and therefore the reported metrics) change on every run. 123 matches
# the seed already used for data generation and the train/test split.
regressor = SGDRegressor(random_state=123)

epochs = 10  # number of full passes over the training batches
for _ in range(epochs):
    # Feed one mini-batch at a time; partial_fit() updates the current weights
    # rather than refitting, so the whole dataset is never passed in one call.
    for X_batch, Y_batch in zip(X_train, Y_train):
        regressor.partial_fit(X_batch, Y_batch)

# Evaluate Model Performance on Test Data

In [9]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict batch by batch and flatten everything into one list of floats.
Y_test_preds = [
    pred
    for test_batch in X_test
    for pred in regressor.predict(test_batch).tolist()
]

# Targets are stored as (n_batches, batch_size); flatten to align with the predictions.
Y_test_true = Y_test.reshape(-1)
test_mse = mean_squared_error(Y_test_true, Y_test_preds)
test_r2 = r2_score(Y_test_true, Y_test_preds)

print("Test MSE      : {}".format(test_mse))
print("Test R2 Score : {}".format(test_r2))

Test MSE      : 0.00024876416610145524
Test R2 Score : 0.9999999907248697


# Evaluate Model Performance on Train Data

In [10]:
from sklearn.metrics import mean_squared_error, r2_score

# Same batch-wise prediction as for the test set, this time over the training batches.
Y_train_preds = [
    pred
    for train_batch in X_train
    for pred in regressor.predict(train_batch).tolist()
]

# Flatten the (n_batches, batch_size) targets so they line up with the flat predictions.
Y_train_true = Y_train.reshape(-1)
train_mse = mean_squared_error(Y_train_true, Y_train_preds)
train_r2 = r2_score(Y_train_true, Y_train_preds)

print("Train MSE      : {}".format(train_mse))
print("Train R2 Score : {}".format(train_r2))

Train MSE      : 0.00024853890489015816
Train R2 Score : 0.9999999907225741


# Preprocessing (reload and re-batch the dataset)

In [11]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Regenerate the same synthetic dataset (same seed) so this section starts from
# fresh, un-batched 2-D arrays.
X, Y = datasets.make_regression(n_samples=240000, random_state=123)

# 90/10 train/test split with the same seed as before.
split_parts = train_test_split(X, Y, train_size=0.9, random_state=123)
X_train, X_test, Y_train, Y_test = split_parts

# Cell output: the shape of every split.
tuple(part.shape for part in split_parts)

((216000, 100), (24000, 100), (216000,), (24000,))

In [12]:
# Re-create the mini-batch layout: 24 samples per batch, 100 features per sample.
feature_batches_shape = (-1, 24, 100)
target_batches_shape = (-1, 24)

X_train = X_train.reshape(feature_batches_shape)
X_test = X_test.reshape(feature_batches_shape)
Y_train = Y_train.reshape(target_batches_shape)
Y_test = Y_test.reshape(target_batches_shape)

# Cell output: shapes after batching.
tuple(arr.shape for arr in (X_train, X_test, Y_train, Y_test))

((9000, 24, 100), (1000, 24, 100), (9000, 24), (1000, 24))

# Create and Train Model After Scaling Data

In [13]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

### Scaling Data
# First pass over the training batches: update the scaler's running statistics
# one batch at a time. StandardScaler ignores targets, so only the features are
# passed.
scaler = StandardScaler()
for X_batch in X_train:
    scaler.partial_fit(X_batch)

### Fitting Data in batches
# Fixed seed: SGD shuffles the samples it is given, so without a seed the fitted
# weights (and therefore the reported metrics) change on every run. 123 matches
# the seed already used for data generation and the train/test split.
regressor = SGDRegressor(random_state=123)

epochs = 10  # number of full passes over the training batches
for _ in range(epochs):
    for X_batch, Y_batch in zip(X_train, Y_train):
        # Scale each batch with the already-fitted scaler right before use,
        # so a scaled copy of the full training set is never stored.
        X_batch = scaler.transform(X_batch)
        regressor.partial_fit(X_batch, Y_batch)

# Evaluate Model Performance on Test Data

In [14]:
from sklearn.metrics import mean_squared_error, r2_score

# Each test batch goes through the same fitted scaler as the training data
# before prediction; results are flattened into one list of floats.
Y_test_preds = [
    pred
    for raw_batch in X_test
    for pred in regressor.predict(scaler.transform(raw_batch)).tolist()
]

# Flatten the (n_batches, batch_size) targets so they line up with the flat predictions.
Y_test_true = Y_test.reshape(-1)
test_mse = mean_squared_error(Y_test_true, Y_test_preds)
test_r2 = r2_score(Y_test_true, Y_test_preds)

print("Test MSE      : {}".format(test_mse))
print("Test R2 Score : {}".format(test_r2))

Test MSE      : 0.00024862831600677395
Test R2 Score : 0.9999999907299348
