In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Step 1 & 2: Load the data
# The CSV is read via a relative path, i.e. from the notebook's working
# directory; adjust the path if the file lives elsewhere.
df = pd.read_csv('data_for_large_scale.csv')

# Step 3: Separate features and target
# 'Target' is the label column; every remaining column is kept as a feature.
X = df.drop(columns='Target')
y = df['Target']

# Get number of features
num_features = X.shape[1]
print(f"Number of features: {num_features}")

# Step 4: Convert to arrays
# to_numpy() is the pandas-recommended replacement for the legacy .values
# attribute; X_array is 2-D (rows, features) and y_array is 1-D (rows,).
X_array = X.to_numpy()
y_array = y.to_numpy()

# Step 5: Split the dataset
# 70/30 train/test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_array, y_array,
                                                    test_size=0.3,
                                                    random_state=10)

# Step 6: Reshape the dataset (90 samples)
# Group consecutive rows into chunks of CHUNK_SIZE samples, producing arrays of
# shape (n_chunks, CHUNK_SIZE, n_features). Only the feature matrices are
# chunked here; y_train / y_test stay flat, and the training code in this cell
# fits on the flat X_train rather than on these chunked views.
CHUNK_SIZE = 90

# reshape() only succeeds when the row count is an exact multiple of the chunk
# size, so fail early with a message that names the offending sizes.
if X_train.shape[0] % CHUNK_SIZE or X_test.shape[0] % CHUNK_SIZE:
    raise ValueError(
        f"Row counts (train={X_train.shape[0]}, test={X_test.shape[0]}) "
        f"must both be multiples of CHUNK_SIZE={CHUNK_SIZE} to be reshaped."
    )

X_train_reshaped = X_train.reshape(-1, CHUNK_SIZE, X_train.shape[1])
X_test_reshaped = X_test.reshape(-1, CHUNK_SIZE, X_test.shape[1])

# Step 7: Create and train SGD regressor
sgd = SGDRegressor(random_state=10)

# Train incrementally: every partial_fit call updates the same model using the
# full (flat) training split, and the call is repeated n_passes times.
n_passes = 5
for epoch in range(n_passes):
    sgd.partial_fit(X_train, y_train)

# Report the weight of the fifth feature (index 4) once all five passes are done
print(f"Coefficient for feature-5 after 5th iteration: {sgd.coef_[4]}")

# Get final model parameters (feature-3 lives at index 2)
print(f"Intercept: {sgd.intercept_}")
print(f"Coefficient for feature-3: {sgd.coef_[2]}")

# Step 8: Make predictions and calculate metrics
# Score the trained regressor on the held-out test split.
y_pred = sgd.predict(X_test)

# The two metrics are independent of each other; both compare the true test
# targets against the model's predictions.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"R2 Score for test data: {r2}")
print(f"Mean Squared Error: {mse}")

Number of features: 10
Coefficient for feature-5 after 5th iteration: 76.46446678410382
Intercept: [0.00858904]
Coefficient for feature-3: 81.2538457066686
R2 Score for test data: 0.9999919892315331
Mean Squared Error: 0.3054578722113805


In [6]:
# Shape of the chunked training features: (n_chunks, 90, n_features)
X_train_reshaped.shape

(700, 90, 10)

In [7]:
# Shape of the chunked test features: (n_chunks, 90, n_features)
X_test_reshaped.shape

(300, 90, 10)

In [8]:
# Training targets were not reshaped, so this is a flat 1-D shape (n_rows,)
y_train.shape

(63000,)

In [9]:
# Test targets were not reshaped, so this is a flat 1-D shape (n_rows,)
y_test.shape

(27000,)