<a href="https://colab.research.google.com/github/cs255214339-debug/mlproject255214339/blob/main/Multiple_linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Seed NumPy's global RNG so every run draws the same synthetic dataset
np.random.seed(42)

# Problem definition: 200 samples, 3 named features, one linear target
n_samples = 200
feature_names = ['Feature1', 'Feature2', 'Feature3']
true_weights = np.array([2.5, -1.2, 3.1])

# Draw order matters for reproducibility: the feature matrix is sampled first,
# the noise vector second. Features are standard-normal draws scaled by 2 and
# shifted by the per-column offsets 1, 2, 3.
X = np.random.standard_normal((n_samples, 3)) * 2 + np.array([1, 2, 3])
noise = np.random.standard_normal(n_samples) * 0.5

# Target is the weighted sum of the features plus the noise term (no intercept)
y = X @ true_weights + noise

# Labelled DataFrame views of the same arrays
X_df = pd.DataFrame(X, columns=feature_names)
y_df = pd.DataFrame({'Target': y})

# Hold out 20% of the samples for testing and train on the remaining 80%.
# The fixed random_state keeps the shuffle, and therefore the split, the same
# on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

def _preview_frame(features, targets):
    """Return a DataFrame holding the feature columns followed by a 'Target' column."""
    pieces = [
        pd.DataFrame(features, columns=feature_names),
        pd.DataFrame(targets, columns=['Target']),
    ]
    return pd.concat(pieces, axis=1)


# Preview of the training split: header, then the first 10 rows without the index
print("=== TRAINING DATA (First 10 rows) ===")
train_df = _preview_frame(X_train, y_train)
print(train_df.head(10).to_string(index=False))

# Visual divider between the two previews
print("\n" + "="*50 + "\n")

# Preview of the testing split, formatted the same way
print("=== TESTING DATA (First 10 rows) ===")
test_df = _preview_frame(X_test, y_test)
print(test_df.head(10).to_string(index=False))

# Announce the training step
print("\n" + "="*50 + "\n")
print("TRAINING LINEAR REGRESSION MODEL...")

# Fit the linear model on the training split only; fit() returns the fitted
# estimator, so construction and training can be chained.
model = LinearRegression().fit(X_train, y_train)

# Predict on both splits so training and testing error can be compared
y_pred_train, y_pred_test = (model.predict(split) for split in (X_train, X_test))

# Score each split with mean squared error and R², grouped per split
train_mse, train_r2 = (
    mean_squared_error(y_train, y_pred_train),
    r2_score(y_train, y_pred_train),
)
test_mse, test_r2 = (
    mean_squared_error(y_test, y_pred_test),
    r2_score(y_test, y_pred_test),
)

# Report all four metrics, one per line, rounded to four decimals
print(
    "\n=== EVALUATION RESULTS ===",
    f"Training MSE: {train_mse:.4f}",
    f"Testing MSE:  {test_mse:.4f}",
    f"Training R²:  {train_r2:.4f}",
    f"Testing R²:   {test_r2:.4f}",
    sep="\n",
)

# Show the fitted intercept, then each coefficient next to its feature name
print("\n=== LEARNED MODEL WEIGHTS ===")
print(f"Intercept: {model.intercept_:.4f}")
for name, weight in zip(feature_names, model.coef_):
    print(f"{name}: {weight:.4f}")

# Closing banner
print("\n" + "="*50)
print("MODEL TRAINING COMPLETED SUCCESSFULLY!")

=== TRAINING DATA (First 10 rows) ===
 Feature1  Feature2  Feature3    Target
 1.372909  0.676427  4.704867 17.841423
 0.044685  2.957960  3.667324  8.271617
 0.615278  2.603095  2.930576  6.854288
 0.928348  5.129287 -2.239490 -9.980101
 1.448185  2.025185  3.195352 10.421740
 1.496441  1.081278  1.300311  7.001110
 1.657502  0.940480  4.026535 16.044534
 0.381575  2.662527  4.951090 13.219228
-2.517479 -0.366517 -1.078464 -9.184071
 1.887639  3.549268  1.146139  3.892844


=== TESTING DATA (First 10 rows) ===
 Feature1  Feature2  Feature3    Target
-2.904176  1.696430  4.176634  3.496639
-0.439688  1.078722  5.114244 13.525080
 1.194155  3.937290  1.595894  2.361678
 4.289935  1.501928  4.153114 21.367999
-0.518265  2.300788  3.683512  7.092399
 1.464100 -0.896169  0.185072  5.861028
 2.027572  3.030095 10.705463 35.165504
 1.540914  1.899524  2.522104  9.745254
-0.325248  3.141197  1.473482  0.296143
 4.099869  0.433493  2.355877 16.896337


TRAINING LINEAR REGRESSION MODEL...

=== 