## Sklearn 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Read the dataset from disk
df = pd.read_csv("data.csv")

# Response column first, then the three predictor columns
y = df['Y']
X = df[['X1', 'X2', 'X3']]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit the linear model on the training rows (fit() returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# Report the fitted parameters, then the test-set error and goodness of fit
print(f"Coefficients: {model.coef_}")
print(f"Intercept: {model.intercept_}")
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}")
print(f"R-squared Score: {r2_score(y_test, y_pred)}")


## Statsmodels

In [None]:
import pandas as pd
import statsmodels.api as sm

# Read the dataset from disk
df = pd.read_csv("data.csv")

# Dependent variable
y = df['Y']

# Independent variables, with a constant column added so the fit includes an intercept
X = sm.add_constant(df[['X1', 'X2', 'X3']])

# Fit ordinary least squares; note that `model` is the fitted results object
model = sm.OLS(endog=y, exog=X).fit()

# Full regression report (coefficients, p-values, R-squared, ...)
print(model.summary())


## TensorFlow / Keras

In [None]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

# Load dataset
df = pd.read_csv("data.csv")

# Feature matrix and target as plain NumPy arrays (.values)
X = df[['X1', 'X2', 'X3']].values
y = df['Y'].values

# Split data: 20% held out for testing, fixed seed for a reproducible split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build model: one Dense unit with no activation is a linear map plus bias,
# i.e. a multiple linear regression. The input width is declared with an
# explicit Input object as the first entry instead of the `input_shape=`
# layer argument, which Keras 3 flags as deprecated for Sequential models.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(1),
])

# Compile model: minimise mean squared error with the Adam optimizer
model.compile(optimizer='adam', loss='mse')

# Train model silently (verbose=0) for 100 passes over the training set
model.fit(X_train, y_train, epochs=100, verbose=0)

# Evaluate model: with no extra metrics compiled, evaluate() returns the test loss (MSE)
mse = model.evaluate(X_test, y_test)
print(f"Mean Squared Error: {mse}")


## PyTorch

In [None]:
import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split

# Load dataset
df = pd.read_csv("data.csv")

# Features as a float32 tensor; the target is reshaped to a single column
# (-1, 1) so it matches the (n_samples, 1) output of nn.Linear(..., 1)
X = torch.tensor(df[['X1', 'X2', 'X3']].values, dtype=torch.float32)
y = torch.tensor(df['Y'].values, dtype=torch.float32).view(-1, 1)

# Split data: train_test_split documents lists, NumPy arrays, SciPy sparse
# matrices and pandas DataFrames as inputs, so the split is done on NumPy
# views of the tensors and each part is wrapped back into a tensor.
# The selected rows depend only on the sample count and random_state.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(part)
    for part in train_test_split(X.numpy(), y.numpy(), test_size=0.2, random_state=42)
)

# Define model: a single linear layer (weights + bias), MSE loss, Adam optimizer
model = nn.Linear(X_train.shape[1], 1)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Train model: 1000 full-batch gradient steps over the whole training set
for epoch in range(1000):
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()        # backpropagate the current loss
    optimizer.step()       # apply the parameter update

# Evaluate model: no gradient tracking is needed for the test-set loss
with torch.no_grad():
    test_pred = model(X_test)
    test_loss = criterion(test_pred, y_test)
print(f"Mean Squared Error: {test_loss.item()}")


## XGBoost 

In [None]:
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Read the dataset from disk
df = pd.read_csv("data.csv")

# Target column and the three feature columns
y = df['Y']
X = df[['X1', 'X2', 'X3']]

# Hold out 20% of the rows for testing; fixed seed for a reproducible split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Build the gradient-boosted regressor with a squared-error objective and fit it
# in one expression (fit() returns the estimator itself)
model = xgb.XGBRegressor(objective='reg:squarederror').fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# Test-set mean squared error
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")


## LightGBM

In [None]:
import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Read the dataset from disk
df = pd.read_csv("data.csv")

# Feature columns and target column
X = df[['X1', 'X2', 'X3']]
y = df['Y']

# 80/20 train/test split with a fixed seed so the partition is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)

# LightGBM regressor with its default hyperparameters, fitted on the training rows
model = lgb.LGBMRegressor()
model.fit(X=X_train, y=y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# Test-set mean squared error
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")


## CatBoost

In [None]:
import catboost as cb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Read the dataset from disk
df = pd.read_csv("data.csv")

# Target column, then the three feature columns
y = df['Y']
X = df[['X1', 'X2', 'X3']]

# Reserve 20% of the rows for testing; the seed pins the partition
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# CatBoost regressor created with verbose=0, then fitted on the training rows
model = cb.CatBoostRegressor(verbose=0)
model.fit(X=X_train, y=y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# Test-set mean squared error
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")


## Summary of Libraries for Multiple Linear Regression

| Library              | Best For                                      |
|----------------------|-----------------------------------------------|
| **sklearn**          | Simple and fast implementation                |
| **statsmodels**      | In-depth statistical analysis with p-values   |
| **TensorFlow/Keras** | Deep learning-based regression                |
| **PyTorch**          | Customizable models for advanced applications |
| **XGBoost**          | Gradient boosting with high accuracy          |
| **LightGBM**         | Faster training for large datasets            |
| **CatBoost**         | Handles categorical data efficiently          |