<a href="https://colab.research.google.com/github/abdulsamadkhan/MachineLearningTutorials/blob/main/linear_regression_scikit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt


# Step 1: Data Loading
Load scikit-learn's built-in diabetes dataset into a pandas DataFrame and append the regression target as a `target` column.

In [None]:
from sklearn.datasets import load_diabetes

# `data` is the dataset object; the cell reads its .data (feature matrix),
# .feature_names (column labels) and .target (regression target) attributes.
data = load_diabetes()

# Build the feature table and attach the target as one extra column, so
# features and label live in a single DataFrame for the cells below.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Step 2: Data Exploration
Exploring the dataset.

In [None]:
# Each entry pairs a heading with the summary printed directly beneath it:
# a preview of the rows, the per-column count of missing values, and the
# descriptive statistics of every column.
overview = (
    ("First five rows of the dataset:", df.head()),
    ("\nChecking for missing values:", df.isnull().sum()),
    ("\nBasic statistics:", df.describe()),
)
for heading, summary in overview:
    print(heading, summary, sep="\n")

# Step 3: Data Preprocessing
Select a single feature, `bmi`, as the predictor and `target` as the response, so the model is a simple (one-variable) linear regression.

In [None]:
# Predictor: the double brackets select a one-column DataFrame, so X stays
# two-dimensional (n_samples, 1) as scikit-learn estimators expect.
X = df[['bmi']].to_numpy()

# Response: a single column selected as a Series, giving a 1-D array.
y = df['target'].to_numpy()

# Step 4: Training-Test Split
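Split the data into training and test sets, holding out 20% of the samples for testing; a fixed `random_state` makes the split reproducible.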

In [None]:
# Hold out part of the samples for evaluation; the fixed seed makes the
# shuffle (and therefore every downstream number) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,     # fraction of samples reserved for the test set
    random_state=42,   # seed for the shuffling applied before the split
)

# Step 5: Training the Model
Training a Linear Regression model.

In [None]:
# Create the estimator and fit it on the training split in one step;
# `fit` returns the fitted estimator itself, so `model` is ready to use.
model = LinearRegression().fit(X_train, y_train)

# Leave the estimator as the cell's last expression so the notebook still
# displays its representation as the cell output.
model

# Step 6: Model Coefficients
Displaying the model coefficients.

In [None]:
# With a single feature, coef_ holds exactly one slope value; intercept_ is
# the fitted constant term.
slope = model.coef_[0]
intercept = model.intercept_

print(f"Coefficient: {slope}")
print(f"Intercept: {intercept}")

# Step 7: Evaluating the Model
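Predict on the held-out test set and compute two performance metrics: the mean squared error (MSE) and the coefficient of determination ($R^2$).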

In [None]:
# Predictions for the held-out samples only; the training data is not reused
# here, so the metrics below reflect performance on unseen data.
y_pred = model.predict(X_test)

# Both metrics compare the true test targets against the predictions.
mse, r2 = (
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

# Report each metric rounded to two decimal places.
print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")

# Step 8: Visualizing the Results
Plotting the results.

In [None]:
# Explicit figure/axes handles instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 6))

# Observed test points versus the model's predictions for the same inputs.
ax.scatter(X_test, y_test, color='blue', label='Actual values')
ax.plot(X_test, y_pred, color='red', linewidth=2, label='Regression line')

# Labels and title so the figure can be read on its own.
ax.set(
    xlabel='BMI',
    ylabel='Disease Progression',
    title='Linear Regression on Diabetes Dataset',
)
ax.legend()
plt.show()

# Step 9: Conclusion
Summary of results.

In [None]:
# Closing remarks, emitted one per line (the first starts with a blank line).
closing_remarks = (
    "\nConclusion:",
    "The model shows a linear relationship between BMI and disease progression.",
    "The R^2 score indicates how well the model explains the variability in the data.",
)
print(*closing_remarks, sep="\n")