<a href="https://colab.research.google.com/github/aimanmalik0404/Linear-Regression-Model/blob/main/Linear_Regression_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Implement initial model training with scikit-learn



In [None]:
try:
    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LinearRegression

    print("✅ Libraries imported successfully!")
except Exception as e:
    print(f"❌ Error importing libraries: {e}")


✅ Libraries imported successfully!


In [None]:
try:
    # Simple dataset: X = feature, y = target
    X = np.array([[1], [2], [3], [4], [5]])
    y = np.array([2, 4, 6, 8, 10])

    print("✅ Data created successfully!")
except Exception as e:
    print(f"❌ Error creating data: {e}")


✅ Data created successfully!


In [None]:
try:
    model = LinearRegression()
    model.fit(X, y)
    print("✅ Model trained successfully!")
    print(f"θ0 (Intercept): {model.intercept_}")
    print(f"θ1 (Coefficient): {model.coef_[0]}")
except Exception as e:
    print(f"❌ Error during training: {e}")


✅ Model trained successfully!
θ0 (Intercept): 0.0
θ1 (Coefficient): 2.0


In [None]:
# Assuming 'model' is already trained

try:
    # Example: Ask for input from user
    x_value = float(input("Enter a value for X: "))

    # Reshape because sklearn expects 2D array for prediction
    x_array = [[x_value]]

    # Predict using trained model
    y_pred = model.predict(x_array)

    print(f"✅ Prediction for X = {x_value} is Y = {y_pred[0]}")

except Exception as e:
    print(f"❌ Error during prediction: {e}")


Enter a value for X: 4
✅ Prediction for X = 4.0 is Y = 8.0


Implement initial model training without scikit-learn

In [None]:
import numpy as np

# -----------------------------
# Step 1: Dataset
# -----------------------------
X = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 6, 8, 10])
m_samples = len(X)

# -----------------------------
# Step 2: Initialize parameters
# -----------------------------
theta_0 = 0  # Intercept
theta_1 = 0  # Slope
alpha = 0.01 # Learning rate
iterations = 1000

# -----------------------------
# Step 3: Gradient Descent Loop
# -----------------------------
for _ in range(iterations):
    # Hypothesis function h_theta(x)
    y_pred = theta_0 + theta_1 * X

    # Errors
    error = y_pred - y

    # Gradients (derivatives of cost w.r.t parameters)
    d_theta0 = (1/m_samples) * np.sum(error)      # ?/??0 J(?)
    d_theta1 = (1/m_samples) * np.sum(error * X)  # ?/??1 J(?)

    # Update parameters using update rule
    theta_0 -= alpha * d_theta0
    theta_1 -= alpha * d_theta1

# -----------------------------
# Step 4: Final Parameters
# -----------------------------
print(f"Final ?0 (intercept): {theta_0}")
print(f"Final ?1 (slope): {theta_1}")

# -----------------------------
# Step 5: Prediction
# -----------------------------
x_value = float(input("Enter a value for X: "))
y_prediction = theta_0 + theta_1 * x_value
print(f"Prediction for X = {x_value} is Y = {y_prediction}")

Final ?0 (intercept): 0.09475321533750963
Final ?1 (slope): 1.9737548787242036
Enter a value for X: 5
Prediction for X = 5.0 is Y = 9.963527608958529


Implement model training without scikit-learn using csv file

In [None]:
# ====== 1. IMPORT LIBRARIES ======
try:
    import numpy as np
    import pandas as pd
    print("✅ Libraries imported successfully!")
except Exception as e:
    print(f"❌ Error importing libraries: {e}")


# ====== 2. LOAD CSV ======
try:
    from google.colab import files
    uploaded = files.upload()  # Opens file picker popup in Colab
    file_name = list(uploaded.keys())[0]  # Get the uploaded file's name
    df = pd.read_csv(file_name)
    print("✅ CSV loaded successfully!")
except FileNotFoundError:
    print("❌ CSV file not found. Please upload it.")
except Exception as e:
    print(f"❌ Error reading CSV: {e}")


# ====== 3. DATA CLEANING ======
try:
    # Remove duplicates
    df = df.drop_duplicates()

    # Handle missing values (if any)
    df = df.dropna()

    print("✅ Data cleaned successfully!")
except Exception as e:
    print(f"❌ Error cleaning data: {e}")


# ====== 4. NORMALIZATION ======
try:
    # Min-Max normalization
    df['X'] = (df['X'] - df['X'].min()) / (df['X'].max() - df['X'].min())
    df['Y'] = (df['Y'] - df['Y'].min()) / (df['Y'].max() - df['Y'].min())
    print("✅ Normalization done!")
except Exception as e:
    print(f"❌ Error in normalization: {e}")


# ====== 5. SPLIT INTO TRAIN / TEST ======
try:
    train_size = int(0.8 * len(df))
    train_data = df.iloc[:train_size]
    test_data = df.iloc[train_size:]

    X_train = train_data['X'].values
    y_train = train_data['Y'].values
    X_test = test_data['X'].values
    y_test = test_data['Y'].values

    print("✅ Data split into training and testing sets!")
except Exception as e:
    print(f"❌ Error splitting data: {e}")


# ====== 6. GRADIENT DESCENT IMPLEMENTATION ======
try:
    # Initialize parameters
    theta_0 = 0
    theta_1 = 0
    alpha = 0.1  # learning rate
    m = len(X_train)  # number of samples
    epochs = 0
    tolerance = 1e-6  # stop when improvement is very small

    prev_cost = float('inf')

    while True:
        # Predictions
        y_pred = theta_0 + theta_1 * X_train

        # Errors
        error = y_pred - y_train

        # Cost function (MSE)
        cost = (1 / (2 * m)) * np.sum(error ** 2)

        # Check convergence
        if abs(prev_cost - cost) < tolerance:
            break
        prev_cost = cost

        # Gradients
        d_theta0 = (1/m) * np.sum(error)
        d_theta1 = (1/m) * np.sum(error * X_train)

        # Update parameters
        theta_0 -= alpha * d_theta0
        theta_1 -= alpha * d_theta1

        epochs += 1

    print(f"✅ Training complete in {epochs} epochs")
    print(f"θ0 (intercept): {theta_0}")
    print(f"θ1 (slope): {theta_1}")
except Exception as e:
    print(f"❌ Error during training: {e}")


# ====== 7. TESTING ======
try:
    y_test_pred = theta_0 + theta_1 * X_test
    mse_test = (1 / (2 * len(X_test))) * np.sum((y_test_pred - y_test) ** 2)
    print(f"✅ Test MSE: {mse_test}")
except Exception as e:
    print(f"❌ Error during testing: {e}")


✅ Libraries imported successfully!


Saving univariate_data.csv to univariate_data.csv
✅ CSV loaded successfully!
✅ Data cleaned successfully!
✅ Normalization done!
✅ Data split into training and testing sets!
✅ Training complete in 560 epochs
θ0 (intercept): 0.02600558554475363
θ1 (slope): 0.9378602470328574
✅ Test MSE: 0.0004617023545927611


In [7]:
# ====== 1. IMPORT LIBRARIES ======
try:
    import numpy as np
    import pandas as pd
    from google.colab import files
    print("✅ Libraries imported successfully!")
except Exception as e:
    print(f"❌ Error importing libraries: {e}")


# ====== 2. LOAD CSV ======
try:
    uploaded = files.upload()  # Opens file picker popup in Colab
    file_name = list(uploaded.keys())[0]  # Get the uploaded file's name
    df = pd.read_csv(file_name)
    print("✅ CSV loaded successfully!")
except FileNotFoundError:
    print("❌ CSV file not found. Please upload it.")
except Exception as e:
    print(f"❌ Error reading CSV: {e}")


# ====== 3. DATA CLEANING ======
try:
    df = df.drop_duplicates()
    df = df.dropna()
    numeric_df = pd.DataFrame()
    for col in df.columns:
      numeric_df[col] = pd.to_numeric(df[col], errors='coerce')
    df = numeric_df.dropna()
    print("✅ Data cleaned successfully!")
except Exception as e:
    print(f"❌ Error cleaning data: {e}")


# ====== 4. STORE ORIGINAL MEAN & STD, THEN NORMALIZE ======
try:
    x_mean_original = df['X'].mean()
    x_std_original = df['X'].std()
    y_mean_original = df['Y'].mean()
    y_std_original = df['Y'].std()

    df = df.copy()
    df.loc[:, 'X_norm'] = (df['X'] - x_mean_original) / x_std_original
    df.loc[:, 'Y_norm'] = (df['Y'] - y_mean_original) / y_std_original

    print("✅ Z-score normalization done (original values preserved)!")
except Exception as e:
    print(f"❌ Error in normalization: {e}")


# ====== 5. SHUFFLE & SPLIT INTO TRAIN / TEST ======
try:
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    train_size = int(0.8 * len(df))
    train_data = df.iloc[:train_size]
    test_data = df.iloc[train_size:]

    X_train = train_data['X_norm'].values
    y_train = train_data['Y_norm'].values
    X_test = test_data['X_norm'].values
    y_test = test_data['Y_norm'].values

    print("✅ Data shuffled and split into training/testing sets!")
except Exception as e:
    print(f"❌ Error splitting data: {e}")


# ====== 6. GRADIENT DESCENT IMPLEMENTATION ======
try:
    theta_0 = 0
    theta_1 = 0
    alpha = 0.1
    m = len(X_train)
    epochs = 0
    max_epochs = 80
    tolerance = 0
    prev_cost = float('inf')

    while epochs < max_epochs:
        y_pred = theta_0 + theta_1 * X_train
        error = y_pred - y_train
        cost = (1 / (2 * m)) * np.sum(error ** 2)

        if abs(prev_cost - cost) < tolerance:
            break
        prev_cost = cost

        d_theta0 = (1/m) * np.sum(error)
        d_theta1 = (1/m) * np.sum(error * X_train)

        theta_0 -= alpha * d_theta0
        theta_1 -= alpha * d_theta1
        epochs += 1

    print(f"✅ Training complete in {epochs} epochs")
    print(f"θ0 (intercept): {theta_0}")
    print(f"θ1 (slope): {theta_1}")
except Exception as e:
    print(f"❌ Error during training: {e}")


# ====== 7. TESTING ======
try:
    y_test_pred = theta_0 + theta_1 * X_test
    mse_test = (1 / len(X_test)) * np.sum((y_test_pred - y_test) ** 2)
    print(f"✅ Test MSE (normalized space): {mse_test}")
except Exception as e:
    print(f"❌ Error during testing: {e}")


# ====== 8. PREDICT FOR A SINGLE USER-INPUT VALUE ======
try:
    raw_x = float(input("Enter the value of X (original/raw value): "))

    norm_x = (raw_x - x_mean_original) / x_std_original
    norm_y_pred = theta_0 + theta_1 * norm_x
    raw_y_pred = (norm_y_pred * y_std_original) + y_mean_original

    print(f"Predicted Y (normalized): {norm_y_pred}")
    print(f"Predicted Y (original scale): {raw_y_pred}")

except Exception as e:
    print(f"❌ Error during prediction: {e}")


✅ Libraries imported successfully!


Saving univariate_data.csv to univariate_data (5).csv
✅ CSV loaded successfully!
✅ Data cleaned successfully!
✅ Z-score normalization done (original values preserved)!
✅ Data shuffled and split into training/testing sets!
✅ Training complete in 80 epochs
θ0 (intercept): -9.654402233492311e-05
θ1 (slope): 0.9997339415345498
✅ Test MSE (normalized space): 9.124078677089417e-08
Enter the value of X (original/raw value): 4
Predicted Y (normalized): -1.7324822567709555
Predicted Y (original scale): 8.041416373425392
