Title: Train a Linear Regression Model

Task 1: Predicting House Prices<br>
Dataset: Use a dataset that contains various features of houses such as square footage, number of bedrooms, and age of the house.

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import logging

# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)
# getLogger() with no name returns the root logger, shared by every cell below.
logger = logging.getLogger()

def load_house_data():
    """Build the small in-memory house-price dataset used by Task 1.

    Returns:
        pandas.DataFrame: ten rows with the columns ``Square_Feet``,
        ``Bedrooms``, ``Age`` and ``Price``, in that order.

    Raises:
        ValueError: if the assembled frame is empty or holds any null cell.
    """
    square_feet = [1400, 1600, 1700, 1875, 1100, 1550, 2350, 2450, 1425, 1700]
    bedrooms = [3, 3, 3, 3, 2, 3, 4, 4, 3, 3]
    age = [10, 15, 20, 5, 30, 5, 15, 10, 20, 25]
    price = [245000, 312000, 279000, 308000, 199000, 219000, 405000, 324000, 319000, 255000]

    frame = pd.DataFrame(
        {
            'Square_Feet': square_feet,
            'Bedrooms': bedrooms,
            'Age': age,
            'Price': price,
        }
    )

    # Sanity checks: reject a frame with no rows or with any missing cell.
    problem = "DataFrame is empty or contains null values"
    if frame.empty:
        raise ValueError(problem)
    if frame.isna().to_numpy().any():
        raise ValueError(problem)
    return frame

def train_house_model(df):
    """Fit a linear regression on the house data and report its test MSE.

    The rows are split 80/20 with a fixed seed, the features are standardised
    using statistics learned from the training rows only, and a
    ``LinearRegression`` is fitted on the scaled training features. The MSE is
    logged at INFO level and the test predictions are printed.

    Args:
        df: DataFrame with ``Square_Feet``, ``Bedrooms``, ``Age`` and
            ``Price`` columns.

    Returns:
        The mean squared error of the predictions on the held-out rows.
    """
    feature_cols = ['Square_Feet', 'Bedrooms', 'Age']
    features = df[feature_cols]
    target = df['Price']

    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # The scaler is fitted on the training rows only; the test rows are
    # transformed with those same statistics.
    scaler = StandardScaler().fit(features_train)
    train_scaled = scaler.transform(features_train)
    test_scaled = scaler.transform(features_test)

    regressor = LinearRegression().fit(train_scaled, target_train)
    y_pred = regressor.predict(test_scaled)

    mse = mean_squared_error(target_test, y_pred)
    logger.info(f"House Prices - MSE: {mse:.2f}")
    print(f"House Price Predictions: {y_pred}")
    return mse

# Build the house dataset and keep it in a notebook-level variable.
df_house = load_house_data()
# Train and evaluate; as the cell's last expression, the returned MSE is echoed.
train_house_model(df_house)


INFO:root:House Prices - MSE: 4444866754.53


House Price Predictions: [241533.44262295 258253.68852459]


4444866754.531712

Task 2: Predicting Car Mileage (MPG)<br>
Dataset: Use a dataset where features include horsepower, weight, and model year of cars.

In [2]:
def load_car_data():
    """Build the small in-memory car dataset used by Task 2.

    Returns:
        pandas.DataFrame: ten rows with the columns ``Horsepower``,
        ``Weight``, ``Model_Year`` and ``MPG``, in that order.

    Raises:
        ValueError: if the assembled frame is empty or holds any null cell.
    """
    horsepower = [130, 165, 150, 140, 120, 130, 160, 170, 150, 130]
    weight = [3500, 3000, 3200, 2800, 3000, 3500, 3400, 3700, 3200, 2800]
    model_year = [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2025]
    mpg = [22, 20, 18, 22, 24, 25, 27, 26, 23, 21]

    frame = pd.DataFrame(
        {
            'Horsepower': horsepower,
            'Weight': weight,
            'Model_Year': model_year,
            'MPG': mpg,
        }
    )

    # Sanity checks: reject a frame with no rows or with any missing cell.
    problem = "DataFrame is empty or contains null values"
    if frame.empty:
        raise ValueError(problem)
    if frame.isna().to_numpy().any():
        raise ValueError(problem)
    return frame

def train_mpg_model(df):
    """Fit a linear regression on the car data and report its test MSE.

    The rows are split 80/20 with a fixed seed, the features are standardised
    using statistics learned from the training rows only, and a
    ``LinearRegression`` is fitted on the scaled training features. The MSE is
    logged at INFO level and the test predictions are printed.

    Args:
        df: DataFrame with ``Horsepower``, ``Weight``, ``Model_Year`` and
            ``MPG`` columns.

    Returns:
        The mean squared error of the predictions on the held-out rows.
    """
    feature_cols = ['Horsepower', 'Weight', 'Model_Year']
    features = df[feature_cols]
    target = df['MPG']

    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # The scaler is fitted on the training rows only; the test rows are
    # transformed with those same statistics.
    scaler = StandardScaler().fit(features_train)
    train_scaled = scaler.transform(features_train)
    test_scaled = scaler.transform(features_test)

    regressor = LinearRegression().fit(train_scaled, target_train)
    y_pred = regressor.predict(test_scaled)

    mse = mean_squared_error(target_test, y_pred)
    logger.info(f"MPG - MSE: {mse:.2f}")
    print(f"MPG Predictions: {y_pred}")
    return mse

# Build the car dataset and keep it in a notebook-level variable.
df_car = load_car_data()
# Train and evaluate; as the cell's last expression, the returned MSE is echoed.
train_mpg_model(df_car)


INFO:root:MPG - MSE: 1.23


MPG Predictions: [24.50719727 19.57555922]


1.2258967852685116

Task 3: Predicting Students' Scores<br>
Dataset: Use a dataset that contains study hours and corresponding student test scores.

In [3]:
def load_student_data():
    """Build the small in-memory study-hours dataset used by Task 3.

    Returns:
        pandas.DataFrame: ten rows with the columns ``Study_Hours`` and
        ``Test_Score``, in that order.

    Raises:
        ValueError: if the assembled frame is empty or holds any null cell.
    """
    study_hours = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    test_score = [35, 50, 65, 70, 80, 85, 90, 95, 99, 100]

    frame = pd.DataFrame(
        {
            'Study_Hours': study_hours,
            'Test_Score': test_score,
        }
    )

    # Sanity checks: reject a frame with no rows or with any missing cell.
    problem = "DataFrame is empty or contains null values"
    if frame.empty:
        raise ValueError(problem)
    if frame.isna().to_numpy().any():
        raise ValueError(problem)
    return frame

def train_student_model(df):
    """Fit a linear regression on the student data and report its test MSE.

    The rows are split 80/20 with a fixed seed, the single feature is
    standardised using statistics learned from the training rows only, and a
    ``LinearRegression`` is fitted on the scaled training feature. The MSE is
    logged at INFO level and the test predictions are printed.

    Args:
        df: DataFrame with ``Study_Hours`` and ``Test_Score`` columns.

    Returns:
        The mean squared error of the predictions on the held-out rows.
    """
    feature_cols = ['Study_Hours']
    features = df[feature_cols]  # list selector keeps this a 2-D frame
    target = df['Test_Score']

    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # The scaler is fitted on the training rows only; the test rows are
    # transformed with those same statistics.
    scaler = StandardScaler().fit(features_train)
    train_scaled = scaler.transform(features_train)
    test_scaled = scaler.transform(features_test)

    regressor = LinearRegression().fit(train_scaled, target_train)
    y_pred = regressor.predict(test_scaled)

    mse = mean_squared_error(target_test, y_pred)
    logger.info(f"Student Scores - MSE: {mse:.2f}")
    print(f"Test Score Predictions: {y_pred}")
    return mse

# Build the student dataset and keep it in a notebook-level variable.
df_student = load_student_data()
# Train and evaluate; as the cell's last expression, the returned MSE is echoed.
train_student_model(df_student)


INFO:root:Student Scores - MSE: 9.13


Test Score Predictions: [101.63793103  53.36206897]


9.131093935790723