In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np


def _absent_columns(frame, columns):
    """Return the string labels in `columns` that are not columns of `frame`."""
    frame_columns = getattr(frame, "columns", None)
    # Only explicit lists/tuples of names are checked; any other kind of column
    # selector (or a non-DataFrame input) is left for scikit-learn to resolve.
    if frame_columns is None or not isinstance(columns, (list, tuple)):
        return []
    return [col for col in columns if isinstance(col, str) and col not in frame_columns]


def linear_regression_pipeline(
    df,                      # Input DataFrame
    target_column,           # Name of target column
    numeric_features,        # List of numeric feature column names
    categorical_features,    # List of categorical feature column names
    test_size=0.2,           # Test set size
    custom_input=None,       # Optional custom input for prediction (as DataFrame)
    random_state=42          # Seed for the train/test split
):
    """Fit a scaled / one-hot-encoded linear regression and report test metrics.

    The numeric features are passed through ``StandardScaler`` and the
    categorical features through ``OneHotEncoder(handle_unknown='ignore')``
    before a ``LinearRegression`` is fitted on the training split. The test-set
    predictions, mean squared error and R² score are printed; when
    ``custom_input`` is given, its predictions are printed as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the target column and every feature column.
    target_column : str
        Name of the column to predict; it is removed from the feature frame.
    numeric_features : list of str
        Columns to standardise.
    categorical_features : list of str
        Columns to one-hot encode.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.
    custom_input : pandas.DataFrame, optional
        Extra rows to predict after fitting; must contain every feature column.
    random_state : int, default 42
        Forwarded to ``train_test_split`` so the split is reproducible.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted preprocessing + regression pipeline.

    Raises
    ------
    KeyError
        If ``target_column`` is not a column of ``df`` (raised by ``df.drop``).
    ValueError
        If a listed feature column is absent from the feature frame (this
        includes listing the target as a feature) or from ``custom_input``.
    """
    # Split features and target
    X = df.drop(target_column, axis=1)
    y = df[target_column]

    # Fail fast with a readable message instead of an error from deep inside
    # the pipeline; checked against X so a target listed as a feature is caught.
    absent = _absent_columns(X, numeric_features) + _absent_columns(X, categorical_features)
    if absent:
        raise ValueError(
            f"Feature columns not found in df (after removing the target column): {absent}"
        )

    # Validate the custom rows before training so a bad input does not waste a fit.
    if custom_input is not None:
        absent = (
            _absent_columns(custom_input, numeric_features)
            + _absent_columns(custom_input, categorical_features)
        )
        if absent:
            raise ValueError(f"custom_input is missing feature columns: {absent}")

    # Define preprocessing
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ])

    # Define pipeline
    model = Pipeline(steps=[
        ('preprocess', preprocessor),
        ('regressor', LinearRegression())
    ])

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Train model
    model.fit(X_train, y_train)

    # Predict on test set
    y_pred = model.predict(X_test)

    print("Test set predictions:\n", y_pred)
    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
    print("R² Score:", r2_score(y_test, y_pred))

    # Custom input prediction (if provided)
    if custom_input is not None:
        custom_output = model.predict(custom_input)
        print("\nCustom input:")
        print(custom_input)
        print("Predicted value:", custom_output)

    return model  # Return model in case user wants to reuse it

# ------------------ Example usage ------------------

# Load the housing data; any regression dataset can be substituted here
df = pd.read_csv('Housing.csv')

# One hypothetical house to score once the model has been trained
custom_input = pd.DataFrame([dict(
    bedrooms=3,
    bathrooms=2,
    area=2200,
    stories=2,
    parking=1,
    mainroad='yes',
    guestroom='no',
    basement='yes',
    hotwaterheating='no',
    airconditioning='yes',
    prefarea='yes',
    furnishingstatus='semi-furnished',
)])


# Train on 70% of the rows, evaluate on the remaining 30%, then score the custom row
linear_regression_pipeline(
    df=df,
    target_column='price',
    numeric_features=[
        'bedrooms',
        'bathrooms',
        'area',
        'stories',
        'parking',
    ],
    categorical_features=[
        'mainroad',
        'guestroom',
        'basement',
        'hotwaterheating',
        'airconditioning',
        'prefarea',
        'furnishingstatus',
    ],
    test_size=0.3,
    custom_input=custom_input,
)


Test set predictions:
 [5372312.61614641 7069241.00525806 3099290.78784024 4526446.61703975
 3281573.61192656 3589455.18752547 5728625.30074515 6422877.52476236
 2797870.49568591 2554357.99210552 9693560.19894656 2802903.76131031
 3026465.95300837 3433891.33202093 3792786.34565513 5321204.36478555
 2999852.37322994 4813306.52790376 4572077.61633938 3595733.80237382
 5605701.03333099 5838934.41614981 2730716.89440109 4837741.37096247
 5638460.49223856 7809681.30410654 3347443.37529207 5328750.6625753
 8323072.83237387 3374792.28854335 6379706.15324899 3397999.89564252
 6709115.82334019 4249666.18392567 3594972.20315591 5842994.13027721
 5115480.11172668 4359421.90319848 3046570.15372424 4597379.56416965
 4784358.17589609 3411743.78220679 7048978.02507302 4062741.36892573
 3785699.2416086  4286017.77250844 6729020.2073964  4133428.23821411
 3845434.09183082 3655159.46220358 7439302.79944483 2869444.70398563
 4468816.13830399 4474332.83085885 3790094.69366283 2607548.31509126
 7484173.674

In [3]:
import pandas as pd

# Load the housing CSV and show its first rows to inspect the available columns.
df = pd.read_csv('Housing.csv')
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
