In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, confusion_matrix, classification_report

print("Please provide the path to your CSV file:")
file_path = input("Enter file path: ")

try:
    df = pd.read_csv(file_path)
    print("\nDataset loaded successfully!")
    print(f"Shape: {df.shape}")
    print("\nColumns:\n", df.columns.tolist())
except Exception as e:
    print("Error loading dataset:", e)
    exit()

print("\nWhich regression would you like to perform?")
print("1️. Simple Linear Regression (1 predictor, 1 target)")
print("2️. Multiple Linear Regression (multiple predictors)")
print("3️. Logistic Regression (classification)")
choice = input("Enter your choice (1/2/3): ")


def is_num(col):
    return pd.api.types.is_numeric_dtype(df[col])


if choice == "1":
    print("\nSIMPLE LINEAR REGRESSION")
    target = input("Enter target (dependent) variable name: ")
    feature = input("Enter feature (independent) variable name: ")

    if target not in df.columns or feature not in df.columns:
        print("Invalid column names.")
    elif not is_num(df[target]) or not is_num(df[feature]):
        print("Both target and feature must be numeric.")
    else:
        X = df[[feature]]
        y = df[target]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        model = LinearRegression()
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        print("\nModel Summary:")
        print(f"Intercept: {model.intercept_}")
        print(f"Coefficient for {feature}: {model.coef_[0]}")
        print(f"R² Score: {r2_score(y_test, y_pred):.4f}")
        print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}")

        # Visualization
        plt.scatter(X_test, y_test, color='blue', label='Actual')
        plt.plot(X_test, y_pred, color='red', label='Predicted Line')
        plt.xlabel(feature)
        plt.ylabel(target)
        plt.title(f"Simple Linear Regression: {target} vs {feature}")
        plt.legend()
        plt.show()

elif choice == "2":
    print("\nMULTIPLE LINEAR REGRESSION")
    target = input("Enter target (dependent) variable name: ")
    features = input("Enter feature (independent) columns separated by commas: ").split(',')

    features = [f.strip() for f in features]

    if target not in df.columns or not all(f in df.columns for f in features):
        print("Invalid column names.")
    elif not is_num(df[target]):
        print("Target variable must be numeric for linear regression.")
    else:
        X = df[features].select_dtypes(include=[np.number])
        y = df[target]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        model = LinearRegression()
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        print("\nModel Summary:")
        print(f"Intercept: {model.intercept_}")
        coeffs = pd.DataFrame(model.coef_, X.columns, columns=["Coefficient"])
        print(coeffs)
        print(f"\nR² Score: {r2_score(y_test, y_pred):.4f}")
        print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}")

        
        plt.scatter(y_test, y_pred, color='purple')
        plt.xlabel("Actual")
        plt.ylabel("Predicted")
        plt.title("Multiple Linear Regression: Actual vs Predicted")
        plt.show()

elif choice == "3":
    print("\nLOGISTIC REGRESSION")
    target = input("Enter target (categorical) variable name: ")
    features = input("Enter feature columns separated by commas: ").split(',')

    features = [f.strip() for f in features]

    if target not in df.columns or not all(f in df.columns for f in features):
        print("Invalid column names.")
    else:
    
        if not is_num(df[target]):
            y = df[target].astype('category').cat.codes
            print("Converted target classes to numeric codes:", list(enumerate(df[target].astype('category').cat.categories)))
        else:
            y = df[target]

        X = pd.get_dummies(df[features], drop_first=True)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        print("\n Model Summary:")
        print("Intercept:", model.intercept_)
        print("Coefficients:")
        print(pd.DataFrame(model.coef_, columns=X.columns))
        print("\nAccuracy:", accuracy_score(y_test, y_pred))
        print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
        print("\nClassification Report:\n", classification_report(y_test, y_pred))

        # Visualization
        sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, cmap="Blues", fmt="d")
        plt.title("Confusion Matrix - Logistic Regression")
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.show()

else:
    print("Invalid choice.")


Please provide the path to your CSV file:


Enter file path:  D:\College\FDS Experiments\archive\Iris.csv



Dataset loaded successfully!
Shape: (150, 6)

Columns:
 ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species']

Which regression would you like to perform?
1️. Simple Linear Regression (1 predictor, 1 target)
2️. Multiple Linear Regression (multiple predictors)
3️. Logistic Regression (classification)


Enter your choice (1/2/3):  2



MULTIPLE LINEAR REGRESSION


Enter target (dependent) variable name:  Species
Enter feature (independent) columns separated by commas:  PetalWidthCm


KeyError: "None of [Index(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',\n       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',\n       'Iris-setosa', 'Iris-setosa',\n       ...\n       'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica',\n       'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica',\n       'Iris-virginica', 'Iris-virginica'],\n      dtype='object', length=150)] are in the [columns]"