In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression

def _print_feature_values(title, values):
    """Print *title*, then one ``Feature X<n>: <value>`` line per entry."""
    print(title)
    for position, value in enumerate(values, start=1):
        print(f"Feature X{position}: {value:.4f}")


def unknown_data(filename):
    """
    Fit a no-intercept linear regression to a CSV file and report the results.

    The last column of the file is treated as the response and every other
    column as a feature. The fitted coefficients and the correlation of each
    feature with the response are printed, labelled by column position
    (X1, X2, ...), and then returned.

    Args:
        filename: Path of the CSV file to load (passed straight to
            ``pandas.read_csv``).

    Returns:
        A ``(coefficients, correlations)`` tuple: ``model.coef_`` of the
        fitted ``LinearRegression`` and the result of ``X.corrwith(y)``.

    Raises:
        ValueError: If the file has fewer than two columns (no feature to
            regress on) or contains no data rows.
    """
    # Load data
    data = pd.read_csv(filename)

    # Fail early with a clear message instead of an obscure error from the
    # regression fit further down.
    if data.shape[1] < 2:
        raise ValueError(
            "Expected at least one feature column plus a response column, "
            f"got {data.shape[1]} column(s)."
        )
    if len(data) == 0:
        raise ValueError("The CSV file contains no data rows.")

    # Separate features (all but the last column) and response (last column)
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    # Fit linear regression without intercept
    model = LinearRegression(fit_intercept=False)
    model.fit(X, y)

    # Correlation of each feature column with the response
    correlations = X.corrwith(y)

    # Display coefficients and correlations
    _print_feature_values("=== Linear Regression Coefficients ===", model.coef_)
    _print_feature_values("\n=== Feature–Response Correlations ===", correlations)

    return model.coef_, correlations

# Step 3: load the dataset, fit the model and print the report
file_path = "ah4323_unknown_data.csv"  # adjust if needed
coefficients, correlations = unknown_data(file_path)

# Step 4 (optional): flag features whose absolute correlation with the
# response is below 0.1; labels are positional (X1, X2, ...)
print("\n=== Analysis ===")
weak_features = [
    f"X{position}"
    for position, value in enumerate(correlations, start=1)
    if abs(value) < 0.1
]
if not weak_features:
    print("All features appear to have reasonable correlation with the response.")
else:
    print("Features that could be dropped (weak correlation): " + ", ".join(weak_features))


=== Linear Regression Coefficients ===
Feature X1: 15.0000
Feature X2: -2.0000
Feature X3: -0.0000
Feature X4: -7.0000
Feature X5: 19.0000

=== Feature–Response Correlations ===
Feature X1: 0.6065
Feature X2: -0.1033
Feature X3: 0.0082
Feature X4: -0.2841
Feature X5: 0.7560

=== Analysis ===
Features that could be dropped (weak correlation): X3
