In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit a linear-regression baseline on train.csv, report hold-out metrics, and
# write predictions for test.csv to linear_regression_predictions.csv.

# Load the data
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# Prepare features and target variable: every training column except the
# 'ID' and 'medv' columns is used as a feature.
X = train_data.drop(columns=['ID', 'medv'])
y = train_data['medv']

# Fail fast if test.csv cannot be scored or labelled. 'ID' is needed for the
# output file and every training feature is needed by the scaler and model;
# checking here reports all missing columns at once, before any fitting.
missing_columns = [col for col in ['ID', *X.columns] if col not in test_data.columns]
if missing_columns:
    raise KeyError(f"test.csv is missing required columns: {missing_columns}")

# Select the training features by name so the test matrix has exactly the same
# columns, in the same order, as X. This also ignores any extra columns that
# test.csv may carry (including 'ID').
X_test = test_data[X.columns]

# Split the training data into training and validation sets
# (80/20 split; the fixed random_state keeps the split reproducible).
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features. The scaler is fitted on the training split only and then
# reused for the validation and test data, so no statistics from held-out
# rows influence the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Evaluate the model on the validation split
y_valid_pred = model.predict(X_valid_scaled)
mse = mean_squared_error(y_valid, y_valid_pred)
r2 = r2_score(y_valid, y_valid_pred)
print(f"Linear Regression - MSE: {mse:.2f}, R2 Score: {r2:.2f}")

# Make predictions on the test data
y_test_pred = model.predict(X_test_scaled)

# Save predictions to CSV, pairing each prediction with its test-row ID
output = pd.DataFrame({'ID': test_data['ID'], 'medv': y_test_pred})
output.to_csv("linear_regression_predictions.csv", index=False)
print("Predictions have been saved.")
