# In [None]:
#Q1 : Linear Regression - Predicting House Prices
#1.1: Preprocess the Data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

# Read the housing table from disk
df = pd.read_csv('train.csv')

# Target is the SalePrice column; the only predictor is OverallQual
# (double brackets keep the feature 2-D, which scikit-learn expects)
y = df['SalePrice'].values
X = df[['OverallQual']].values

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#1.2: Train Linear Regression from Scratch
def train_linear_regression(X, y):
    """Fit a one-feature linear regression by ordinary least squares.

    Uses the closed-form solution::

        slope     = sum((X - mean(X)) * (y - mean(y))) / sum((X - mean(X)) ** 2)
        intercept = mean(y) - slope * mean(X)

    Args:
        X: Feature values, one per sample. Any array-like is accepted; a
            column vector of shape (n, 1) is flattened first so that it
            cannot broadcast against ``y`` into an (n, n) matrix.
        y: Target values, one per sample (array-like, flattened likewise).

    Returns:
        A ``(slope, intercept)`` tuple.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of values.

    Note:
        When every value in ``X`` is identical the denominator is zero and
        the slope is undefined; NumPy's division result is returned as-is.
    """
    # Work on flat float arrays so the element-wise products below always
    # pair sample i of X with sample i of y.
    X = np.asarray(X, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if X.size != y.size:
        raise ValueError(
            f"X and y must have the same number of samples, got {X.size} and {y.size}"
        )

    x_mean = np.mean(X)
    y_mean = np.mean(y)

    x_centered = X - x_mean
    numerator = np.sum(x_centered * (y - y_mean))
    denominator = np.sum(x_centered ** 2)

    slope = numerator / denominator
    intercept = y_mean - slope * x_mean

    return slope, intercept

# Fit the closed-form model on the training split (feature collapsed to 1-D first)
train_feature = X_train.flatten()
slope, intercept = train_linear_regression(train_feature, y_train)

# Predict function
def predict(X, slope, intercept):
    """Evaluate the fitted line ``slope * X + intercept``.

    Args:
        X: Feature value(s); a scalar or a NumPy array.
        slope: Coefficient multiplied with ``X``.
        intercept: Constant added to the scaled feature.

    Returns:
        The prediction(s), with whatever shape the arithmetic on ``X`` yields.
    """
    scaled = slope * X
    return scaled + intercept

#1.3: Test your Custom Model
# Run the custom model over the flattened train and test features in one pass
y_train_pred_custom, y_test_pred_custom = (
    predict(split.flatten(), slope, intercept) for split in (X_train, X_test)
)

# Mean squared error on the held-out test targets
mse_custom = mean_squared_error(y_test, y_test_pred_custom)
print(f"Custom Model Test MSE: {mse_custom:.2f}")

#1.4: Train and Test Sklearn's Linear Regression Model
# Build and fit scikit-learn's estimator on the training split (fit() returns the estimator)
lr = LinearRegression().fit(X_train, y_train)

# Predictions for both splits
y_train_pred_sklearn, y_test_pred_sklearn = lr.predict(X_train), lr.predict(X_test)

# Mean squared error on the held-out test targets
mse_sklearn = mean_squared_error(y_test, y_test_pred_sklearn)
print(f"Sklearn Model Test MSE: {mse_sklearn:.2f}")

#1.5: Create Plots
#1.5.1 - Train Comparison
plt.figure(figsize=(10, 6))

# Observed training points
plt.scatter(X_train, y_train, color='gray', alpha=0.5, label='Actual Train Data')

# One line per model, drawn in the same order as before (custom first, sklearn dashed)
for fitted, line_style in (
    (y_train_pred_custom, {'color': 'red', 'label': 'Custom Model'}),
    (y_train_pred_sklearn, {'color': 'blue', 'linestyle': '--', 'label': 'Sklearn Model'}),
):
    plt.plot(X_train, fitted, **line_style)

plt.title('Train Set: Custom vs Sklearn Linear Regression')
plt.xlabel('OverallQual')
plt.ylabel('SalePrice')
plt.legend()
plt.show()

#1.5.2 - Test Comparison
plt.figure(figsize=(10, 6))

# Observed held-out points
plt.scatter(X_test, y_test, color='gray', alpha=0.5, label='Actual Test Data')

# One line per model, drawn in the same order as before (custom first, sklearn dashed)
for fitted, line_style in (
    (y_test_pred_custom, {'color': 'red', 'label': 'Custom Model'}),
    (y_test_pred_sklearn, {'color': 'blue', 'linestyle': '--', 'label': 'Sklearn Model'}),
):
    plt.plot(X_test, fitted, **line_style)

plt.title('Test Set: Custom vs Sklearn Linear Regression')
plt.xlabel('OverallQual')
plt.ylabel('SalePrice')
plt.legend()
plt.show()


# In [None]:
#Q2 : Logistic Regression - Predicting Student Pass/Fail Outcome
#2.1: Preprocess the Data
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# Read the student performance table from disk
df = pd.read_csv('StudentsPerformance.csv')

# Binary target: 1 when the math score is at least 50, otherwise 0
passed_math = df['math score'] >= 50
df['PassedMath'] = passed_math.astype(int)

# Target vector and the single predictor ('reading score', kept 2-D)
y = df['PassedMath'].values
X = df[['reading score']].values

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#2.2: Train Logistic Regression from Scratch
# Sigmoid function
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The value is computed in a numerically stable way: the exponential is
    only ever taken of ``-|z|``, which is at most 1, so large-magnitude
    negative inputs no longer overflow ``np.exp`` (the naive formula
    evaluates ``exp(1000)`` for ``z = -1000``).

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # never larger than 1, so it cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return result[()] if result.ndim == 0 else result

# Training function
def train_logistic_regression(X, y, lr=0.01, iterations=1000):
    """Fit logistic regression with batch gradient descent.

    A column of ones is prepended to ``X`` so the first parameter acts as
    the bias. Starting from all-zero parameters, each iteration moves them
    against the gradient ``X_b.T @ (sigmoid(X_b @ theta) - y) / m``.

    Args:
        X: Feature matrix with one row per sample (array-like; a 1-D
            sequence is treated as a single feature column).
        y: Labels, one per sample. Any array-like is accepted (list, tuple,
            pandas Series, ndarray); it is converted to a float column.
        lr: Learning rate applied to every gradient step.
        iterations: Number of gradient steps to take.

    Returns:
        ``theta``, an array of shape ``(n_features + 1, 1)`` whose first
        entry is the bias term.
    """
    # Convert up front: plain sequences have no .reshape, and a float column
    # keeps (h - y) a straightforward element-wise difference.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    X = np.asarray(X, dtype=float)

    m = y.shape[0]
    X_b = np.c_[np.ones((m, 1)), X]  # add bias term
    theta = np.zeros((X_b.shape[1], 1))

    for _ in range(iterations):
        h = sigmoid(X_b.dot(theta))
        gradient = X_b.T.dot(h - y) / m
        theta -= lr * gradient

    return theta

# Fit the from-scratch model on the training split (default learning rate and iteration count)
theta = train_logistic_regression(X=X_train, y=y_train)

#2.3: Test your Custom Model
# Predict function
def predict(X, theta):
    """Classify samples with fitted logistic-regression parameters.

    NOTE: this definition reuses the name of the earlier
    ``predict(X, slope, intercept)`` and replaces it from here on.

    Args:
        X: NumPy feature array with one row per sample.
        theta: Parameters whose first entry is the bias, as returned by
            ``train_logistic_regression``.

    Returns:
        Integer array of 0/1 labels (1 where the predicted probability is
        at least 0.5), shaped like ``X_b.dot(theta)``.
    """
    n_samples = X.shape[0]
    bias_column = np.ones((n_samples, 1))
    design = np.c_[bias_column, X]  # same layout used during training
    probabilities = sigmoid(design.dot(theta))
    is_positive = probabilities >= 0.5
    return is_positive.astype(int)

# Label the held-out samples with the from-scratch model
y_pred_custom = predict(X_test, theta)

# Confusion matrix and accuracy against the true test labels
cm_custom = confusion_matrix(y_test, y_pred_custom)
acc_custom = accuracy_score(y_test, y_pred_custom)
print(f"Custom Logistic Regression Accuracy: {acc_custom:.2f}")

# Draw the confusion matrix
disp_custom = ConfusionMatrixDisplay(cm_custom)
disp_custom.plot()
plt.title("Confusion Matrix - Custom Logistic Regression")
plt.show()

#2.4: Train and Test Sklearn's Logistic Regression Model
# Build and fit scikit-learn's classifier on the training split (fit() returns the estimator)
model = LogisticRegression().fit(X_train, y_train)

# Label the held-out samples
y_pred_sklearn = model.predict(X_test)

# Confusion matrix and accuracy against the true test labels
cm_sklearn = confusion_matrix(y_test, y_pred_sklearn)
acc_sklearn = accuracy_score(y_test, y_pred_sklearn)
print(f"Sklearn Logistic Regression Accuracy: {acc_sklearn:.2f}")

# Draw the confusion matrix
disp_sklearn = ConfusionMatrixDisplay(cm_sklearn)
disp_sklearn.plot()
plt.title("Confusion Matrix - Sklearn Logistic Regression")
plt.show()

#2.5: Additional Model Evaluation
def print_metrics(name, y_true, y_pred):
    """Print a heading followed by accuracy, precision, recall and F1.

    Args:
        name: Heading printed (after a blank line) above the scores.
        y_true: True labels handed to each scikit-learn metric.
        y_pred: Predicted labels handed to each scikit-learn metric.
    """
    print(f"\n{name}")
    scorers = (
        ("Accuracy:", accuracy_score),
        ("Precision:", precision_score),
        ("Recall:", recall_score),
        ("F1 Score:", f1_score),
    )
    # Same order and labels as a sequence of individual print calls
    for label, scorer in scorers:
        print(label, scorer(y_true, y_pred))

# Report the extra metrics for both classifiers, custom model first
for model_name, predictions in (
    ("Custom Logistic Regression", y_pred_custom),
    ("Sklearn Logistic Regression", y_pred_sklearn),
):
    print_metrics(model_name, y_test, predictions)
