# Regression Assignment Solution
This notebook contains solutions to the regression assignment tasks: linear, multiple, polynomial, and logistic regression.

Name: Suman Budhathoki
Student ID: 0284292

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

# Apply seaborn's 'whitegrid' style globally so every matplotlib figure drawn
# in the cells below shares the same look.
sns.set(style='whitegrid')

In [None]:
# Download the hockey statistics CSV from GitHub into a DataFrame.
DATA_URL = 'https://raw.githubusercontent.com/nurfnick/Data_Sets_For_Stats/master/CuratedDataSets/hockey.csv'
df = pd.read_csv(DATA_URL)

# Preview the first rows as the cell's displayed output.
df.head()

## Linear Regression: Goals vs Points

In [None]:
# Simple linear regression: model points (PTS) as a linear function of goals (G).
X = df[['G']]   # double brackets keep X two-dimensional, as scikit-learn expects
y = df['PTS']

lin_reg = LinearRegression()
lin_reg.fit(X, y)

print('Intercept:', lin_reg.intercept_)
print('Coefficient:', lin_reg.coef_)

# In-sample predictions: the model is evaluated on the same rows it was fit to.
preds = lin_reg.predict(X)

# Draw the raw data and the fitted line on a single set of axes.
fig, ax = plt.subplots()
ax.scatter(X, y, label='Data')
ax.plot(X, preds, color='red', label='Linear Fit')
ax.set_xlabel('Goals')
ax.set_ylabel('Points')
ax.set_title('Linear Regression: Goals vs Points')
ax.legend()
plt.show()

print('R^2:', r2_score(y, preds))

## Multiple Regression: Goals + Assists vs Points

In [None]:
# Multiple linear regression: model points (PTS) from goals (G) and assists (A).
X_multi = df[['G', 'A']]
y = df['PTS']

multi_reg = LinearRegression()
multi_reg.fit(X_multi, y)

# In-sample predictions, used below for the R^2 score.
preds_multi = multi_reg.predict(X_multi)

print('Intercept:', multi_reg.intercept_)
print('Coefficients:', multi_reg.coef_)
print('R^2:', r2_score(y, preds_multi))

## Polynomial Regression: Quadratic (Goals vs Points)

In [None]:
# Quadratic regression: PTS ~ b0 + b1*G + b2*G^2.
# The inputs are defined here so the cell does not depend on a `y` left over
# from an earlier cell.
goals = df[['G']]
y = df['PTS']

# include_bias=False: LinearRegression already fits its own intercept, so the
# constant column PolynomialFeatures adds by default is redundant and only
# shows up as a meaningless leading 0 in the printed coefficients.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(goals)   # columns: G, G^2
poly_reg = LinearRegression().fit(X_poly, y)

print('Intercept:', poly_reg.intercept_)
print('Coefficients:', poly_reg.coef_)   # [coefficient of G, coefficient of G^2]

# In-sample predictions, used below for the R^2 score.
preds_poly = poly_reg.predict(X_poly)

# Evaluate the fitted quadratic on an evenly spaced, sorted grid so it can be
# drawn as one continuous curve (df is not guaranteed to be sorted by goals).
# The grid is a DataFrame with the same column name the transformer was fit on.
goal_grid = pd.DataFrame({'G': np.linspace(df['G'].min(), df['G'].max(), 200)})
curve = poly_reg.predict(poly.transform(goal_grid))

plt.scatter(df['G'], y, label='Data')
plt.plot(goal_grid['G'], curve, color='red', label='Poly Fit')
plt.xlabel('Goals')
plt.ylabel('Points')
plt.title('Polynomial Regression (Quadratic): Goals vs Points')
plt.legend()
plt.show()

print('R^2:', r2_score(y, preds_poly))

## Logistic Regression: Playoffs Prediction Based on Points

In [None]:
# Create a binary target: 1 if points > median, else 0.
# NOTE: the label is computed from PTS itself, so the classifier below only has
# to recover the median cut-off. Treat this cell as a demonstration of logistic
# regression, not as a real playoff predictor.
threshold = df['PTS'].median()
df['Playoffs'] = (df['PTS'] > threshold).astype(int)

X_log = df[['PTS']]
y_log = df['Playoffs']

log_reg = LogisticRegression().fit(X_log, y_log)

print('Intercept:', log_reg.intercept_)
print('Coefficient:', log_reg.coef_)

# Hard 0/1 class predictions on the training rows.
preds_log = log_reg.predict(X_log)

# In-sample accuracy, so this cell reports a fit metric like the R^2 printed
# by the other model cells.
print('Accuracy:', log_reg.score(X_log, y_log))

# Predicted probability of class 1, evaluated on rows sorted by points so the
# sigmoid can be drawn as a single left-to-right curve.
pts_sorted = X_log.sort_values('PTS')
prob_sorted = log_reg.predict_proba(pts_sorted)[:, 1]

plt.scatter(df['PTS'], y_log, label='Actual')
plt.scatter(df['PTS'], preds_log, color='red', alpha=0.5, label='Predicted')
plt.plot(pts_sorted['PTS'], prob_sorted, color='green', label='P(Playoffs = 1)')
plt.xlabel('Points')
plt.ylabel('Playoffs (0/1)')
plt.title('Logistic Regression: Playoffs Prediction')
plt.legend()
plt.show()

## Analysis & Discussion
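- **Linear vs Multiple Regression:** Multiple regression (Goals + Assists) explains points better than goals alone. This is expected: in hockey, points are defined as goals plus assists, so the two-feature model should fit almost exactly.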
- **Polynomial Regression:** The quadratic model can capture curvature in the goals–points relationship, but its extra term makes it more prone to overfitting than the linear model.
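- **Logistic Regression:** Provides a way to model the likelihood of a binary outcome from points. Here the `Playoffs` label is a stand-in defined as points above the median, so the model illustrates the method rather than predicting real playoff results.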

Overall, each model has trade-offs in interpretability and predictive power.