# Bias-Variance Analysis for Book Sales Prediction
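This notebook estimates the bias², variance, and irreducible error of polynomial regression models (degrees 1–10) trained on 15 resampled subsets of the training data. It is divided into tasks as per the project requirements.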

## Task 1: Setup Environment and Install Libraries

In [None]:
!pip install numpy scikit-learn matplotlib pandas

## Task 2: Load and Prepare Data

In [None]:
import numpy as np
import pickle
# Deserialize the train and test arrays (train first, then test).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
loaded = []
for path in ('train_data.pkl', 'test_data.pkl'):
    with open(path, 'rb') as f:
        loaded.append(pickle.load(f))
train_data, test_data = loaded

# Column 0 is taken as the single input feature (reshaped into a 2-D column),
# column 1 as the target values.
X_train = train_data[:, 0].reshape(-1, 1)
y_train = train_data[:, 1]
X_test = test_data[:, 0].reshape(-1, 1)
y_test = test_data[:, 1]

## Task 3: Resampling Data into 15 Subsets

In [None]:
# Fix the global NumPy seed so the resampled subsets are reproducible.
np.random.seed(42)
n_train = len(X_train)

# Draw 15 index sets, each of size n_train // 15 and sampled with replacement,
# and pair the selected feature rows with their matching targets.
subsets = [
    (X_train[picked], y_train[picked])
    for picked in (
        np.random.choice(n_train, n_train // 15, replace=True)
        for _ in range(15)
    )
]

## Task 4: Training Models and Calculating Bias, Variance, and Irreducible Error

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
def calculate_bias_variance(poly_degree, subsets, X_test, y_test):
    """Estimate bias², variance and irreducible error for one polynomial degree.

    One polynomial regression model of degree ``poly_degree`` is fitted on
    every ``(X_sub, y_sub)`` pair in ``subsets``; each model then predicts on
    ``X_test``.

    Parameters
    ----------
    poly_degree : int
        Degree passed to ``PolynomialFeatures``.
    subsets : iterable of (X_sub, y_sub)
        Training subsets; one model is fitted per subset.
    X_test : array-like
        Test inputs that every fitted model predicts on.
    y_test : array-like
        Observed test targets the predictions are compared against.

    Returns
    -------
    tuple
        ``(bias, variance, irreducible_error)`` where
        * ``bias`` is the mean squared difference between ``y_test`` and the
          prediction averaged over all models (the squared-bias estimate),
        * ``variance`` is the per-test-point variance of the predictions
          across models, averaged over the test points,
        * ``irreducible_error`` is the mean squared error of the individual
          models minus ``bias`` and ``variance``.
    """
    predictions = []
    for X_sub, y_sub in subsets:
        poly = PolynomialFeatures(degree=poly_degree)
        model = LinearRegression().fit(poly.fit_transform(X_sub), y_sub)
        predictions.append(model.predict(poly.transform(X_test)))
    # One row of test-set predictions per fitted model.
    predictions = np.array(predictions)

    mean_prediction = np.mean(predictions, axis=0)
    bias = np.mean((y_test - mean_prediction) ** 2)
    variance = np.mean(np.var(predictions, axis=0))

    # Total error must come from the individual model predictions. Using the
    # mean prediction here (as before) just reproduces `bias`, which made the
    # irreducible error collapse to exactly -variance.
    mse = np.mean((predictions - y_test) ** 2)
    irreducible_error = mse - bias - variance
    return bias, variance, irreducible_error

In [None]:
# Run the bias/variance estimate for polynomial degrees 1 through 10, then
# split the (bias, variance, irreducible error) triples into parallel lists.
per_degree = [
    calculate_bias_variance(degree, subsets, X_test, y_test)
    for degree in range(1, 11)
]
biases = [b for b, _, _ in per_degree]
variances = [v for _, v, _ in per_degree]
irreducible_errors = [e for _, _, e in per_degree]

## Task 5: Plotting Bias² - Variance Graph

In [None]:
import matplotlib.pyplot as plt
# Plot bias², variance and their sum against polynomial degree on one axes.
degrees = range(1, 11)
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(degrees, biases, label='Bias²', marker='o')
ax.plot(degrees, variances, label='Variance', marker='o')
# The curve labelled 'MSE' is the element-wise sum of bias² and variance.
ax.plot(degrees, [b + v for b, v in zip(biases, variances)], label='MSE', marker='o')
ax.set_xlabel('Polynomial Degree')
ax.set_ylabel('Error')
ax.set_title('Bias-Variance Tradeoff Analysis')
ax.legend()
ax.grid(True)
plt.show()

## Task 6: Tabulation and Observations

In [None]:
import pandas as pd
# Collect the per-degree metrics into one table, one row per polynomial degree.
table_columns = {
    'Polynomial Degree': range(1, 11),
    'Bias²': biases,
    'Variance': variances,
    'Irreducible Error': irreducible_errors,
}
results = pd.DataFrame(table_columns)
print(results)

### Write your detailed analysis and observations here.