# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import metrics

# Load the house-sales table; the CSV is decoded as ISO-8859-1.
df = pd.read_csv('kc_house_data.csv', encoding="ISO-8859-1")

# Derive the age of each house at sale time: sale year minus build year.
sale_year = pd.to_datetime(df['date']).dt.year
df['age'] = sale_year - df['yr_built']

# The identifier and the two raw year sources are not needed once 'age'
# exists, so drop them from the working frame.
df = df.drop(columns=['id', 'yr_built', 'date'])

# Exploratory plots of the target and a few predictors.
#
# Seaborn's axes-level functions draw onto the current matplotlib axes, so
# every plot gets its own figure and an explicit plt.show(). Without that,
# consecutive plots are layered on top of one another and stay open for
# whatever is plotted next.

# Distribution of sale prices (wide figure).
plt.figure(figsize=(20, 10))
sns.histplot(df.price, kde=False)
plt.show()

# Distribution of house age at sale time.
plt.figure()
sns.histplot(df.age, kde=False)
plt.show()

# Number of houses per grade value.
plt.figure()
sns.countplot(x='grade', data=df)
plt.show()

# Spread of living-area square footage.
plt.figure()
sns.boxenplot(x="sqft_living", data=df)
plt.show()


# Single-feature regressions of price: for each feature, fit a straight line
# and a degree-3 polynomial on a train split, report MSE / R^2 on the held-out
# split, and plot each fit over the full data.
features = ["sqft_living", "grade", "bathrooms", "sqft_living15"]

# The target is identical for every feature, so extract it once.
y = df["price"].values

for feature in features:
    # scikit-learn estimators expect a 2-D (n_samples, n_features) input.
    x = df[feature].values.reshape(-1, 1)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.45, random_state=45
    )

    # Ascending grid spanning the observed feature range, used to draw the
    # fitted curves. plt.plot joins points in the order given, so plotting
    # predictions against the unsorted data would zig-zag along chords
    # instead of tracing the fitted curve.
    x_grid = np.linspace(x.min(), x.max(), 200).reshape(-1, 1)

    # --- Linear model ---
    model = LinearRegression()
    model.fit(x_train, y_train)
    predicted = model.predict(x_test)

    plt.scatter(x, y, color="b")
    plt.title(f"Linear Regression for {feature}")
    plt.ylabel("Price")
    plt.xlabel(feature)
    plt.plot(x_grid, model.predict(x_grid), color="k")
    plt.show()

    print(f"MSE for {feature}: {mean_squared_error(y_test, predicted)}")
    print(f"R squared for {feature}: {r2_score(y_test, predicted)}")

    # --- Degree-3 polynomial model ---
    poly = PolynomialFeatures(degree=3)
    x_poly = poly.fit_transform(x_train)
    model_poly = LinearRegression()
    model_poly.fit(x_poly, y_train)

    # Reuse the transformer fitted on the training split: only transform()
    # held-out data, never re-fit on it.
    x_poly_test = poly.transform(x_test)
    predicted_poly = model_poly.predict(x_poly_test)

    print(f"MSE (Polynomial) for {feature}: {mean_squared_error(y_test, predicted_poly)}")
    print(f"R squared (Polynomial) for {feature}: {r2_score(y_test, predicted_poly)}")

    plt.scatter(x, y, color="b")
    plt.title(f"Polynomial Regression for {feature}")
    plt.ylabel("Price")
    plt.xlabel(feature)
    plt.plot(x_grid, model_poly.predict(poly.transform(x_grid)), color="r")
    plt.show()
