# House Price Prediction
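This notebook walks through predicting house prices with a linear regression model: loading the data, exploring it, preprocessing the features, training the model, and evaluating it on a held-out test set.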

## 1. Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

## 2. Loading the Dataset

In [None]:
# Location of the raw housing CSV; named so it is easy to spot and change.
csv_path = '/mnt/data/kc_house_data.csv'

# Read the file into a DataFrame and preview its first rows.
df = pd.read_csv(csv_path)
df.head()

## 3. Exploratory Data Analysis (EDA)

In [None]:

# Count of missing values per column.
# A notebook cell only renders its final expression automatically, so the
# intermediate tables are shown explicitly with display() (IPython injects
# display into the notebook namespace; no import is required).
display(df.isnull().sum())

# Summary statistics of the numeric columns
display(df.describe())

# Distribution of the target variable (price)
fig, ax = plt.subplots()
sns.histplot(df['price'], bins=50, kde=True, ax=ax)
ax.set_title('Distribution of House Prices')
plt.show()


## 4. Data Preprocessing

In [None]:

# Feature selection for the model
features = ['bedrooms', 'bathrooms', 'sqft_living', 'floors', 'waterfront', 'view',
            'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built',
            'zipcode', 'lat', 'long', 'sqft_living15', 'sqft_lot15']
target = 'price'

# Handling missing values.
# Rows without a price cannot serve as training or evaluation labels;
# zero-filling them would fabricate zero-priced sales, so they are dropped.
df = df.dropna(subset=[target])

# Zero-fill every remaining column except 'date': a 0 placeholder is not a
# valid date and would corrupt or break the datetime conversion below.
fill_values = {col: 0 for col in df.columns if col != 'date'}
df = df.fillna(fill_values)

# Convert date column to datetime format (missing dates become NaT)
df['date'] = pd.to_datetime(df['date'])

X = df[features]
y = df[target]

# Splitting the dataset into train (80%) and test (20%) sets; the fixed
# random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## 5. Model Building and Training

In [None]:

# Create the linear regression estimator and train it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# Generate price predictions for the held-out test split
y_pred = model.predict(X_test)


## 6. Model Evaluation

In [None]:

# Mean squared error between the true and predicted test-set prices
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Scatter plot of predicted against actual prices, drawn on an explicit Axes
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.3)
ax.set_xlabel('Actual Prices')
ax.set_ylabel('Predicted Prices')
ax.set_title('Actual vs Predicted Prices')
plt.show()
