In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as gb
import plotly.express as ex
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Read the housing-price CSV from the Kaggle input directory into `Data`
# and leave the frame as the cell's displayed value.
csv_path = "/kaggle/input/housing-price-prediction-data/housing_price_dataset.csv"
Data = pd.read_csv(csv_path)
Data

In [None]:
# Interactive scatter of Price against SquareFeet, coloured by Neighborhood
# with marker size taken from YearBuilt.
# Note: plotly's trendline="ols" option needs statsmodels to be installed.
fig = ex.scatter(
    Data,
    x="SquareFeet",
    y="Price",
    color="Neighborhood",
    size="YearBuilt",
    trendline="ols",
)
fig.show()

In [None]:
# Pie chart whose slices are the neighbourhoods, sized by the "Price" column.
fig = ex.pie(data_frame=Data, names="Neighborhood", values="Price")
fig.show()

In [None]:
# Count the rows per neighbourhood before the labels are replaced by codes.
# The result is kept so it can be shown as the cell output (a bare expression
# that is not the last line of a cell is silently discarded).
neighborhood_counts = Data["Neighborhood"].value_counts()

# Integer codes assigned to the neighbourhood labels.
neighborhood_codes = {"Suburb": 0, "Rural": 1, "Urban": 2}

# Encode only while the column still holds labels. Without this guard,
# re-running the cell would map the integer codes to NaN and then fail in
# astype(int).
if not pd.api.types.is_integer_dtype(Data["Neighborhood"]):
    encoded = Data["Neighborhood"].map(neighborhood_codes)
    unmapped = encoded.isna()
    if unmapped.any():
        # Fail with the offending labels instead of an opaque NaN-to-int error.
        unknown = sorted(Data.loc[unmapped, "Neighborhood"].astype(str).unique())
        raise ValueError(f"Unmapped Neighborhood values: {unknown}")
    Data["Neighborhood"] = encoded.astype(int)

neighborhood_counts

In [None]:
# Separate features from the target. The target is selected by name rather
# than by position so a change in the CSV's column order cannot silently make
# the model predict a different column. "Price" is the column the plots in
# this notebook use as the dependent variable.
# NOTE(review): assumes "Price" is the last column of the CSV, as the original
# positional split implied -- confirm against the dataset.
target_column = "Price"
X = Data.drop(columns=[target_column])
y = Data[target_column]

In [None]:
# Hold out 20% of the rows for testing; random_state is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a linear regression on the training split. fit() returns the estimator
# itself, so `lr` is the fitted model and is shown as the cell output.
lr = LinearRegression().fit(X_train, y_train)
lr

In [None]:
# Run the fitted model over the training split and then the testing split,
# keeping one prediction array per split.
y_train_pred, y_test_pred = (
    lr.predict(split_features) for split_features in (X_train, X_test)
)

In [None]:
# Score the model on both splits: mean squared error and the R^2 coefficient
# of determination, each comparing actual values with the predictions.
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

# Show the scores as the cell output; previously they were computed but never
# displayed anywhere in the notebook. RMSE is included because it is in the
# same units as the target.
pd.DataFrame(
    {
        "MSE": [train_mse, test_mse],
        "RMSE": [train_mse ** 0.5, test_mse ** 0.5],
        "R2": [train_r2, test_r2],
    },
    index=["Training", "Testing"],
)

In [None]:
# Build one tidy frame per split with the actual value, the model's
# prediction and a label naming the split.
# Series.to_numpy() replaces Series.ravel(), which is deprecated since
# pandas 2.2 and emits a FutureWarning. Converting to a plain array also
# drops the shuffled row index, so actuals and predictions pair up by position.
train_data = pd.DataFrame({
    'Actual': y_train.to_numpy(),
    'Predicted': y_train_pred.ravel(),
    'Dataset': 'Training',
})
test_data = pd.DataFrame({
    'Actual': y_test.to_numpy(),
    'Predicted': y_test_pred.ravel(),
    'Dataset': 'Testing',
})

# Stack both splits into a single frame with a fresh 0..n-1 index.
combined_data = pd.concat([train_data, test_data], ignore_index=True)

In [None]:
# Scatter predicted against actual values for both splits, coloured by split.
sns.set(style='ticks')
parity_fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    x='Actual', y='Predicted', hue='Dataset',
    data=combined_data, s=100, alpha=0.8, ax=ax,
)

# Dashed y = x reference line: points on it are predicted exactly.
# The bounds come from everything that is plotted (actuals and predictions of
# BOTH splits). The earlier version ignored the test predictions, so the line
# could stop short of the plotted points.
plotted_values = combined_data[['Actual', 'Predicted']]
lower_bound = plotted_values.min().min() - 1
upper_bound = plotted_values.max().max() + 1
ax.plot(
    [lower_bound, upper_bound], [lower_bound, upper_bound],
    'k--', label='Ideal (Predicted = Actual)',
)

ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
ax.set_title('Actual vs. Predicted (Training and Testing)')
ax.legend()
ax.grid(True)
plt.show()