# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
import warnings
warnings.filterwarnings('ignore')
# Load the housing dataset; the CSV is expected in the current working directory.
df = pd.read_csv('housing_data.csv')

# Textual overview of the data. head() and describe() are printed explicitly
# because a bare expression is only rendered when it is the last statement of
# a notebook cell; anywhere else (or in a plain script) its value is discarded.
print(df.head())
df.info()  # info() writes its report to stdout itself and returns None
print(df.describe())

# Pairwise relationship plots across the columns of the dataset.
sns.pairplot(df)
plt.show()  # display the pair plot on its own instead of with the next figure

# Restrict the numeric-only plots to numeric columns. DataFrame.corr() raises
# on non-numeric columns in pandas >= 2.0, and a box plot of a text column is
# meaningless, so select the numeric subset once and reuse it below.
numeric_df = df.select_dtypes(include=np.number)

# Heat map of the pairwise correlations between the numeric columns.
correlation_matrix = numeric_df.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Histograms of individual features
df.hist(figsize=(12, 10), bins=20)
plt.show()

# Box plots to check for outliers.
# The grid is sized from the number of columns rather than hard-coded to 3x3,
# so datasets with more than nine numeric columns no longer make plt.subplot
# raise "num must be an integer with 1 <= num <= 9".
box_columns = list(numeric_df.columns)
n_grid_cols = 3
n_grid_rows = max(1, -(-len(box_columns) // n_grid_cols))  # ceiling division
# Keep the original 12x10 size for a three-row grid and scale height per row.
plt.figure(figsize=(12, 10 * n_grid_rows / 3))
for position, column in enumerate(box_columns, start=1):
    plt.subplot(n_grid_rows, n_grid_cols, position)
    # Pass the values by keyword: the meaning of the first positional argument
    # of sns.boxplot differs between seaborn releases, while x= always draws
    # a horizontal box for the given values.
    sns.boxplot(x=numeric_df[column])
    plt.title(column)
plt.tight_layout()
plt.show()
# Simple (single-feature) linear regression: predict Price from MedInc.
x = df[['MedInc']]  # double brackets keep a 2-D DataFrame for scikit-learn
y = df['Price']

# train_test_split returns (X_train, X_test, y_train, y_test) in that order.
# The names must be unpacked in the same order; otherwise the model is fitted
# on the 20% slice and evaluated on the 80% slice.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

simple_model = LinearRegression()
simple_model.fit(x_train, y_train)

# Predictions on the held-out 20% of the rows
y_pred = simple_model.predict(x_test)

# Evaluate the model
print(f'Mean Squared Error: {mean_squared_error(y_test, y_pred)}')
print(f'R^2 Score: {r2_score(y_test, y_pred)}')

# Plot the regression line over the held-out observations. The predictions
# all lie on one straight line, so x_test does not need sorting first.
plt.scatter(x_test, y_test, color='blue', label='Actual data')
plt.plot(x_test, y_pred, color='red', linewidth=2, label='Regression line')
plt.xlabel('MedInc')
plt.ylabel('Price')
plt.title('Simple Linear Regression')
plt.legend()
plt.show()
# Assuming the model has already been trained and the relevant libraries have been imported

# Input MedInc value from the user. Re-prompt until the text parses as a
# finite number: float() raises ValueError on arbitrary text, and it accepts
# 'nan' / 'inf', which the model would reject at prediction time.
while True:
    raw_value = input("Enter the MedInc value: ")
    try:
        medinc_value = float(raw_value)
    except ValueError:
        print(f"{raw_value!r} is not a number; please try again.")
        continue
    if np.isfinite(medinc_value):
        break
    print(f"{raw_value!r} is not a finite number; please try again.")

# Convert the input to a 2D array for the model (one sample, one feature)
medinc_value_array = np.array([[medinc_value]])

# When the model was fitted on a DataFrame, scikit-learn records the column
# names in feature_names_in_ and warns if a prediction input lacks them.
# Rebuild a matching one-row DataFrame in that case; otherwise use the array.
fitted_feature_names = getattr(simple_model, 'feature_names_in_', None)
if fitted_feature_names is not None:
    model_input = pd.DataFrame(medinc_value_array, columns=fitted_feature_names)
else:
    model_input = medinc_value_array

# Predict the house price using the trained model
predicted_price = simple_model.predict(model_input)

# Output the prediction
print(f"The predicted house price for MedInc value of {medinc_value} is: ${predicted_price[0]:.2f}")