<a href="https://colab.research.google.com/github/abdiwaberi33/assignment-6/blob/main/Copy_of_Assignment_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Link to my Colab notebook:  https://colab.research.google.com/drive/1Adzx7XRBBaTziu6gmE00N7x9Mkjn4RDT?usp=sharing

# 1. Imports
# Start by importing all necessary libraries for data handling, visualization and model building.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# 2. Data Collection and Loading
# Fetch the Boston Housing bunch from scikit-learn and assemble one pandas
# DataFrame: a column per feature plus the target stored under 'MEDV'.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this cell needs
# an older scikit-learn release — confirm the environment's version.
boston = load_boston()
data = pd.DataFrame(boston.data, columns=boston.feature_names).assign(MEDV=boston.target)


In [None]:
# 3. Quick Check of Data
# Preview the first five rows.
print(data.head())
# Column dtypes and non-null counts. DataFrame.info() writes its report straight
# to stdout and returns None, so it is called bare: wrapping it in print() would
# append a stray "None" line to the cell output.
data.info()
# Summary statistics for each column.
print(data.describe())


In [None]:
# 4. EDA and Data Preprocessing
# Count the missing entries in every column and print the per-column totals.
missing_per_column = data.isna().sum()
print(missing_per_column)


In [None]:
# Visualize relationships between features and target:
# Function to automate scatter plots
def plot_features_vs_target(features, target, data):
    """Draw one scatter plot per feature against the target column.

    Args:
        features: Iterable of column names in ``data`` to place on the x-axis.
        target: Column name in ``data`` to place on the y-axis.
        data: Object indexable by column name (the notebook passes a pandas
            DataFrame).

    Returns:
        None. Each 6x4 figure is displayed with ``plt.show()`` as soon as it
        has been drawn.
    """
    for feature in features:
        # One fresh figure/axes pair per feature so the plots never overlap.
        _fig, ax = plt.subplots(figsize=(6, 4))
        ax.scatter(data[feature], data[target], alpha=0.5)
        ax.set_xlabel(feature)
        ax.set_ylabel(target)
        ax.set_title(f'{feature} vs {target}')
        plt.show()

# Example usage: scatter four selected features against the 'MEDV' target.
features_to_plot = ['RM', 'LSTAT', 'AGE', 'CRIM']
plot_features_vs_target(features=features_to_plot, target='MEDV', data=data)


In [None]:
# 5. ML Model Training
# Separate the predictors from the 'MEDV' target, then hold out 20% of the rows
# for testing; the fixed random_state keeps the split reproducible.
y = data['MEDV']
X = data.drop(columns='MEDV')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Train the regression model: a random forest with a fixed seed, fitted on the
# training split. The fit call stays last so the notebook still displays the
# fitted estimator as the cell output.
model = RandomForestRegressor(random_state=42)
model.fit(X=X_train, y=y_train)


In [None]:
# 6. Model Evaluation
# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test)

# RMSE is the square root of the mean squared error; R-squared comes from
# r2_score on the same true/predicted pair.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f'RMSE: {rmse:.2f}')
print(f'R-squared: {r2:.2f}')


In [None]:
# Parameter Tuning (Optional):
# 5-fold cross-validated grid search over the forest's size and depth limit.
# The winning refitted estimator is kept in `best_model`.
from sklearn.model_selection import GridSearchCV
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [None, 10],
}
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_


In [None]:
# 7. Model Prediction
# Predict the house price for one new, hand-written observation.
# The 13 values must follow the column order of the training features.
new_data = [[0.2, 12.5, 7.07, 0, 0.5, 6.5, 68, 4.0, 2, 250, 17, 400, 12]]  # Example input
# The model was fitted on a DataFrame, so give the new sample the same column
# names. Predicting on a bare list makes scikit-learn warn that X has no valid
# feature names, and building the frame also raises if the number of values
# does not match the number of training columns.
new_sample = pd.DataFrame(new_data, columns=X_train.columns)
predicted_price = model.predict(new_sample)
# The prediction is scaled by 1000 for display (presumably MEDV is expressed in
# thousands of dollars — confirm against the dataset description).
print(f'Predicted Price: ${predicted_price[0] * 1000:.2f}')
