# In [None]:
# %%
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.linear_model import LinearRegression
from sklearn import metrics 



# %% [markdown]
# Load Data in a Data Frame

# %%
# Load CSV file

def load_dataset(path='boston (3).csv'):
    """
    Load the dataset from a CSV source into a pandas DataFrame.

    Parameters
    ----------
    path : str or file-like, optional
        Location of the CSV data; anything ``pandas.read_csv`` accepts.
        Defaults to 'boston (3).csv', the file this notebook was written
        against, so calling ``load_dataset()`` behaves as before.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV source.
    """
    return pd.read_csv(path)
# Load the data once at module level; the cells below reuse `dataset`.
dataset=load_dataset()
# %% [markdown]
# Print First Rows

# %%
def print_summarize_dataset(dataset):
    """
    Print a short overview of a DataFrame.

    The overview consists of the shape, the first ten rows and the
    output of ``describe(include="all")``, in that order. Nothing is
    returned.
    """
    # Each section is built lazily so that it is computed right before it is
    # printed, after the preceding sections have already been written out.
    sections = (
        ("\nFirst 10 rows:", lambda: dataset.head(10)),
        ("\nSummary statistics:", lambda: dataset.describe(include="all")),
    )

    print("Dataset shape:", dataset.shape)
    for heading, build_section in sections:
        print(heading)
        print(build_section())
# Show the shape, first rows and summary statistics of the freshly loaded data.
print_summarize_dataset(dataset)


# %% [markdown]
# Clean the dataset

# %%
def clean_dataset(dataset):
    """
    Return a new DataFrame without the rows that contain missing values.

    A row is dropped as soon as any of its cells is missing; the input
    frame itself is left untouched.
    """
    complete_rows = dataset.dropna(axis=0, how="any")
    return complete_rows

# Rebind `dataset` to the cleaned frame so the cells below work on rows
# without missing values.
dataset = clean_dataset(dataset)

# %% [markdown]
# Generate histograms

# %%
def print_histograms(dataset):
    """
    Draw the histograms produced by ``DataFrame.hist`` and display them.

    The plots use 20 bins on a 12x10 figure; the layout is tightened
    before the figure is shown.
    """
    histogram_options = {"bins": 20, "figsize": (12, 10)}
    dataset.hist(**histogram_options)
    plt.tight_layout()
    plt.show()

# Plot the histograms of the cleaned dataset.
print_histograms(dataset)

# %% [markdown]
# Correlation Matrix

# %%
def compute_correlations_matrix(boston_dataframe):
    """
    Compute the pairwise Pearson correlation matrix of a DataFrame.

    The shape of the resulting matrix is printed as a quick sanity check.

    Parameters
    ----------
    boston_dataframe : pandas.DataFrame
        Frame whose columns are correlated with each other.

    Returns
    -------
    pandas.DataFrame
        The correlation matrix returned by ``DataFrame.corr``.
    """
    correlations_matrix = boston_dataframe.corr(method='pearson')
    print(correlations_matrix.shape)
    return correlations_matrix

# Correlation matrix of the cleaned dataset, kept for inspection below.
correlations = compute_correlations_matrix(dataset)

# Print the 'MDEV' column of the matrix, i.e. every column's correlation
# with 'MDEV'.
print(correlations['MDEV'])

# %% [markdown]
# Scatter Matrix

# %%

def print_scatter_matrix(boston_dataframe):
    """
    Display a seaborn pair plot of the given dataframe.

    The seaborn 'ticks' style is applied first, then the pair plot is
    drawn and the figure is shown.
    """
    sns.set(style='ticks')
    sns.pairplot(data=boston_dataframe)
    plt.show()
    
# Draw the pair plot for the cleaned dataset.
print_scatter_matrix(dataset)

# %%
def plot_scatter(dataset, x_col, y_col):
    """
    Plot a scatter plot of one dataset column against another.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame that holds both columns.
    x_col : str
        Name of the column drawn on the x axis.
    y_col : str
        Name of the column drawn on the y axis.

    Raises
    ------
    KeyError
        If `x_col` or `y_col` is not a column of `dataset`.
    """
    plt.figure(figsize=(8, 6))
    plt.scatter(dataset[x_col], dataset[y_col], alpha=0.6)
    plt.title(f"{y_col} vs. {x_col}")
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    plt.grid()
    plt.show()


# These calls live at module level (not inside plot_scatter) so that the
# function does not call itself and the plots are actually produced.
# NOTE(review): 'LSAT' may be a typo for 'LSTAT' -- confirm against the CSV
# header. A missing column is reported and skipped instead of aborting the run.
for feature in ('RM', 'AGE', 'LSAT', 'CRIM'):
    if feature in dataset.columns:
        plot_scatter(dataset, feature, 'MDEV')
    else:
        print(f"Skipping scatter plot: column {feature!r} is not in the dataset")


# %% [markdown]
# Model Training

# %%
def boston_fit_model(dataset):
    """
    Fit a linear regression that predicts 'MDEV' from 'RM'.

    Only the "RM" and "MDEV" columns of `dataset` are used. The fitted
    ``LinearRegression`` instance is returned.
    """
    rm_and_mdev = dataset[["RM", "MDEV"]]

    # Everything but the last selected column forms the 2-D feature matrix;
    # the second selected column ("MDEV") is the 1-D target vector.
    features = rm_and_mdev.iloc[:, :-1].values
    target = rm_and_mdev.iloc[:, 1].values

    fitted_regressor = LinearRegression()
    fitted_regressor.fit(features, target)
    return fitted_regressor

# Fit the RM -> MDEV regression on the cleaned dataset and keep it in `model`.
model = boston_fit_model(dataset)

# %% [markdown]
# 

# %% [markdown]
# Prediction

# %%

def boston_predict(estimator, array_to_predict):
    """
    Run `estimator.predict` on scalar, 1-D or 2-D input.

    Parameters
    ----------
    estimator : object
        Any object exposing ``predict(X)``.
    array_to_predict : array-like
        Values to predict for. A scalar or a 1-D sequence is treated as
        ONE sample and reshaped to a single row; input that already has
        two or more dimensions is passed through unchanged.

    Returns
    -------
    object
        Whatever ``estimator.predict`` returns.
    """
    # atleast_2d turns a 0-d value into shape (1, 1) and a 1-D vector of
    # length n into shape (1, n); higher-dimensional arrays are left as-is.
    X = np.atleast_2d(np.array(array_to_predict))
    return estimator.predict(X)
# boston_fit_model trains on the single "RM" feature and needs the "MDEV"
# target, so it is fitted on the full frame, and the prediction input must be
# the matching one-column "RM" feature matrix.
data = dataset[["RM"]]
estimator = boston_fit_model(dataset)
print(boston_predict(estimator, data))

# %% [markdown]
# Model Evaluation

# %%
def print_model_prediction_evaluator(base_test, prediction):
    """
    Print error metrics comparing predictions with reference values.

    Prints the mean absolute error, the mean squared error and the root
    mean squared error, in that order. Nothing is returned.

    Parameters
    ----------
    base_test : array-like
        Reference (true) values.
    prediction : array-like
        Predicted values, aligned with `base_test`.
    """
    print("Mean Absolute Error:", metrics.mean_absolute_error(base_test, prediction))

    # Compute the MSE once and derive the RMSE from it instead of
    # evaluating the metric a second time.
    mean_squared_error = metrics.mean_squared_error(base_test, prediction)
    print("Mean Squared Error:", mean_squared_error)
    print("Root Mean Squared Error:", np.sqrt(mean_squared_error))

# Example usage with hand-written dummy values (these are not the output of
# the model fitted above).
base_test = [24, 21, 28]
predictions = [23.5, 21.2, 28.1]
print_model_prediction_evaluator(base_test, predictions)
