# **Decision Tree Regressor**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Download the real-estate CSV straight into a DataFrame
URL = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillsNetwork/labs/Module%203/data/real_estate_data.csv"
data = pd.read_csv(URL)

# Keep only the rows that have no missing value in any column;
# the raw frame stays available as `data`
data_cleaned = data.dropna()

# Sanity check: every per-column NaN count should now be zero
print("Missing values after cleaning:")
missing_per_column = data_cleaned.isna().sum()
print(missing_per_column)

# MEDV is the regression target (Y); every other column is a feature (X)
target_column = "MEDV"
Y = data_cleaned[target_column]
X = data_cleaned.drop(columns=[target_column])

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=1)

# Build the tree with the 'absolute_error' split criterion.
# random_state is pinned because scikit-learn randomly permutes the features at
# each split and breaks ties between equally good splits at random; without a
# fixed seed the fitted tree (and every metric derived from it) can differ
# between runs even though the train/test split itself is seeded.
regression_tree = DecisionTreeRegressor(criterion="absolute_error", random_state=1)

# Fit the tree on the training portion only
regression_tree.fit(X_train, Y_train)

# Predict the target for every held-out row
predictions = regression_tree.predict(X_test)

# R-squared of the fitted tree on the held-out data
score = regression_tree.score(X_test, Y_test)
print("Model R-squared score on test set:", score)

# Show the raw predictions
print("Predictions on the test set:", predictions)

# Mean absolute error between the true targets and the predictions.
# The factor 1000 reports the error in dollars — this assumes MEDV is recorded
# in thousands of dollars; confirm against the dataset description.
absolute_errors = (Y_test - predictions).abs()
mae = absolute_errors.mean() * 1000
print(f"Mean Absolute Error (MAE): ${mae:,.2f}")


Missing values after cleaning:
CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
LSTAT      0
MEDV       0
dtype: int64
Model R-squared score on test set: 0.868782408318937
Predictions on the test set: [29.6 23.3 14.9 13.4 43.1 44.8 26.6 20.1 26.6 18.2 14.3 17.4 10.8 46.
 14.9 25.3 19.9 24.8 15.  24.1 30.1 17.4 13.8 24.3 15.   5.  19.4  5.
 23.6 25.  12.6 19.8 25.  19.8 33.4 19.7 46.  12.5 33.2 13.1 34.9 23.
 13.8 17.8 14.  45.4 18.8 24.8 24.3 19.5 21.7 30.8 23.9 22.6  6.3 26.7
  8.4 21.  13.5 16.3 10.9 23.3 34.9 19.6 32.  28.7 12.  20.4 11.3 14.4
 23.8 43.1 15.  21.  13.1 10.9 21.  19.5 19.3]
Mean Absolute Error (MAE): $2,678.48
