# CO2-Prediction in Real Estate

## Initial Setup

In [None]:
# Importing the required algorithms and software libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [None]:
# Read the CO2 data set straight from the course's GitHub repository into a DataFrame
DATA_URL = "https://raw.githubusercontent.com/casbdai/datasets/refs/heads/main/co2_data_eng.csv"
df = pd.read_csv(DATA_URL)

## Data Exploration

In [None]:
# Preview the first few rows of the loaded DataFrame
df.head()

In [None]:
# Print a summary of the DataFrame (columns, non-null counts and data types)
df.info()

## Lab Session 1: Training a decision tree

In [None]:
# Separate the data into the outcome (the 'CO2' column)
# and the features (every remaining column)
y = df['CO2']
X = df.drop(columns='CO2')

In [None]:
# Display the feature table (all columns except 'CO2')
X

In [None]:
# Display the outcome column ('CO2')
y

In [None]:
# Initialising and training the decision tree
# max_depth=2 keeps the tree small enough to be read in the plot below
tree = DecisionTreeRegressor(max_depth=2)
tree.fit(X, y)

In [None]:
# Plotting the model
# The column names are converted to a plain list because some scikit-learn
# releases reject a pandas Index for `feature_names` during parameter validation.
plt.figure(figsize=(12, 8))
plot_tree(tree, feature_names=list(X.columns), filled=True)
plt.show()

In [None]:
# Let the fitted tree predict CO2 for every row of X (in-sample predictions)
tree.predict(X)

### Let's play a little with hyperparameters

Hyperparameters to be varied:

- **max_depth:** The maximum depth of the tree, i.e. the maximum number of successive splits between the root and a leaf.
- **min_samples_leaf:** The minimum number of samples (rows of the data set) that must be present in a leaf node.

In [None]:
# Adapting Hyperparameters
# Exercise cell: this will raise a NameError until both placeholders are filled in.
tree = DecisionTreeRegressor(max_depth=___, min_samples_leaf=___) # replace each "___" with a number
tree.fit(X, y)

In [None]:
# Plotting the model (larger canvas, since the tuned tree can be deeper)
# The column names are converted to a plain list because some scikit-learn
# releases reject a pandas Index for `feature_names` during parameter validation.
plt.figure(figsize=(20, 15))
plot_tree(tree, feature_names=list(X.columns), filled=True)
plt.show()

## Lab Session 2: Evaluating the "accuracy" of our prediction model
To determine whether the predictions are accurate, we need to split our data into a training set and a test set. In a previous step we already separated the data into the features X and the outcome y. Both are now split into a training part and a testing part.

In [None]:
# Hold out 30 % of the rows as a test set; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2,
)

In [None]:
# Fit a depth-limited decision tree using only the training portion of the data
regressor = DecisionTreeRegressor(random_state=2, max_depth=10).fit(X_train, y_train)

# Let the fitted tree predict the held-out test rows
y_pred = regressor.predict(X_test)

# Compare predictions with the true test values:
# RMSE is the square root of the mean squared error;
# MAPE is returned by scikit-learn as a fraction (0.1 corresponds to 10 %)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)

# Report both error measures
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Percentage Error: {mape}")

## Lab Session 3: Training a Neural Network

In [None]:
# Defining the Neural Network
# The input width is read from the training data instead of being hard-coded,
# so the model still builds if the number of feature columns changes.
n_features = X_train.shape[1]

model = Sequential([
    Dense(46, activation='relu', input_shape=(n_features,)),  # first layer with 46 nodes, fed by the input features
    Dense(32, activation='relu'),                             # hidden layer with 32 nodes
    Dense(16, activation='relu'),                             # hidden layer with 16 nodes
    Dense(1)                                                  # output layer with 1 node (the predicted CO2 value)
])

# Compile the model: Adam optimizer, mean squared error as the loss to minimise
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

In [None]:
# Fit the network on the training data: 10 passes over the data in batches of 16,
# with 20 % of the training rows set aside for validation
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=16,
    validation_split=0.2,
)

In [None]:
# Let the trained network predict the held-out test rows
y_pred = model.predict(X_test)

# Compare predictions with the true test values:
# RMSE is the square root of the mean squared error;
# MAPE is returned by scikit-learn as a fraction (0.1 corresponds to 10 %)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)

# Report both error measures
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Percentage Error: {mape}")