In [None]:
import pandas as pd

# Load the buildings dataset from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='data/buildings.csv')

# Show the first 5 rows as a quick sanity check of the loaded data
df.head(n=5)


Unnamed: 0,CO2_emissions,energy_use,building_size,sustainable
0,120,4000,85,yes
1,180,6000,120,no
2,90,3500,75,yes
3,220,7000,150,no
4,110,3700,80,yes


In [2]:
# Convert the 'sustainable' column from text ('yes'/'no') to numeric values (1/0).
# The mapping also sends 1 -> 1 and 0 -> 0 so that re-running this cell is harmless:
# with only the text keys, a second run would find no matching key and silently
# turn every label into NaN. Any value not listed in the mapping still becomes NaN.
df['sustainable'] = df['sustainable'].map({'yes': 1, 'no': 0, 1: 1, 0: 0})

# Show the updated dataframe
df.head()


Unnamed: 0,CO2_emissions,energy_use,building_size,sustainable
0,120,4000,85,1
1,180,6000,120,0
2,90,3500,75,1
3,220,7000,150,0
4,110,3700,80,1


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Separate the feature columns (model inputs) from the target column (label to predict)
X = df[['CO2_emissions', 'energy_use', 'building_size']]
y = df['sustainable']

# Split the data into training and testing sets (80% train, 20% test).
# The fixed random_state makes the split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a Decision Tree Classifier.
# The classifier is seeded too: scikit-learn permutes the candidate features at each
# split, so without a random_state, ties between equally good splits can be broken
# differently on each run and the fitted tree (and its predictions) may change.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out test set
y_pred = model.predict(X_test)

# Show accuracy score (fraction of test rows whose predicted label matches the true label)
accuracy = accuracy_score(y_test, y_pred)
print("Model accuracy:", accuracy)


Model accuracy: 1.0


In [5]:
# Classify one previously unseen building with the trained model.
# Example input: CO2 = 150, energy = 5000, size = 100
import pandas as pd

# One-row frame whose columns have the same names and order as the training features
new_building = pd.DataFrame({
    'CO2_emissions': [150],
    'energy_use': [5000],
    'building_size': [100],
})
prediction = model.predict(new_building)

# Report the outcome: label 1 means sustainable, anything else means not sustainable
print(
    "✅ This building is likely SUSTAINABLE."
    if prediction[0] == 1
    else "❌ This building is likely NOT sustainable."
)


✅ This building is likely SUSTAINABLE.


In [None]:
import joblib

# Persist the trained model to 'model.pkl' in the current working directory
joblib.dump(value=model, filename='model.pkl')

# To restore it in a later session:
# model = joblib.load('model.pkl')
# Only load files you trust: joblib files are pickle-based, and unpickling
# untrusted data can execute arbitrary code.
