In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

df=pd.read_csv('Carbon_Emission.csv',sep=";")
df.head()

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encode categorical variables
le = LabelEncoder()
for column in df.select_dtypes(include=['object']).columns:
    df[column] = le.fit_transform(df[column])

# Define features and target variable
X = df.drop(columns=['CarbonEmission'])
y = df['CarbonEmission']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Decision Tree
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

In [3]:
importances = clf.feature_importances_
feature_names = X.columns
sorted_indices = importances.argsort()[::-1]

for idx in sorted_indices:
    print(f'{feature_names[idx]}: {importances[idx]}')

Vehicle Monthly Distance Km: 0.07842697367195307
Monthly Grocery Bill: 0.07781014257148544
How Long TV PC Daily Hour: 0.0707761271642133
How Long Internet Daily Hour: 0.06968215748199433
How Many New Clothes Monthly: 0.06867498253249885
Cooking_With: 0.06798594559824206
Recycling: 0.06785707988412881
Waste Bag Weekly Count: 0.05699435435358447
Waste Bag Size: 0.05259446511288646
Body Type: 0.05073995189111814
Heating Energy Source: 0.05063160421634086
How Often Shower: 0.05052017127517062
Diet: 0.047912750176557564
Social Activity: 0.04345963494068061
Energy efficiency: 0.04208257349910269
Sex: 0.03462614033696919
Transport: 0.02550080300857217
Vehicle Type: 0.024686609698711667
Frequency of Traveling by Air: 0.01903753258578959


In [12]:

df.iloc[1]

Body Type                           1
Sex                                 0
Diet                                3
How Often Shower                    1
Heating Energy Source               2
Transport                           2
Vehicle Type                        5
Social Activity                     1
Monthly Grocery Bill              114
Frequency of Traveling by Air       2
Vehicle Monthly Distance Km         9
Waste Bag Size                      0
Waste Bag Weekly Count              3
How Long TV PC Daily Hour           9
How Many New Clothes Monthly       38
How Long Internet Daily Hour        5
Energy efficiency                   0
Recycling                           2
Cooking_With                        9
CarbonEmission                   1892
Name: 1, dtype: int64

In [16]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

# Erstellen Sie einen Dictionary von LabelEncodern für jede kategoriale Spalte
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Definieren Sie Features und Zielvariable
X = df.drop(columns=['CarbonEmission'])
y = df['CarbonEmission']

# Teilen Sie die Daten in Training- und Test-Sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Trainieren Sie den Decision Tree
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)



# Extrahieren Sie den user_input
user_input = df.iloc[1].drop('CarbonEmission')

# Stellen Sie sicher, dass user_input die gleiche Enkodierung wie das Training hat
for column in user_input.index:
    if column in label_encoders:
        user_input[column] = label_encoders[column].transform([user_input[column]])[0]

# Konvertieren Sie user_input in ein DataFrame und stellen Sie sicher, dass die gleiche Reihenfolge der Features beibehalten wird
user_input_df = pd.DataFrame([user_input])

# Treffen Sie Vorhersagen mit dem trainierten Decision Tree
prediction = clf.predict(user_input_df)

# Finden Sie die drei wichtigsten Features für Reduktionsempfehlungen
importances = clf.feature_importances_
feature_names = X.columns
sorted_indices = np.argsort(importances)[::-1]

# Vorschläge generieren
top_features = sorted_indices[:3]
suggestions = [feature_names[i] for i in top_features]

print(f'Um Ihre Kohlenstoffemissionen zu reduzieren, könnten Sie: {suggestions}')

def get_reduction_suggestions(user_input):
    # Transformieren Sie den user_input
    for column in user_input.index:
        if column in label_encoders:
            user_input[column] = label_encoders[column].transform([user_input[column]])[0]
    
    user_input_df = pd.DataFrame([user_input])
    prediction = clf.predict(user_input_df)
    top_features = sorted_indices[:3]
    suggestions = [feature_names[i] for i in top_features]
    return suggestions

# Extrahieren Sie den user_input aus dem DataFrame
user_input = df.iloc[1].drop('CarbonEmission')

# Generieren Sie Vorschläge
reduction_suggestions = get_reduction_suggestions(user_input)
print(f'Um Ihre Kohlenstoffemissionen zu reduzieren, könnten Sie: {reduction_suggestions}')


Um Ihre Kohlenstoffemissionen zu reduzieren, könnten Sie: ['Vehicle Monthly Distance Km', 'Monthly Grocery Bill', 'How Long TV PC Daily Hour']
Um Ihre Kohlenstoffemissionen zu reduzieren, könnten Sie: ['Vehicle Monthly Distance Km', 'Monthly Grocery Bill', 'How Long TV PC Daily Hour']


In [17]:
df.iloc[1]

Body Type                           1
Sex                                 0
Diet                                3
How Often Shower                    1
Heating Energy Source               2
Transport                           2
Vehicle Type                        5
Social Activity                     1
Monthly Grocery Bill              114
Frequency of Traveling by Air       2
Vehicle Monthly Distance Km         9
Waste Bag Size                      0
Waste Bag Weekly Count              3
How Long TV PC Daily Hour           9
How Many New Clothes Monthly       38
How Long Internet Daily Hour        5
Energy efficiency                   0
Recycling                           2
Cooking_With                        9
CarbonEmission                   1892
Name: 1, dtype: int64

In [19]:
from sklearn.metrics import r2_score


# Berechnen Sie die Vorhersagen für die Trainings- und Testdaten
y_train_pred = clf.predict(X_train)
y_test_pred = clf.predict(X_test)

# Berechnen Sie den R^2-Score für die Trainings- und Testdaten
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

print(f'R^2-Score für Trainingsdaten: {r2_train}')
print(f'R^2-Score für Testdaten: {r2_test}')

R^2-Score für Trainingsdaten: 1.0
R^2-Score für Testdaten: 0.391733400867026
