# Exercises - Carbon Footprint Regression

## Import Libraries

In [None]:
import pandas as pd
import os
import time
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

## Import Data

In [None]:
# Show every column when a frame is displayed instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

# Load the exercise dataset from the notebook's working directory and preview
# its first rows.
df = pd.read_csv("./exercises-carbon-footprint-regression-dataset.csv")
df.head()

In [None]:
# Print the column names, non-null counts, dtypes and memory usage of the frame.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Data type of each column; `object` columns are the ones label-encoded later.
df.dtypes

In [None]:
# Number of distinct values in each column (missing values are not counted by default).
df.nunique()

In [None]:
# Replace every string-typed (object) column with integer codes so the frame
# is fully numeric for the correlation matrix and the models below.
categorical_columns = df.select_dtypes(include=['object']).columns

# One fitted encoder is kept per column. Re-fitting a single shared encoder
# would overwrite its `classes_` on every iteration, leaving no way to map the
# codes of earlier columns back to their original categories.
label_encoders = {}
for column in categorical_columns:
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder

In [None]:
# Large canvas so the per-cell annotations stay legible.
plt.figure(figsize=(16, 12))

# Pairwise Pearson correlation between all columns, drawn as an annotated heatmap.
correlations = df.corr(method='pearson')
sns.heatmap(data=correlations, annot=True, cmap="coolwarm")
plt.show()

In [None]:
# Every column except the last is used as a feature; the last column is the
# regression target.
feature_frame = df.iloc[:, :-1]
target_series = df.iloc[:, -1]
X, y = feature_frame.values, target_series.values

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

In [None]:
# Learn the per-feature mean and variance from the training rows only, then
# apply that same transformation to both splits so no test-set statistics
# leak into the scaler.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

## Training the Models

In [None]:
%%time
# One estimator per algorithm, all with library-default hyperparameters
# (the SVR kernel is spelled out explicitly).
linearregression, decisiontreeregression = LinearRegression(), DecisionTreeRegressor()
supportvectorregression = SVR(kernel='rbf')
randomforestregression, xgbregression = RandomForestRegressor(), XGBRegressor()

# Fit each model on the scaled training split, in the same order as they were created.
for estimator in (
    linearregression,
    decisiontreeregression,
    supportvectorregression,
    randomforestregression,
):
    estimator.fit(X_train, y_train)

# Left as a bare trailing expression so the notebook still echoes the fitted model.
xgbregression.fit(X_train, y_train)

## Model Predictions

In [None]:
%%time
# Predict the held-out test rows with every fitted model; the names on the
# left line up one-to-one with the estimators on the right.
y_lin, y_dectree, y_supvec, y_randfor, y_xgb = (
    estimator.predict(X_test)
    for estimator in (
        linearregression,
        decisiontreeregression,
        supportvectorregression,
        randomforestregression,
        xgbregression,
    )
)

## Scores

In [None]:
# Test-set R² (coefficient of determination) for each classical regressor.
# The random-forest row was previously labelled "Random Forest Classifier",
# although the model behind it is a RandomForestRegressor.
r2_predictions = {
    "Linear Regression": y_lin,
    "Decision Tree Regression": y_dectree,
    "Support Vector Regression": y_supvec,
    "Random Forest Regression": y_randfor,
    "XGB Regression": y_xgb,
}

data1 = {
    "Regression Algorithms": list(r2_predictions),
    "Score": [r2_score(y_test, predicted) for predicted in r2_predictions.values()],
}

score = pd.DataFrame(data1)
print('r_squared metrics')
print(score)

In [None]:
# Test-set mean absolute error for each classical regressor (lower is better).
# The random-forest row was previously labelled "Random Forest Classifier",
# although the model behind it is a RandomForestRegressor.
mae_predictions = {
    "Linear Regression": y_lin,
    "Decision Tree Regression": y_dectree,
    "Support Vector Regression": y_supvec,
    "Random Forest Regression": y_randfor,
    "XGB Regression": y_xgb,
}

data2 = {
    "Regression Algorithms": list(mae_predictions),
    "Score": [
        mean_absolute_error(y_test, predicted)
        for predicted in mae_predictions.values()
    ],
}

score2 = pd.DataFrame(data2)
print('mean absolute error')
print(score2)

## Neural Network

In [None]:
# Fully connected regression network: a 256-unit ReLU layer, three 512-unit
# ReLU layers, and a single linear output unit for the predicted target.
model = Sequential()

# Size the input from the training matrix rather than hard-coding 19, so the
# network keeps matching the data if a feature column is added or removed.
n_features = X_train.shape[1]
model.add(Dense(256, activation='relu', input_dim=n_features))

for hidden_units in (512, 512, 512):
    model.add(Dense(hidden_units, activation='relu'))

model.add(Dense(1, activation='linear'))

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

In [None]:
# Optimise with Adam against mean absolute error; the same quantity is also
# reported as a metric during training.
model.compile(
    loss='mean_absolute_error',
    optimizer='adam',
    metrics=['mean_absolute_error'],
)

## Model Training

In [None]:
%%time 
# Train the network on the scaled training split for 50 epochs; no validation
# data is passed, so only the training loss/metric is reported per epoch.
model.fit(X_train, y_train, epochs=50)

## Model Prediction

In [None]:
# Predict the held-out test rows with the trained network.
# NOTE(review): with a single-unit output layer this is presumably an
# (n_samples, 1) array rather than 1-D like the other models' predictions —
# confirm before doing element-wise arithmetic against y_test.
y_ann = model.predict(X_test)

In [None]:
# Test-set mean absolute error of the neural network, comparable with the
# MAE values reported for the classical regressors.
mean_absolute_error(y_test, y_ann)