<a href="https://colab.research.google.com/github/baller01/Project-4/blob/Kamwana/CO2_emissions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install tensorflow



In [2]:
# Import the modules
import pandas as pd
from pathlib import Path
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [4]:
# Load the emissions dataset; the CSV is resolved relative to the current working directory
csv_path = Path("CO2_Emissions_Canada.csv")
carbon_df = pd.read_csv(csv_path)

# Preview the first rows of the loaded frame
carbon_df.head()

Unnamed: 0,Make,Model,Vehicle Class,Engine Size(L),Cylinders,Transmission,Fuel Type,Fuel Consumption City (L/100 km),Fuel Consumption Hwy (L/100 km),Fuel Consumption Comb (L/100 km),Fuel Consumption Comb (mpg),CO2 Emissions(g/km)
0,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244


In [5]:
# Show the dtype pandas inferred for every column of the raw frame
carbon_df.dtypes

Make                                 object
Model                                object
Vehicle Class                        object
Engine Size(L)                      float64
Cylinders                             int64
Transmission                         object
Fuel Type                            object
Fuel Consumption City (L/100 km)    float64
Fuel Consumption Hwy (L/100 km)     float64
Fuel Consumption Comb (L/100 km)    float64
Fuel Consumption Comb (mpg)           int64
CO2 Emissions(g/km)                   int64
dtype: object

In [6]:
# Columns left out of the working frame: the make/model labels and the two
# combined fuel-consumption columns (the city and highway columns are kept).
dropped_columns = [
    "Make",
    "Model",
    "Fuel Consumption Comb (L/100 km)",
    "Fuel Consumption Comb (mpg)",
]

# drop() returns a new DataFrame, so carbon_df itself is left unchanged
co2_df = carbon_df.drop(columns=dropped_columns)
co2_df

Unnamed: 0,Vehicle Class,Engine Size(L),Cylinders,Transmission,Fuel Type,Fuel Consumption City (L/100 km),Fuel Consumption Hwy (L/100 km),CO2 Emissions(g/km)
0,COMPACT,2.0,4,AS5,Z,9.9,6.7,196
1,COMPACT,2.4,4,M6,Z,11.2,7.7,221
2,COMPACT,1.5,4,AV7,Z,6.0,5.8,136
3,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,255
4,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,244
...,...,...,...,...,...,...,...,...
7380,SUV - SMALL,2.0,4,AS8,Z,10.7,7.7,219
7381,SUV - SMALL,2.0,4,AS8,Z,11.2,8.3,232
7382,SUV - SMALL,2.0,4,AS8,Z,11.7,8.6,240
7383,SUV - STANDARD,2.0,4,AS8,Z,11.2,8.3,232


In [7]:
# Count how many rows fall into each vehicle class
vehicle_class_counts = co2_df["Vehicle Class"].value_counts()
vehicle_class_counts

SUV - SMALL                 1217
MID-SIZE                    1133
COMPACT                     1022
SUV - STANDARD               735
FULL-SIZE                    639
SUBCOMPACT                   606
PICKUP TRUCK - STANDARD      538
TWO-SEATER                   460
MINICOMPACT                  326
STATION WAGON - SMALL        252
PICKUP TRUCK - SMALL         159
MINIVAN                       80
SPECIAL PURPOSE VEHICLE       77
VAN - PASSENGER               66
STATION WAGON - MID-SIZE      53
VAN - CARGO                   22
Name: Vehicle Class, dtype: int64

In [8]:
# Count how many rows use each transmission code
transmission_counts = co2_df["Transmission"].value_counts()
transmission_counts

AS6     1324
AS8     1211
M6       901
A6       789
A8       490
AM7      445
A9       339
AS7      319
AV       295
M5       193
AS10     168
AM6      132
AV7      118
AV6      113
M7        91
A5        84
AS9       77
A4        65
AM8       62
A7        53
AV8       39
A10       31
AS5       26
AV10      11
AM5        4
AM9        3
AS4        2
Name: Transmission, dtype: int64

In [9]:
# Emissions cut-off (g/km) separating the two classes; defined once so both
# sides of the split always use the same value.
co2_threshold = 255

# Binary target stored as float: 1.0 for rows at or below the threshold,
# 0.0 for rows above it.
# NOTE: a missing emissions value would compare False and be labelled 0.0; the
# emissions column is integer-typed in this dataset, so none are expected.
is_low_emission = co2_df["CO2 Emissions(g/km)"] <= co2_threshold
co2_df["CO2"] = is_low_emission.astype(float)

# Show the class balance of the new target column
print(co2_df["CO2"].value_counts())

1.0    4182
0.0    3203
Name: CO2, dtype: int64


In [10]:
# Numeric feature columns to standardize (listed once and reused below)
numeric_columns = [
    "Engine Size(L)",
    "Cylinders",
    "Fuel Consumption City (L/100 km)",
    "Fuel Consumption Hwy (L/100 km)",
]

# Create one StandardScaler instance and keep it, so the fitted statistics
# remain available after this cell.
scaler = StandardScaler()

# Fit the scaler on the numeric columns and transform them in one step.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# performed later in the notebook, so test-set statistics influence the
# scaling; consider fitting on the training rows only.
co2_scaled_df = scaler.fit_transform(co2_df[numeric_columns])

# Wrap the scaled array in a DataFrame that keeps the original column names and
# the source index, so a later index-aligned concat lines rows up correctly.
co2_transformed_df = pd.DataFrame(
    co2_scaled_df,
    columns=numeric_columns,
    index=co2_df.index,
)

co2_transformed_df

Unnamed: 0,Engine Size(L),Cylinders,Fuel Consumption City (L/100 km),Fuel Consumption Hwy (L/100 km)
0,-0.856721,-0.883408,-0.759002,-1.052781
1,-0.561317,-0.883408,-0.387577,-0.603202
2,-1.225976,-0.883408,-1.873275,-1.457401
3,0.251043,0.210575,0.040990,0.026208
4,0.251043,0.210575,-0.130437,-0.153624
...,...,...,...,...
7380,-0.856721,-0.883408,-0.530433,-0.603202
7381,-0.856721,-0.883408,-0.387577,-0.333455
7382,-0.856721,-0.883408,-0.244721,-0.198582
7383,-0.856721,-0.883408,-0.387577,-0.333455


In [11]:
# One-hot encode the categorical Vehicle Class and Transmission columns.
# dtype=int forces 0/1 integer indicators: newer pandas versions default to
# bool columns, which turn the combined feature matrix into an object array
# once they are mixed with the float columns, and Keras cannot train on that.
# NOTE(review): Fuel Type is also an object column but is not encoded here, so
# it never reaches the model — confirm this is intentional.
categorical_columns = ["Vehicle Class", "Transmission"]
dummies_df = pd.get_dummies(co2_df[categorical_columns], dtype=int)
dummies_df.head()

Unnamed: 0,Vehicle Class_COMPACT,Vehicle Class_FULL-SIZE,Vehicle Class_MID-SIZE,Vehicle Class_MINICOMPACT,Vehicle Class_MINIVAN,Vehicle Class_PICKUP TRUCK - SMALL,Vehicle Class_PICKUP TRUCK - STANDARD,Vehicle Class_SPECIAL PURPOSE VEHICLE,Vehicle Class_STATION WAGON - MID-SIZE,Vehicle Class_STATION WAGON - SMALL,...,Transmission_AS8,Transmission_AS9,Transmission_AV,Transmission_AV10,Transmission_AV6,Transmission_AV7,Transmission_AV8,Transmission_M5,Transmission_M6,Transmission_M7
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Join the scaled numeric features and the one-hot encoded columns side by side
feature_frames = [co2_transformed_df, dummies_df]
co2_new_df = pd.concat(feature_frames, axis="columns")
co2_new_df

Unnamed: 0,Engine Size(L),Cylinders,Fuel Consumption City (L/100 km),Fuel Consumption Hwy (L/100 km),Vehicle Class_COMPACT,Vehicle Class_FULL-SIZE,Vehicle Class_MID-SIZE,Vehicle Class_MINICOMPACT,Vehicle Class_MINIVAN,Vehicle Class_PICKUP TRUCK - SMALL,...,Transmission_AS8,Transmission_AS9,Transmission_AV,Transmission_AV10,Transmission_AV6,Transmission_AV7,Transmission_AV8,Transmission_M5,Transmission_M6,Transmission_M7
0,-0.856721,-0.883408,-0.759002,-1.052781,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,-0.561317,-0.883408,-0.387577,-0.603202,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,-1.225976,-0.883408,-1.873275,-1.457401,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,0.251043,0.210575,0.040990,0.026208,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0.251043,0.210575,-0.130437,-0.153624,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7380,-0.856721,-0.883408,-0.530433,-0.603202,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7381,-0.856721,-0.883408,-0.387577,-0.333455,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7382,-0.856721,-0.883408,-0.244721,-0.198582,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7383,-0.856721,-0.883408,-0.387577,-0.333455,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [13]:
# Split the preprocessed data into feature and target arrays.
# to_numpy(dtype=float) guarantees a purely numeric matrix; plain .values can
# yield an object array when the frame mixes float and bool columns, which
# TensorFlow cannot convert to a tensor.
X = co2_new_df.to_numpy(dtype=float)
y = co2_df["CO2"].to_numpy(dtype=float)

# Hold out a test set; random_state fixes the shuffle so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
## Compile, Train and Evaluate the Model

In [14]:
# Seed the Python, NumPy and TensorFlow random generators so weight
# initialization and batch shuffling are repeatable from run to run
# (bit-exact results can still differ across hardware or library versions).
tf.keras.utils.set_random_seed(1)

# Number of input features = number of columns in the training matrix
input_features = X_train.shape[1]

# Define the model: a small feed-forward network for binary classification
nn = tf.keras.models.Sequential()

# Declare the input shape with an explicit Input layer rather than passing
# input_dim to the first Dense layer
nn.add(tf.keras.Input(shape=(input_features,)))

# Hidden layer: 20 ReLU units
nn.add(tf.keras.layers.Dense(units=20, activation="relu"))

# Output layer: a single sigmoid unit producing a value between 0 and 1
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 20)                960       
                                                                 
 dense_1 (Dense)             (None, 1)                 21        
                                                                 
Total params: 981
Trainable params: 981
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [16]:
# Fit the network on the training split and keep the returned training history
n_epochs = 50
fit_model = nn.fit(x=X_train, y=y_train, epochs=n_epochs)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [17]:
# Score the trained model on the held-out test split; evaluate() returns the
# loss followed by the compiled metrics.
test_metrics = nn.evaluate(x=X_test, y=y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

58/58 - 0s - loss: 0.0833 - accuracy: 0.9675 - 161ms/epoch - 3ms/step
Loss: 0.08331405371427536, Accuracy: 0.9675148725509644
