# Air Quality

Codecademy Exercise: Deep Learning Classification

Predict the air quality category from the concentrations of different chemical compounds measured in the air.

In [None]:
import pandas as pd
from collections import Counter
from sklearn.preprocessing import LabelEncoder

import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense

In [None]:
# load the training and testing datasets into pandas
train_data = pd.read_csv('air_quality_train.csv')
test_data = pd.read_csv('air_quality_test.csv')

# show columns and their respective types
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would only add a stray "None" line
train_data.info()

# print the class distribution (label value -> number of training rows)
print(Counter(train_data['Air_Quality']))

In [None]:
# columns used as model inputs; listed once so that the training and test
# splits are always sliced with exactly the same features in the same order
feature_columns = [
    'PM2.5', 'PM10', 'NO', 'NO2', 'NOx', 'NH3', 'CO',
    'SO2', 'O3', 'Benzene', 'Toluene', 'Xylene', 'AQI',
]
# column holding the class to predict
label_column = 'Air_Quality'

# training split: feature matrix and label column
x_train = train_data[feature_columns]
y_train = train_data[label_column]

# test split: feature matrix and label column
x_test = test_data[feature_columns]
y_test = test_data[label_column]

In [None]:
# encode the labels into integers
# the encoder is fitted on the training labels only and then reused for the
# test labels, so both splits share the same label -> integer mapping
le = LabelEncoder()
y_train = le.fit_transform(y_train.astype(str))
y_test = le.transform(y_test.astype(str))

# convert the integer encoded labels into binary (one-hot) vectors
# num_classes is passed explicitly so both splits get the same number of
# columns even if the test split does not contain the highest-indexed class
num_classes = len(le.classes_)
y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes=num_classes, dtype='int64')
y_test = tensorflow.keras.utils.to_categorical(y_test, num_classes=num_classes, dtype='int64')

In [None]:
# design the model: a feed-forward classifier with one hidden layer
model = Sequential()
# add the input layer, with one input per feature column
model.add(InputLayer(input_shape=(x_train.shape[1],)))
# add a hidden layer
model.add(Dense(10, activation='relu'))
# add an output layer with one softmax unit per class
# the width is taken from the one-hot encoded labels instead of a
# hard-coded 6, so it always matches the classes present in the data
model.add(Dense(y_train.shape[1], activation='softmax'))

# compile the model
# categorical cross-entropy matches the one-hot encoded label vectors
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# fit the network on the training split:
# 30 passes over the data in mini-batches of 16 rows, with progress output
model.fit(
    x_train,
    y_train,
    epochs=30,
    batch_size=16,
    verbose=1,
)

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# evaluate on the test split
# the prediction holds one score per class for every row; argmax along
# axis 1 picks the index of the highest-scoring class
y_estimate = np.argmax(model.predict(x_test), axis=1)
# the one-hot test labels are collapsed back to class indices the same way
y_true = np.argmax(y_test, axis=1)

# print the classification report for true vs. predicted class indices
print(classification_report(y_true, y_estimate))