<a href="https://colab.research.google.com/github/irenfonseca/ventilation-kpi/blob/main/kpi_ventilation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# KPI ventilation metrics

In [None]:
# Data import 
!pip install seaborn
import pathlib

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

from tensorflow.keras.callbacks import TensorBoard

print(tf.__version__)

## Number of people estimated from Wi-Fi connections (#Personwifi)

### Data preparation

#### Getting data from Google Drive
The data are preprocessed: nominal values are one-hot encoded.

In [None]:
# Cell-local imports, grouped ahead of the statements that use them.
import gspread
import pandas as pd
from google.auth import default
from google.colab import auth

# Authenticate the Colab user, then pick up the resulting default credentials.
auth.authenticate_user()
creds, _ = default()

# Open the first worksheet of the spreadsheet named 'wifi'.
gc = gspread.authorize(creds)
worksheet = gc.open('wifi').sheet1

# get_all_values gives a list of rows.
rows = worksheet.get_all_values()

# One DataFrame row per sheet row; every column is then cast to float
# (presumably the cells arrive as strings -- TODO confirm).
datasetwifi = pd.DataFrame.from_records(rows)
datasetwifi = datasetwifi.astype(float)

print(datasetwifi.head())
print(datasetwifi.shape)

Check that the data are complete by counting the missing values in each column.



In [None]:
# Count the missing (NaN) entries in each column; the resulting Series is the cell output.
datasetwifi.isna().sum()

#### Separate data into training and testing


In [None]:
# Training split: a random 80% of the rows, drawn with a fixed seed.
train_dataset_wifi = datasetwifi.sample(frac=0.8, random_state=0).astype(float)

# Test split: every row whose index label was not drawn for training.
test_dataset_wifi = datasetwifi.drop(train_dataset_wifi.index).astype(float)

train_dataset_wifi.head()


#### Statistics

In [None]:
# Per-feature summary statistics of the training split, one row per feature.
# Column 15 is excluded here; it is the column later popped off as the label.
train_stats = (
    train_dataset_wifi
    .describe()
    .drop(columns=15)
    .transpose()
)
train_stats


#### Separate features from labels



In [None]:
# Remove the target column (15) from each feature frame and keep it as the label Series.
# pop() mutates the frame in place, so re-running this cell without rebuilding the
# split raises KeyError.
train_labels_wifi = train_dataset_wifi.pop(15)
test_labels_wifi = test_dataset_wifi.pop(15)

#### Data normalization

In [None]:
def norm(x, stats=None):
  """Standardize the columns of ``x`` using per-feature mean and std.

  Args:
    x: DataFrame whose column labels match the index of ``stats``.
    stats: Frame with 'mean' and 'std' columns indexed by feature label.
      Defaults to the notebook-level ``train_stats``.

  Returns:
    ``(x - mean) / std`` computed column-wise. Features whose std is 0 are
    only centered (divided by 1) instead of producing NaN/inf.
  """
  if stats is None:
    stats = train_stats
  # A feature that is constant in the training split (plausible for one-hot
  # columns) has std == 0; dividing by it would inject NaN/inf into the model
  # inputs. Use a unit scale for those features instead.
  safe_std = stats['std'].replace(0, 1)
  return (x - stats['mean']) / safe_std
# Standardize both splits with the training statistics and keep them as float.
normed_train_data_wifi = norm(train_dataset_wifi).astype(float)
normed_test_data_wifi = norm(test_dataset_wifi).astype(float)



### The model

In [None]:
def create_model():
  """Build and compile the dense regression network for the wifi data.

  The input width is read from the global ``train_dataset_wifi`` when this
  function is called. The network has two ReLU hidden layers (20 and 12
  units) and a single linear output unit. It is compiled with MSE loss,
  RMSprop(0.001) and the 'mae' / 'mse' metrics.

  Returns:
    The compiled ``keras.Sequential`` model.
  """
  n_features = len(train_dataset_wifi.keys())

  stack = [
    layers.Dense(20, activation='relu', input_shape=[n_features]),
    layers.Dense(12, activation='relu'),
    layers.Dense(1),
  ]
  net = keras.Sequential(stack)

  net.compile(
      loss='mse',
      optimizer=tf.keras.optimizers.RMSprop(0.001),
      metrics=['mae', 'mse'],
  )
  return net

In [None]:
# Instantiate the compiled network defined by create_model().
model = create_model()

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

#### Training 

Use early stopping to avoid overfitting.

In [None]:
class PrintDot(keras.callbacks.Callback):
  """Minimal progress display: prints one dot per finished epoch.

  A line break is printed first whenever the epoch number is a multiple
  of 100 (0, 100, 200, ...), so the dots are grouped 100 per line.
  """

  def on_epoch_end(self, epoch, logs=None):
    # `logs` is unused; it defaults to None so the override keeps the same
    # call signature as the base-class hook.
    if epoch % 100 == 0:
      print('')
    print('.', end='')

# Maximum number of epochs passed to model.fit in this cell.
EPOCHS = 300
# Re-cast features and labels to float before training.
normed_train_data_wifi = normed_train_data_wifi.astype(float)
train_labels_wifi = train_labels_wifi.astype(float)

# TensorBoard callback writing this run's logs under logs/modelwifi.
tensorboardModelWifi = TensorBoard(log_dir='logs/modelwifi')

def plot_history(history):
  """Plot training and validation error per epoch in two figures.

  Args:
    history: Object with a ``history`` mapping that contains the keys
      'mae', 'val_mae', 'mse' and 'val_mse', and an ``epoch`` sequence
      (as returned by ``model.fit`` in this notebook).

  The first figure shows the mean absolute error (y-axis limited to 0-5),
  the second the mean squared error (y-axis limited to 0-20).
  """
  curves = pd.DataFrame(history.history)
  curves['epoch'] = history.epoch

  # (metric key, y-axis label, y-axis limits) for each figure, in drawing order.
  panels = (
    ('mae', 'Mean Abs Error', [0, 5]),
    ('mse', 'Mean Square Error ', [0, 20]),
  )
  for metric, y_label, y_limits in panels:
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.plot(curves['epoch'], curves[metric], label='Train Error')
    plt.plot(curves['epoch'], curves['val_' + metric], label='Val Error')
    plt.ylim(y_limits)
    plt.legend()
  plt.show()


# Early stopping watches the validation loss; `patience` is the number of
# epochs to check for improvement.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# Train silently (verbose=0), holding out 20% of the training data for validation.
history = model.fit(
    normed_train_data_wifi,
    train_labels_wifi,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, PrintDot(), tensorboardModelWifi],
)

plot_history(history)

In [None]:
# Training log as a table with an added 'epoch' column; show the last rows.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

##### Optional: TensorBoard for visualization

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir logs/

#### Validation
Evaluation:

In [None]:
# Score the model on the normalized held-out test split. The three values unpacked
# here are expected to be the loss and the two compiled metrics ('mae', 'mse').
loss, mae, mse = model.evaluate(normed_test_data_wifi, test_labels_wifi, verbose=2)

print("Testing set Mean Abs Error: {:5.2f} #Personwifi".format(mae))

Predictions:

In [None]:
# Predict on the normalized test features and flatten the result to 1-D.
test_predictions = model.predict(normed_test_data_wifi).flatten()

# Scatter of true vs. predicted values.
plt.scatter(test_labels_wifi, test_predictions)
plt.xlabel('True Values [#Personwifi]')
plt.ylabel('Predictions [#Personwifi]')
plt.axis('equal')
plt.axis('square')
# Start both axes at 0 while keeping their current upper limits.
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
# Reference diagonal y = x, drawn from -100 to 100; `_ =` keeps the return value out of the cell output.
_ = plt.plot([-100, 100], [-100, 100])


Error distribution:

In [None]:
# Signed prediction error (prediction minus true value) on the test split.
error = test_predictions - test_labels_wifi
# Histogram of the errors using 25 bins.
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [#Personwifi]")
_ = plt.ylabel("Count")

## Number of people estimated from CO2 (#PersonCO2)

### Data preparation

#### Getting data from Google Drive
The data are preprocessed: nominal values are one-hot encoded.

In [None]:
# Cell-local imports, grouped ahead of the statements that use them.
import gspread
import pandas as pd
from google.auth import default
from google.colab import auth

# Authenticate the Colab user, then pick up the resulting default credentials.
auth.authenticate_user()
creds, _ = default()

# Open the first worksheet of the spreadsheet named 'co2'.
gc = gspread.authorize(creds)
worksheet = gc.open('co2').sheet1

# get_all_values gives a list of rows.
rows = worksheet.get_all_values()

# One DataFrame row per sheet row; every column is then cast to float
# (presumably the cells arrive as strings -- TODO confirm).
datasetco2 = pd.DataFrame.from_records(rows)
datasetco2 = datasetco2.astype(float)

print(datasetco2.head())
print(datasetco2.shape)

Check that the data are complete by counting the missing values in each column.



In [None]:
# Count the missing (NaN) entries in each column; the resulting Series is the cell output.
datasetco2.isna().sum()

#### Separate data into training and testing


In [None]:
# Training split: a random 80% of the rows, drawn with a fixed seed.
train_dataset_co2 = datasetco2.sample(frac=0.8, random_state=0).astype(float)

# Test split: every row whose index label was not drawn for training.
test_dataset_co2 = datasetco2.drop(train_dataset_co2.index).astype(float)

train_dataset_co2.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
1649,950.0,175.89,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,6.0
3122,515.0,153.21,3.1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1127,850.0,175.89,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0
599,450.0,175.89,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
2955,615.0,153.21,3.1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0


#### Statistics

In [None]:
# Per-feature summary statistics of the training split, one row per feature.
# Column 23 is excluded here; it is the column later popped off as the label.
train_stats = (
    train_dataset_co2
    .describe()
    .drop(columns=23)
    .transpose()
)
train_stats


#### Separate features from labels



In [None]:
# Remove the target column (23) from each feature frame and keep it as the label Series.
# pop() mutates the frame in place, so re-running this cell without rebuilding the
# split raises KeyError.
train_labels_co2 = train_dataset_co2.pop(23)
test_labels_co2 = test_dataset_co2.pop(23)

#### Data normalization

In [None]:
def norm(x, stats=None):
  """Standardize the columns of ``x`` using per-feature mean and std.

  Args:
    x: DataFrame whose column labels match the index of ``stats``.
    stats: Frame with 'mean' and 'std' columns indexed by feature label.
      Defaults to the notebook-level ``train_stats``.

  Returns:
    ``(x - mean) / std`` computed column-wise. Features whose std is 0 are
    only centered (divided by 1) instead of producing NaN/inf.
  """
  if stats is None:
    stats = train_stats
  # A feature that is constant in the training split (plausible for one-hot
  # columns) has std == 0; dividing by it would inject NaN/inf into the model
  # inputs. Use a unit scale for those features instead.
  safe_std = stats['std'].replace(0, 1)
  return (x - stats['mean']) / safe_std
# Standardize both splits with the training statistics and keep them as float.
normed_train_data_co2 = norm(train_dataset_co2).astype(float)
normed_test_data_co2 = norm(test_dataset_co2).astype(float)

### The model

In [None]:
def create_model():
  """Build and compile the dense regression network for the CO2 data.

  The input width is read from the global ``train_dataset_co2`` when this
  function is called. The network has one ReLU hidden layer (16 units) and
  a single linear output unit. It is compiled with MSE loss, RMSprop(0.001)
  and the 'mae' / 'mse' metrics.

  Returns:
    The compiled ``keras.Sequential`` model.
  """
  n_features = len(train_dataset_co2.keys())

  stack = [
    layers.Dense(16, activation='relu', input_shape=[n_features]),
    layers.Dense(1),
  ]
  net = keras.Sequential(stack)

  net.compile(
      loss='mse',
      optimizer=tf.keras.optimizers.RMSprop(0.001),
      metrics=['mae', 'mse'],
  )
  return net

In [None]:
# Instantiate the compiled network defined by create_model().
model = create_model()

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

#### Training 

Use early stopping to avoid overfitting.

In [None]:
class PrintDot(keras.callbacks.Callback):
  """Minimal progress display: prints one dot per finished epoch.

  A line break is printed first whenever the epoch number is a multiple
  of 100 (0, 100, 200, ...), so the dots are grouped 100 per line.
  """

  def on_epoch_end(self, epoch, logs=None):
    # `logs` is unused; it defaults to None so the override keeps the same
    # call signature as the base-class hook.
    if epoch % 100 == 0:
      print('')
    print('.', end='')

# Maximum number of epochs passed to model.fit in this cell.
EPOCHS = 300
# Re-cast features and labels to float before training.
normed_train_data_co2 = normed_train_data_co2.astype(float)
train_labels_co2 = train_labels_co2.astype(float)

# TensorBoard callback writing this run's logs under logs/modelco2.
tensorboardModelCO2 = TensorBoard(log_dir='logs/modelco2')

def plot_history(history):
  """Plot training and validation error per epoch in two figures.

  Args:
    history: Object with a ``history`` mapping that contains the keys
      'mae', 'val_mae', 'mse' and 'val_mse', and an ``epoch`` sequence
      (as returned by ``model.fit`` in this notebook).

  The first figure shows the mean absolute error (y-axis limited to 0-5),
  the second the mean squared error (y-axis limited to 0-20).
  """
  curves = pd.DataFrame(history.history)
  curves['epoch'] = history.epoch

  # (metric key, y-axis label, y-axis limits) for each figure, in drawing order.
  panels = (
    ('mae', 'Mean Abs Error', [0, 5]),
    ('mse', 'Mean Square Error ', [0, 20]),
  )
  for metric, y_label, y_limits in panels:
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.plot(curves['epoch'], curves[metric], label='Train Error')
    plt.plot(curves['epoch'], curves['val_' + metric], label='Val Error')
    plt.ylim(y_limits)
    plt.legend()
  plt.show()


# Early stopping watches the validation loss; `patience` is the number of
# epochs to check for improvement.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# Train silently (verbose=0), holding out 20% of the training data for validation.
history = model.fit(
    normed_train_data_co2,
    train_labels_co2,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, PrintDot(), tensorboardModelCO2],
)

plot_history(history)

In [None]:
# Training log as a table with an added 'epoch' column; show the last rows.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

##### Optional: TensorBoard for visualization

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir logs/

#### Validation
Evaluation:

In [None]:
# Score the model on the normalized held-out test split. The three values unpacked
# here are expected to be the loss and the two compiled metrics ('mae', 'mse').
loss, mae, mse = model.evaluate(normed_test_data_co2, test_labels_co2, verbose=2)

print("Testing set Mean Abs Error: {:5.2f} #PersonCO2".format(mae))

Predictions:

In [None]:
# Predict on the normalized test features and flatten the result to 1-D.
test_predictions = model.predict(normed_test_data_co2).flatten()

# Scatter of true vs. predicted values.
plt.scatter(test_labels_co2, test_predictions)
plt.xlabel('True Values [#PersonCO2]')
plt.ylabel('Predictions [#PersonCO2]')
plt.axis('equal')
plt.axis('square')
# Start both axes at 0 while keeping their current upper limits.
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
# Reference diagonal y = x, drawn from -100 to 100; `_ =` keeps the return value out of the cell output.
_ = plt.plot([-100, 100], [-100, 100])


Error distribution:

In [None]:
# Signed prediction error (prediction minus true value) on the test split.
error = test_predictions - test_labels_co2
# Histogram of the errors using 25 bins.
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [#PersonCO2]")
_ = plt.ylabel("Count")