In [11]:
import os
import numpy as np
import pandas as pd
import random
import tensorflow as tf

from datetime import timedelta
from time import time

from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, BatchNormalization
from tensorflow.keras.layers import LSTM
from tensorflow.keras.datasets import imdb

In [28]:
# Data Parameters
data_dir = "./data/"
seed = int(time())
random.seed(seed)

num_fan_level = 4
raw = [None] * num_fan_level

train_test = 0.7

# LSTM Parameters
maxlen = 512
batch_size = 8

In [3]:
# Utility
def split_continuous(data, tolerance=0):
    data["time"] = pd.to_datetime(data["time"])
    res = []
    tmp = []
    prev = data.iloc[0][0]
    cnt = 0
    for i in range(1,data.shape[0]):
        if (prev+timedelta(seconds=1)) == data.iloc[i][0]:
            tmp.append(np.array(data.iloc[i][1:3])) # PM2.5, PM10, FAN
        elif cnt < tolerance:
            tmp.append(np.array(data.iloc[i][1:3])) # PM2.5, PM10, FAN
            cnt += 1
        else:
            if tmp != []:
                res.append(tmp)
            tmp = []
            cnt = 0
        prev = data.iloc[i][0]
    return res

In [4]:
# Load Data
raw = [None] * num_fan_level
for file in os.listdir(data_dir):
    file = os.path.join(data_dir,file)
    data = pd.read_csv(file)
    print("Loading {}...  ".format(file),end='')
    for index in range(0,4):
        df = data[data["fan"]==index]
        if df.shape[0] > 0:
            res = split_continuous(df)
            if raw[index] == None:
                raw[index] = res
            else:
                raw[index].extend(res)
        index += 1
    print("Done.")

del df
del file
del data
del index

Loading ./data/2021-01-19oldFilter.csv...  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Done.
Loading ./data/2021-01-20-newFilter.csv...  Done.
Loading ./data/2021-01-21noFan.csv...  Done.
Loading ./data/2021-01-22noFan.csv...  Done.
Loading ./data/2021-01-23Fan3-night.csv...  Done.
Loading ./data/2021-01-23Fan3.csv...  Done.
Loading ./data/2021-01-24_18.csv...  Done.


In [5]:
print(len(raw[0]))
print(len(raw[1]))
print(len(raw[2]))
print(len(raw[3]))

45
18
7
39


In [6]:
# Train Test Split
def train_test_split(raw, train_test=0.7):
    trainX = []
    trainY = []
    testX = []
    testY = []
    
    random.shuffle(raw[0])
    trainX = raw[0][0:int(len(raw[0])*train_test)]
    trainY = [0]*len(trainX)
    testX = raw[0][int(len(raw[0])*train_test):]
    testY = [0]*len(testX)
    
    for i in range(1,len(raw)):
        random.shuffle(raw[i])
        trainX.extend(raw[i][0:int(len(raw[i])*train_test)])
        trainY.extend([i]*int(len(raw[i])*train_test))
        testX.extend(raw[i][int(len(raw[i])*train_test):])
        testY.extend([i]*(len(raw[i])-(int(len(raw[i])*train_test))))

    return trainX, np.array(trainY).astype("float32"), testX, np.array(testY).astype("float32")

trainX, trainY, testX, testY = train_test_split(raw, train_test=0.7)

print(len(trainX))
print(trainY)
print(len(testX))
print(testY)

74
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2. 2. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3. 3.]
35
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 2. 2. 2. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]


In [14]:
# Data Pre-processing
trainX = sequence.pad_sequences(trainX, maxlen=maxlen, dtype='float32')
testX = sequence.pad_sequences(testX, maxlen=maxlen, dtype='float32')

print('Train X shape:', trainX.shape)
print('Test X shape:', testX.shape)

Train X shape: (74, 512, 2)
Test X shape: (35, 512, 2)


In [15]:
print(testX.shape, testY.shape)

(35, 512, 2) (35,)


In [31]:
# LSTM Model
model = Sequential()
model.add(BatchNormalization(axis=1))
model.add(LSTM(16))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

print('Train...')
model.fit(trainX, trainY,
          batch_size=batch_size,
          epochs=128,
          validation_data=(testX, testY))

model.summary()

Train...
Epoch 1/128
Epoch 2/128
Epoch 3/128
Epoch 4/128
Epoch 5/128
Epoch 6/128
Epoch 7/128
Epoch 8/128
Epoch 9/128
Epoch 10/128
Epoch 11/128
Epoch 12/128
Epoch 13/128
Epoch 14/128
Epoch 15/128
Epoch 16/128
Epoch 17/128
Epoch 18/128
Epoch 19/128
Epoch 20/128
Epoch 21/128
Epoch 22/128
Epoch 23/128
Epoch 24/128
Epoch 25/128
Epoch 26/128
Epoch 27/128
Epoch 28/128
Epoch 29/128
Epoch 30/128
Epoch 31/128
Epoch 32/128
Epoch 33/128
Epoch 34/128
Epoch 35/128
Epoch 36/128
Epoch 37/128
Epoch 38/128
Epoch 39/128
Epoch 40/128
Epoch 41/128
Epoch 42/128
Epoch 43/128
Epoch 44/128
Epoch 45/128
Epoch 46/128
Epoch 47/128
Epoch 48/128
Epoch 49/128
Epoch 50/128
Epoch 51/128
Epoch 52/128
Epoch 53/128
Epoch 54/128
Epoch 55/128
Epoch 56/128
Epoch 57/128
Epoch 58/128
Epoch 59/128
Epoch 60/128
Epoch 61/128
Epoch 62/128
Epoch 63/128
Epoch 64/128
Epoch 65/128
Epoch 66/128
Epoch 67/128
Epoch 68/128
Epoch 69/128
Epoch 70/128
Epoch 71/128
Epoch 72/128
Epoch 73/128
Epoch 74/128
Epoch 75/128
Epoch 76/128
Epoch 77/128

In [32]:
score, acc = model.evaluate(testX, testY,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Test score: -6.419435977935791
Test accuracy: 0.20000000298023224
