In [2]:
from tensorflow.keras.layers import *
import tensorflow as tf
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [3]:
def normalize_series(data, min, max):
    data = data - min
    data = data / max
    return data


# COMPLETE THE CODE IN THE FOLLOWING FUNCTION.
def windowed_dataset(series, batch_size, n_past=24, n_future=24, shift=1):
    # YOUR CODE HERE
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(n_past + n_future, shift=shift, drop_remainder=True)
    ds = ds.flat_map(lambda window: window.batch(n_past + n_future))
    ds = ds.shuffle(1000)
    ds = ds.map(lambda window: (window[:-n_future], window[-n_future:, :1]))
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [4]:
data = pd.read_excel('./dataset/paddy/all-sensors.xlsx', sheet_name='Sheet5')

In [5]:
data.head()

Unnamed: 0,datetime,Temperature,Humidity,Illumination,kat_temperature,kat_humidity,kat_illumination,Kipas,Humidifier,LED
0,03/07/2023 0:00:00,2800,7500,7661,Sedang,Sedang,Sangat Rendah,OFF,OFF,ON
1,03/07/2023 0:01:00,2800,7500,7533,Sedang,Sedang,Sangat Rendah,OFF,OFF,ON
2,03/07/2023 0:02:00,2800,7500,7449,Sedang,Sedang,Sangat Rendah,OFF,OFF,ON
3,03/07/2023 0:03:00,2800,7500,7449,Sedang,Sedang,Sangat Rendah,OFF,OFF,ON
4,03/07/2023 0:04:00,2800,7500,7449,Sedang,Sedang,Sangat Rendah,OFF,OFF,ON


In [6]:
data.columns

Index(['datetime', 'Temperature', 'Humidity', 'Illumination',
       'kat_temperature', 'kat_humidity', 'kat_illumination', 'Kipas',
       'Humidifier', 'LED'],
      dtype='object')

In [7]:
data = data.drop('ID', axis=1)
data.head()

KeyError: "['ID'] not found in axis"

In [9]:
print(list(set(data.kat_temperature)))
print(list(set(data.kat_humidity)))
print(list(set(data.kat_illumination)))

['Tinggi', 'Sedang']
['Tinggi', 'Sedang', 'Rendah']
['Sangat Rendah', 'Tinggi', 'Sangat Tinggi', 'Rendah', 'Normal']


In [10]:
def on_off_map(x):
    if x == 'ON':
        return 1
    return 0

data['Kipas'] = data['Kipas'].apply(on_off_map)
data['Humidifier'] = data['Humidifier'].apply(on_off_map)
data['LED'] = data['LED'].apply(on_off_map)

In [11]:
def translate(x):
    if x.lower() == 'tinggi':
        return 'high'
    elif x.lower() == 'rendah':
        return 'low'
    elif x.lower() in ['sedang', 'normal']:
        return 'medium'
    elif x.lower() == 'Sangat Rendah':
        return 'very low'
    return 'very high'

def remove_comma(x: str):
    return x.replace(',', '.')

In [12]:
data['kat_temperature'] = data['kat_temperature'].apply(translate)
data['kat_humidity'] = data['kat_humidity'].apply(translate)
data['kat_illumination'] = data['kat_illumination'].apply(translate)

In [13]:
data['Temperature'] = pd.to_numeric(data['Temperature'].apply(remove_comma), errors='coerce')
data['Illumination'] = pd.to_numeric(data['Illumination'].apply(remove_comma), errors='coerce')
data['Humidity'] = pd.to_numeric(data['Humidity'].apply(remove_comma), errors='coerce')

In [14]:
data.head()

Unnamed: 0,datetime,Temperature,Humidity,Illumination,kat_temperature,kat_humidity,kat_illumination,Kipas,Humidifier,LED
0,03/07/2023 0:00:00,28.0,75.0,76.61,medium,medium,very high,0,0,1
1,03/07/2023 0:01:00,28.0,75.0,75.33,medium,medium,very high,0,0,1
2,03/07/2023 0:02:00,28.0,75.0,74.49,medium,medium,very high,0,0,1
3,03/07/2023 0:03:00,28.0,75.0,74.49,medium,medium,very high,0,0,1
4,03/07/2023 0:04:00,28.0,75.0,74.49,medium,medium,very high,0,0,1


In [15]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11507 entries, 0 to 11506
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   datetime          11507 non-null  object 
 1   Temperature       11507 non-null  float64
 2   Humidity          11507 non-null  float64
 3   Illumination      11063 non-null  float64
 4   kat_temperature   11507 non-null  object 
 5   kat_humidity      11507 non-null  object 
 6   kat_illumination  11507 non-null  object 
 7   Kipas             11507 non-null  int64  
 8   Humidifier        11507 non-null  int64  
 9   LED               11507 non-null  int64  
dtypes: float64(3), int64(3), object(4)
memory usage: 899.1+ KB


In [16]:
dummy = list(set(data.kat_temperature)) + list(set(data.kat_humidity)) + list(set(data.kat_illumination))
dummy = list(set(dummy))
dummy

['low', 'medium', 'very high', 'high']

In [17]:
le = LabelEncoder()
le.fit(dummy)

In [18]:
data['kat_temperature'] = le.transform(data['kat_temperature'])
data['kat_humidity'] = le.transform(data['kat_humidity'])
data['kat_illumination'] = le.transform(data['kat_illumination'])

In [19]:
data.head()

Unnamed: 0,datetime,Temperature,Humidity,Illumination,kat_temperature,kat_humidity,kat_illumination,Kipas,Humidifier,LED
0,03/07/2023 0:00:00,28.0,75.0,76.61,2,2,3,0,0,1
1,03/07/2023 0:01:00,28.0,75.0,75.33,2,2,3,0,0,1
2,03/07/2023 0:02:00,28.0,75.0,74.49,2,2,3,0,0,1
3,03/07/2023 0:03:00,28.0,75.0,74.49,2,2,3,0,0,1
4,03/07/2023 0:04:00,28.0,75.0,74.49,2,2,3,0,0,1


In [20]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11507 entries, 0 to 11506
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   datetime          11507 non-null  object 
 1   Temperature       11507 non-null  float64
 2   Humidity          11507 non-null  float64
 3   Illumination      11063 non-null  float64
 4   kat_temperature   11507 non-null  int32  
 5   kat_humidity      11507 non-null  int32  
 6   kat_illumination  11507 non-null  int32  
 7   Kipas             11507 non-null  int64  
 8   Humidifier        11507 non-null  int64  
 9   LED               11507 non-null  int64  
dtypes: float64(3), int32(3), int64(3), object(1)
memory usage: 764.3+ KB


In [21]:
data.to_excel('./dataset/paddy/clean-sensors.xlsx')