In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema
from sklearn.preprocessing import StandardScaler


In [14]:
df = pd.read_csv('../../data/H161.csv', parse_dates = ["utc_time"])
df["utc_time"] = df["utc_time"].dt.tz_localize(None)

df.columns = ['time','phase', 'step', 'statement', 'heat_time', 'weight', 'temp', 'pressure']

  df = pd.read_csv('../../data/H161.csv', parse_dates = ["utc_time"])


In [15]:
df = df.drop(['statement','step'], axis=1)

In [16]:
df.sample(40)

Unnamed: 0,time,phase,heat_time,weight,temp,pressure
563306,2021-04-18 09:34:27,FEEDING,0.0,3316.7,73.5,554.3
195948,2020-08-06 06:56:27,DRYING,105.3,3535.2,58.5,245.1
75063,2020-05-14 08:11:27,DISCHRG,0.0,-13.4,30.4,759.9
254059,2020-09-15 15:27:27,DRYING,5.1,3636.8,34.4,762.5
329697,2020-11-07 04:05:27,DRYING,133.3,3702.0,46.2,247.1
155031,2020-07-08 20:59:27,FEEDING,0.0,3286.0,62.4,754.2
873595,2021-11-19 21:03:27,FEEDING,0.0,3274.2,73.6,487.3
453449,2021-02-01 02:37:27,DRYING,109.8,3676.5,64.7,400.1
194650,2020-08-05 09:18:27,DRYING,33.9,3871.0,54.5,255.5
820059,2021-10-13 16:47:27,DRYING,0.0,3430.8,76.6,663.0


In [17]:
df = df[df['phase'].str.contains ('DRYING|DISCHRG') == True]

In [18]:
df.replace({'????????': None}, inplace=True)
df.dropna(inplace=True)

In [19]:
df['temp'] = df.temp.astype(float)
df['weight'] = df.weight.astype(float)
df['pressure'] = df.pressure.astype(float)
df['heat_time'] = df.heat_time.astype(float)
df['phase'] = df.phase.astype('category')
# df.set_index('time', inplace=True)


In [20]:
n = 180  # number of points to be checked before and after

# Find local peaks
df['max_temp'] = 0
df['max_temp'] = df.iloc[argrelextrema(df.temp.values, np.greater_equal, order=n)]['temp']

In [21]:
df['ready'] = 0

In [22]:
for i in range (1, len(df['ready']-1)):
    if df.max_temp.values[i] > 0:
            while df['phase'].values[i] == 'DRYING':
                i += 1
                df.ready.values[i] = 1
df = df.drop(['max_temp'], axis=1)

In [23]:
df.head(300)

Unnamed: 0,time,phase,heat_time,weight,temp,pressure,ready
11,2020-03-23 05:19:27,DRYING,67.5,3799.5,60.3,270.9,0
12,2020-03-23 05:20:27,DRYING,68.5,3800.0,60.3,272.3,0
13,2020-03-23 05:21:27,DRYING,69.5,3799.8,60.1,290.7,0
14,2020-03-23 05:22:27,DRYING,70.5,3797.7,59.8,305.8,0
15,2020-03-23 05:23:27,DRYING,71.5,3795.8,60.0,280.7,0
...,...,...,...,...,...,...,...
330,2020-03-23 10:38:27,DRYING,40.4,3530.1,23.8,761.3,1
331,2020-03-23 10:39:27,DRYING,41.4,3527.0,23.8,761.4,1
332,2020-03-23 10:40:27,DRYING,42.4,3528.0,23.7,761.4,1
333,2020-03-23 10:41:27,DRYING,43.4,3527.1,23.7,761.4,1


In [24]:
# Labeel encoding
df['phase'] = df['phase'].cat.codes
df

Unnamed: 0,time,phase,heat_time,weight,temp,pressure,ready
11,2020-03-23 05:19:27,1,67.5,3799.5,60.3,270.9,0
12,2020-03-23 05:20:27,1,68.5,3800.0,60.3,272.3,0
13,2020-03-23 05:21:27,1,69.5,3799.8,60.1,290.7,0
14,2020-03-23 05:22:27,1,70.5,3797.7,59.8,305.8,0
15,2020-03-23 05:23:27,1,71.5,3795.8,60.0,280.7,0
...,...,...,...,...,...,...,...
1036772,2022-03-13 04:40:27,0,0.0,164.4,16.2,761.7,0
1036773,2022-03-13 04:41:27,0,0.0,164.6,15.7,761.7,0
1036774,2022-03-13 04:42:27,0,0.0,164.9,15.2,761.7,0
1036775,2022-03-13 04:43:27,0,0.0,164.8,14.7,761.7,0


In [25]:
df['p_lagged_30'] = df['pressure'].shift(-30)
df['t_lagged_30'] = df['temp'].shift(-30)
df['w_lagged_30'] = df['weight'].shift(-30)
df['ps_lagged_30'] = df['phase'].shift(-30)
df['ht_lagged_30'] = df['heat_time'].shift(-30)

df['p_lagged_25'] = df['pressure'].shift(-25)
df['t_lagged_25'] = df['temp'].shift(-25)
df['w_lagged_25'] = df['weight'].shift(-25)
df['ps_lagged_25'] = df['phase'].shift(-25)
df['ht_lagged_25'] = df['heat_time'].shift(-25)

In [26]:
X = df.values[:,:-1]
y = df.iloc[:, -1].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
print(X_train)
print(y_train)
print(X_test)
print(y_test)


[[Timestamp('2021-03-16 12:41:27') 1 23.1 ... 66.3 3849.7 1.0]
 [Timestamp('2021-01-06 13:57:27') 1 77.9 ... 65.6 3581.2 1.0]
 [Timestamp('2021-01-25 18:11:27') 1 74.7 ... 67.0 3769.4 1.0]
 ...
 [Timestamp('2020-07-20 16:32:27') 1 133.3 ... 52.8 3699.8 1.0]
 [Timestamp('2021-07-09 10:54:27') 1 9.0 ... 72.9 3702.8 1.0]
 [Timestamp('2021-02-06 12:28:27') 1 53.7 ... 51.1 3731.3 1.0]]
[ 48.  103.1  99.6 ... 133.3   7.9  73.8]
[[Timestamp('2020-12-05 10:35:27') 1 15.3 ... 70.2 3737.5 1.0]
 [Timestamp('2020-08-19 17:07:27') 0 0.0 ... 29.7 -7.0 0.0]
 [Timestamp('2021-07-03 10:15:27') 0 0.0 ... 72.7 3776.4 1.0]
 ...
 [Timestamp('2020-10-09 20:28:27') 1 132.6 ... 55.1 3629.2 1.0]
 [Timestamp('2021-03-02 01:24:27') 1 92.0 ... 60.5 3656.2 1.0]
 [Timestamp('2022-02-17 10:21:27') 0 0.0 ... 46.9 2557.1 1.0]]
[  0.    0.   22.6 ... 133.3 117.   25.5]


In [27]:
df.tail(40)

Unnamed: 0,time,phase,heat_time,weight,temp,pressure,ready,p_lagged_30,t_lagged_30,w_lagged_30,ps_lagged_30,ht_lagged_30,p_lagged_25,t_lagged_25,w_lagged_25,ps_lagged_25,ht_lagged_25
1036737,2022-03-13 04:05:27,1,133.3,3405.5,42.9,535.4,1,761.1,19.5,193.2,0.0,0.0,756.5,24.4,1928.2,0.0,0.0
1036738,2022-03-13 04:06:27,1,133.3,3399.6,42.8,480.8,1,761.1,18.9,164.0,0.0,0.0,757.1,23.3,788.3,0.0,0.0
1036739,2022-03-13 04:07:27,1,133.3,3405.5,42.7,464.1,1,761.7,18.0,164.0,0.0,0.0,760.3,22.4,496.7,0.0,0.0
1036740,2022-03-13 04:08:27,1,133.3,3405.1,42.6,479.7,1,761.7,17.5,164.0,0.0,0.0,760.7,21.3,356.7,0.0,0.0
1036741,2022-03-13 04:09:27,1,133.3,3400.1,42.5,487.9,1,761.7,16.8,164.1,0.0,0.0,761.1,20.3,260.7,0.0,0.0
1036742,2022-03-13 04:10:27,1,119.7,3396.0,42.4,467.2,1,761.7,16.2,164.4,0.0,0.0,761.1,19.5,193.2,0.0,0.0
1036743,2022-03-13 04:11:27,1,103.8,3402.0,42.3,491.7,1,761.7,15.7,164.6,0.0,0.0,761.1,18.9,164.0,0.0,0.0
1036744,2022-03-13 04:12:27,1,87.8,3402.3,42.3,502.8,1,761.7,15.2,164.9,0.0,0.0,761.7,18.0,164.0,0.0,0.0
1036745,2022-03-13 04:13:27,1,71.9,3398.9,42.3,471.3,1,761.7,14.7,164.8,0.0,0.0,761.7,17.5,164.0,0.0,0.0
1036746,2022-03-13 04:14:27,1,55.9,3402.9,42.4,507.6,1,761.7,14.3,165.0,0.0,0.0,761.7,16.8,164.1,0.0,0.0
