In [1]:
# Libraries
import pandas as pd
import plotly.express as px
from datetime import datetime
from darts import TimeSeries
from darts.models import ExponentialSmoothing

  from tqdm.autonotebook import tqdm


In [2]:
df=pd.read_csv('sensor_data.csv')
df.head()

Unnamed: 0,time,SensorA,SensorB,SensorC
0,00:00,1.41625,4.21993,3.139646
1,00:01,3.534439,,3.064088
2,00:02,5.659733,,2.925565
3,00:03,5.640167,3.07365,5.38121
4,00:04,4.454474,1.554044,1.055965


We see this start at midnight and ends 24h later.

In [3]:
df.tail()

Unnamed: 0,time,SensorA,SensorB,SensorC
1435,23:55,8.417096,4.686364,9.014305
1436,23:56,12.413407,5.709658,7.005713
1437,23:57,11.400624,7.063344,7.522029
1438,23:58,9.707463,7.903916,
1439,23:59,11.220124,8.270642,6.609032


In [4]:
#Overview
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1440 entries, 0 to 1439
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   time     1440 non-null   object 
 1   SensorA  1303 non-null   float64
 2   SensorB  1205 non-null   float64
 3   SensorC  1392 non-null   float64
dtypes: float64(3), object(1)
memory usage: 45.1+ KB


We can see some missing values in each sensor columns.
We also have a time column.
We can interpret this dataset in different ways. We do not have so much information so we can either say all the sensors are on the same spot and we can do the average of the 3. Or we can treat them individually assuming they are placed on different spots.

From the graph we can see the diffrence in measurements for the same time. So We hope those sensors are not based on the same spot...

So We will treat only the sensorA.

In [5]:
df.describe()

Unnamed: 0,SensorA,SensorB,SensorC
count,1303.0,1205.0,1392.0
mean,9.793641,3.215945,8.013604
std,2.038163,2.843696,2.138642
min,1.41625,-2.74,1.055965
25%,8.466171,1.114624,6.524029
50%,9.809673,3.210922,8.170571
75%,11.238712,5.464741,9.618772
max,18.107463,9.564,14.307993


To deal with missing values, replacing by the median or mean doest not seem to be a good option. moving average or mean between all the sensors for the same time...maybe..


In [44]:
sensorA= df.iloc[:,0:2]
sensorA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1440 entries, 0 to 1439
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   time     1440 non-null   object 
 1   SensorA  1303 non-null   float64
dtypes: float64(1), object(1)
memory usage: 22.6+ KB


In [45]:
sensorA['time']=pd.to_datetime(sensorA['time'])
sensorA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1440 entries, 0 to 1439
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   time     1440 non-null   datetime64[ns]
 1   SensorA  1303 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 22.6 KB


  sensorA['time']=pd.to_datetime(sensorA['time'])


In [46]:
sensorA

Unnamed: 0,time,SensorA
0,2024-05-20 00:00:00,1.416250
1,2024-05-20 00:01:00,3.534439
2,2024-05-20 00:02:00,5.659733
3,2024-05-20 00:03:00,5.640167
4,2024-05-20 00:04:00,4.454474
...,...,...
1435,2024-05-20 23:55:00,8.417096
1436,2024-05-20 23:56:00,12.413407
1437,2024-05-20 23:57:00,11.400624
1438,2024-05-20 23:58:00,9.707463


In [47]:
sensorA['time']=sensorA['time'].dt.strftime('%H:%M')
sensorA

Unnamed: 0,time,SensorA
0,00:00,1.416250
1,00:01,3.534439
2,00:02,5.659733
3,00:03,5.640167
4,00:04,4.454474
...,...,...
1435,23:55,8.417096
1436,23:56,12.413407
1437,23:57,11.400624
1438,23:58,9.707463


In [48]:
sensorA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1440 entries, 0 to 1439
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   time     1440 non-null   object 
 1   SensorA  1303 non-null   float64
dtypes: float64(1), object(1)
memory usage: 22.6+ KB


In [None]:
'Until here...'

# Datetime 

# Missing Values
Base dataset has 1304/1440 non missing values.
Doing the moving average is increasing this number.
So we will first, delete those lines and then will perform a moving average to smooth this.


In [49]:
sensorA=sensorA.dropna(subset=['SensorA'])
sensorA.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1303 entries, 0 to 1439
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   time     1303 non-null   object 
 1   SensorA  1303 non-null   float64
dtypes: float64(1), object(1)
memory usage: 30.5+ KB


In [52]:
sensorA_nonan= sensorA.rolling(2,center=True).mean()



DataError: Cannot aggregate non-numeric type: datetime64[ns]

In [32]:
sensor=sensorA_nonan.dropna()
sensor.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1302 entries, 00:01:00 to 23:59:00
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   SensorA  1302 non-null   float64
dtypes: float64(1)
memory usage: 20.3+ KB


# Plots

In [33]:
sensor

Unnamed: 0_level_0,SensorA
time,Unnamed: 1_level_1
00:01:00,2.475345
00:02:00,4.597086
00:03:00,5.649950
00:04:00,5.047320
00:05:00,5.167151
...,...
23:55:00,9.831608
23:56:00,10.415252
23:57:00,11.907016
23:58:00,10.554043


# Darts


In [41]:
sensor=sensor.reset_index()
sensor

Unnamed: 0,index,time,SensorA
0,0,00:01:00,2.475345
1,1,00:02:00,4.597086
2,2,00:03:00,5.649950
3,3,00:04:00,5.047320
4,4,00:05:00,5.167151
...,...,...,...
1297,1297,23:55:00,9.831608
1298,1298,23:56:00,10.415252
1299,1299,23:57:00,11.907016
1300,1300,23:58:00,10.554043


In [42]:
sensor.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1302 entries, 0 to 1301
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   index    1302 non-null   int64  
 1   time     1302 non-null   object 
 2   SensorA  1302 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 30.6+ KB


In [43]:
sensor['time']=pd.to_datetime(sensor['time'])

TypeError: <class 'datetime.time'> is not convertible to datetime, at position 0

In [38]:
sensor['time']=sensor['time'].dt.strftime('%H:%M')
sensor

AttributeError: Can only use .dt accessor with datetimelike values