In [31]:
# Libraries
import pandas as pd
import plotly.express as px
from datetime import datetime
from darts import TimeSeries
from darts.models import ExponentialSmoothing

In [57]:
df=pd.read_csv('sensor_data.csv')
df.head()

Unnamed: 0,time,SensorA,SensorB,SensorC
0,00:00,1.41625,4.21993,3.139646
1,00:01,3.534439,,3.064088
2,00:02,5.659733,,2.925565
3,00:03,5.640167,3.07365,5.38121
4,00:04,4.454474,1.554044,1.055965


We see this start at midnight and ends 24h later.

In [58]:
df.tail()

Unnamed: 0,time,SensorA,SensorB,SensorC
1435,23:55,8.417096,4.686364,9.014305
1436,23:56,12.413407,5.709658,7.005713
1437,23:57,11.400624,7.063344,7.522029
1438,23:58,9.707463,7.903916,
1439,23:59,11.220124,8.270642,6.609032


In [59]:
#Overview
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1440 entries, 0 to 1439
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   time     1440 non-null   object 
 1   SensorA  1303 non-null   float64
 2   SensorB  1205 non-null   float64
 3   SensorC  1392 non-null   float64
dtypes: float64(3), object(1)
memory usage: 45.1+ KB


We can see some missing values in each sensor columns.
We also have a time column.
We can interpret this dataset in different ways. We do not have so much information so we can either say all the sensors are on the same spot and we can do the average of the 3. Or we can treat them individually assuming they are placed on different spots.

From the graph we can see the diffrence in measurements for the same time. So We hope those sensors are not based on the same spot...

So We will treat only the sensorA.

In [60]:
df.describe()

Unnamed: 0,SensorA,SensorB,SensorC
count,1303.0,1205.0,1392.0
mean,9.793641,3.215945,8.013604
std,2.038163,2.843696,2.138642
min,1.41625,-2.74,1.055965
25%,8.466171,1.114624,6.524029
50%,9.809673,3.210922,8.170571
75%,11.238712,5.464741,9.618772
max,18.107463,9.564,14.307993


To deal with missing values, replacing by the median or mean doest not seem to be a good option. moving average or mean between all the sensors for the same time...maybe..


In [110]:
sensorA= df.iloc[:,0:2]
sensorA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1440 entries, 0 to 1439
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   time     1440 non-null   object 
 1   SensorA  1303 non-null   float64
dtypes: float64(1), object(1)
memory usage: 22.6+ KB


In [111]:
sensorA['time']=pd.to_datetime(sensorA['time'])
sensorA.info()

  sensorA['time']=pd.to_datetime(sensorA['time'])


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1440 entries, 0 to 1439
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   time     1440 non-null   datetime64[ns]
 1   SensorA  1303 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 22.6 KB


In [112]:
sensorA

Unnamed: 0,time,SensorA
0,2024-05-17 00:00:00,1.416250
1,2024-05-17 00:01:00,3.534439
2,2024-05-17 00:02:00,5.659733
3,2024-05-17 00:03:00,5.640167
4,2024-05-17 00:04:00,4.454474
...,...,...
1435,2024-05-17 23:55:00,8.417096
1436,2024-05-17 23:56:00,12.413407
1437,2024-05-17 23:57:00,11.400624
1438,2024-05-17 23:58:00,9.707463


# Datetime 

In [113]:
sensorA
sensorA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1440 entries, 0 to 1439
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   time     1440 non-null   datetime64[ns]
 1   SensorA  1303 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 22.6 KB


In [119]:
sensorA=sensorA.set_index('time')
sensorA

Unnamed: 0_level_0,SensorA
time,Unnamed: 1_level_1
2024-05-17 00:00:00,1.416250
2024-05-17 00:01:00,3.534439
2024-05-17 00:02:00,5.659733
2024-05-17 00:03:00,5.640167
2024-05-17 00:04:00,4.454474
...,...
2024-05-17 23:55:00,8.417096
2024-05-17 23:56:00,12.413407
2024-05-17 23:57:00,11.400624
2024-05-17 23:58:00,9.707463


In [121]:
sensorA

Unnamed: 0_level_0,SensorA
time,Unnamed: 1_level_1
2024-05-17 00:00:00,1.416250
2024-05-17 00:01:00,3.534439
2024-05-17 00:02:00,5.659733
2024-05-17 00:03:00,5.640167
2024-05-17 00:04:00,4.454474
...,...
2024-05-17 23:55:00,8.417096
2024-05-17 23:56:00,12.413407
2024-05-17 23:57:00,11.400624
2024-05-17 23:58:00,9.707463


# Missing Values
Base dataset has 1304/1440 non missing values.
Doing the moving average is increasing this number.
So we will first, delete those lines and then will perform a moving average to smooth this.


In [130]:
sensorA.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1440 entries, 2024-05-17 00:00:00 to 2024-05-17 23:59:00
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   SensorA  1303 non-null   float64
dtypes: float64(1)
memory usage: 22.5 KB


In [133]:
sensorA=sensorA.dropna(subset=['SensorA'])
sensorA.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1303 entries, 2024-05-17 00:00:00 to 2024-05-17 23:59:00
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   SensorA  1303 non-null   float64
dtypes: float64(1)
memory usage: 20.4 KB


In [134]:
sensorA_nonan= sensorA.rolling(2,center=True).mean()



Unnamed: 0_level_0,SensorA
time,Unnamed: 1_level_1
2024-05-17 00:00:00,
2024-05-17 00:01:00,2.475345
2024-05-17 00:02:00,4.597086
2024-05-17 00:03:00,5.649950
2024-05-17 00:04:00,5.047320
...,...
2024-05-17 23:55:00,9.831608
2024-05-17 23:56:00,10.415252
2024-05-17 23:57:00,11.907016
2024-05-17 23:58:00,10.554043


In [136]:
sensor=sensorA_nonan.dropna()
sensor.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1302 entries, 2024-05-17 00:01:00 to 2024-05-17 23:59:00
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   SensorA  1302 non-null   float64
dtypes: float64(1)
memory usage: 20.3 KB


# Plots

In [137]:
sensor

Unnamed: 0_level_0,SensorA
time,Unnamed: 1_level_1
2024-05-17 00:01:00,2.475345
2024-05-17 00:02:00,4.597086
2024-05-17 00:03:00,5.649950
2024-05-17 00:04:00,5.047320
2024-05-17 00:05:00,5.167151
...,...
2024-05-17 23:55:00,9.831608
2024-05-17 23:56:00,10.415252
2024-05-17 23:57:00,11.907016
2024-05-17 23:58:00,10.554043


# Darts


In [139]:
sensor=sensor.reset_index()
sensor

Unnamed: 0,time,SensorA
0,2024-05-17 00:01:00,2.475345
1,2024-05-17 00:02:00,4.597086
2,2024-05-17 00:03:00,5.649950
3,2024-05-17 00:04:00,5.047320
4,2024-05-17 00:05:00,5.167151
...,...,...
1297,2024-05-17 23:55:00,9.831608
1298,2024-05-17 23:56:00,10.415252
1299,2024-05-17 23:57:00,11.907016
1300,2024-05-17 23:58:00,10.554043


In [141]:
series=TimeSeries.from_dataframe(sensor,time_col='time',fill_missing_dates=True)

ValueError: Could not find a unique inferred frequency (not constant). Observed frequencies: {'min', '2min'}. If any of those is the actual frequency, try passing it with `fill_missing_dates=True` and `freq=your_frequency`. For more information about frequency aliases, read https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases


ValueError: Could not find a unique inferred frequency (not constant). Observed frequencies: {'min', '2min'}. If any of those is the actual frequency, try passing it with `fill_missing_dates=True` and `freq=your_frequency`. For more information about frequency aliases, read https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases

In [8]:
training, validation= series.split_after(0.85)

In [9]:
model= ExponentialSmoothing()
model.fit(training)
pred=model.predict(len(validation))


ValueError: seasonal_periods has not been provided and index does not have a known freq. You must provide seasonal_periods