<a href="https://colab.research.google.com/github/aid4mh/QPrism/blob/main/tests/Sensor/Non_numerical_record.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install the dependencies and QPrism

In [None]:
# Upgrade pip first, then install the packages listed in the requirements.txt
# file on the main branch of the QPrism GitHub repository.
! python3 -m pip install --upgrade pip
! pip install -r https://raw.githubusercontent.com/aid4mh/QPrism/main/requirements.txt

In [None]:
# Install QPrism itself; --no-deps tells pip not to install its dependencies
# (presumably because the requirements.txt install already provided them).
! pip install --no-deps QPrism

# Import QPrism

In [3]:
from QPrism.Sensor.DQM import DQM_single_record

# Now we create a dataframe whose timestamp column uses the pandas datetime type.

In [5]:
import pandas as pd

In [11]:
# Four consecutive daily samples with constant readings (x=1, y=2).
columns = ['timestamp', 'x', 'y']
data = [
    [day, 1, 2]
    for day in ('2022-09-01', '2022-09-02', '2022-09-03', '2022-09-04')
]

# Build the frame and parse the date strings into pandas datetimes in one chain;
# assigning to an existing column keeps its position.
df = pd.DataFrame(data, columns=columns).assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'])
)

# Show the column dtypes to confirm the timestamp is now a datetime type.
df.dtypes

timestamp    datetime64[ns]
x                     int64
y                     int64
dtype: object

# This dataframe will cause an error when computing the DQM as the timestamp is non-numerical.

In [12]:
# Deliberate failure: the timestamp column is a datetime, not a number, so the
# DQM computation is expected to raise a TypeError. The exception is caught and
# displayed so that "Restart & Run All" can continue past this demonstration
# instead of stopping at this cell.
error_example_DQM = DQM_single_record()
try:
    error_example_DQM.set_input_data(df)
    error_example_DQM.compute_DQM()
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: ignored

# To resolve this, we first need to convert the timestamp to Unix format. This can be done by calling the data processing function `timestamp_to_unix` provided by the package.

In [13]:
# Bring in the package's timestamp conversion helper.
from QPrism.Sensor.data_processing import timestamp_to_unix
# Called for its effect on df (the return value is not used). In the recorded
# output the timestamp column then holds integer epoch values in nanoseconds,
# e.g. 1661990400000000000 for 2022-09-01.
timestamp_to_unix(df)
# Display the frame to confirm the conversion.
df

Unnamed: 0,timestamp,x,y
0,1661990400000000000,1,2
1,1662076800000000000,1,2
2,1662163200000000000,1,2
3,1662249600000000000,1,2


# The processed dataframe can now be used to compute the DQM without errors.

In [16]:
# Same sequence of calls as the earlier failing attempt, now on the dataframe
# whose timestamp column has been converted to integers: create a fresh
# single-record DQM object, attach the dataframe, and run the computation.
example_DQM = DQM_single_record()
example_DQM.set_input_data(df)
example_DQM.compute_DQM()

# Now we consider a dataframe with non-numerical data in it that cannot be converted.

In [34]:
# Four samples with a float timestamp, a constant numerical reading x, and a
# free-text column y that has no numerical interpretation.
columns = ['timestamp', 'x', 'y']
data = [
    [stamp, 1, f'some random text string{number}']
    for number, stamp in enumerate((0.01, 0.02, 0.03, 0.04), start=1)
]
df = pd.DataFrame(data, columns=columns)

# Show the column dtypes; the text column is the non-numerical one.
df.dtypes

timestamp    float64
x              int64
y             object
dtype: object

# This record will also cause an error, because the DQM cannot be computed for text entries.

In [18]:
# Deliberate failure: the dataframe still contains the text column 'y', so the
# DQM computation is expected to raise a KeyError. The exception is caught and
# displayed so that "Restart & Run All" can continue past this demonstration
# instead of stopping at this cell.
error_example_DQM = DQM_single_record()
try:
    error_example_DQM.set_input_data(df)
    error_example_DQM.compute_DQM()
except KeyError as err:
    print(f"{type(err).__name__}: {err}")

KeyError: ignored

# In this case, we can drop the text column and compute the DQM using the remaining columns.

In [35]:
# Remove the text column 'y' so that only the timestamp and x columns remain,
# then display the resulting frame.
df = df.drop('y', axis='columns')
df

Unnamed: 0,timestamp,x
0,0.01,1
1,0.02,1
2,0.03,1
3,0.04,1


In [36]:
# With the text column dropped, the dataframe holds only the timestamp and x
# columns: create a fresh single-record DQM object, attach the dataframe, and
# run the computation.
example_DQM = DQM_single_record()
example_DQM.set_input_data(df)
example_DQM.compute_DQM()