# Notebook to Dictionary

In [1]:
import pandas as pd

In [20]:
file_path = r"C:\Users\Toshiba\Documents\PD2_john\databases\data_1687589315209.txt"

result = {}

with open(file_path, "r") as file:
    lines = file.readlines()

header = lines[0].strip().split("\t")  # Get the column names from the first line

for i in range(len(header)):
    column_values = [
        float(row.strip().split("\t")[i]) for row in lines[1:]
    ]  # Convert values to floats
    result[header[i]] = column_values

In [25]:
len(result["AirFlow"])

1810

### Create the database

In [None]:
import sqlite3

conn = sqlite3.connect(
    r"C:\Users\Toshiba\Documents\PD2_john\databases\SensorReadings.db"
)
cursor = conn.cursor()

# Create the SensorReadings table
cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS SensorReadings (
        ID INTEGER PRIMARY KEY,
        UserID INTEGER NOT NULL,
        Therm TEXT,
        ECG TEXT,
        Airflow TEXT,
        Snore TEXT,
        SpO2 TEXT,
        HR TEXT,
        TimeIn DATETIME,
        TimeOut DATETIME
    )
"""
)

# Commit the changes and close the connection
conn.commit()
conn.close()

### Create artificial data

In [1]:
import random
import string
from datetime import datetime, timedelta

start_time = datetime(2023, 6, 12, 8, 0, 0)

In [16]:
import random
import string
from datetime import datetime, timedelta

# Generate a random 6-digit userID using letters and digits
userID = "".join(random.choices(string.ascii_letters + string.digits, k=6))

# Define the start time and end time for TimeIn and TimeOut
start_time = datetime(2023, 6, 12, 8, 0, 0)
end_time = start_time + timedelta(hours=8)

# Define the number of data points
num_data_points = 691200

# Convert lists to strings
therm_data = str([random.randint(0, 500) for _ in range(num_data_points)])
ecg_data = str([random.randint(0, 500) for _ in range(num_data_points)])
airflow_data = str([random.randint(0, 500) for _ in range(num_data_points)])
snore_data = str([random.randint(0, 500) for _ in range(num_data_points)])
spo2_data = str([random.randint(0, 500) for _ in range(num_data_points)])
hr_data = str([random.randint(0, 500) for _ in range(num_data_points)])

# Create the dictionary with keys and values
data_dict = {
    "UserID": userID,
    "Therm": therm_data,
    "ECG": ecg_data,
    "Airflow": airflow_data,
    "Snore": snore_data,
    "SpO2": spo2_data,
    "HR": hr_data,
    "TimeIn": start_time.strftime("%Y-%m-%d %H:%M:%S"),
    "TimeOut": end_time.strftime("%Y-%m-%d %H:%M:%S"),
}

In [17]:
data_dict["TimeIn"]

'2023-06-12 08:00:00'

### Insert values into the database

In [19]:
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect(
    r"C:\Users\Toshiba\Documents\PD2_john\databases\SensorReadings.db"
)
cursor = conn.cursor()

# Insert the data into the table
cursor.execute(
    """
    INSERT INTO SensorReadings (UserID, Therm, ECG, Airflow, Snore, SpO2, HR, TimeIn, TimeOut)
    VALUES (:UserID, :Therm, :ECG, :Airflow, :Snore, :SpO2, :HR, :TimeIn, :TimeOut)
""",
    data_dict,
)

# Commit the transaction
conn.commit()

# Close the connection
conn.close()

### Retrieving Data

In [21]:
conn = sqlite3.connect(
    r"C:\Users\Toshiba\Documents\PD2_john\databases\SensorReadings.db"
)
cursor = conn.cursor()


# Function to retrieve a specific row based on UserID
def retrieve_row_by_userID(userID):
    # Execute the SELECT statement with a WHERE clause
    cursor.execute("SELECT * FROM SensorReadings WHERE UserID=?", (userID,))

    # Fetch the row from the result
    row = cursor.fetchone()

    if row is not None:
        # Extract the values from the row
        id_, userID, Therm, ECG, Airflow, Snore, SpO2, HR, TimeIn, TimeOut = row

        # Create a dictionary to store the retrieved data
        data_dict2 = {
            "UserID": userID,
            "Therm": Therm,
            "ECG": ECG,
            "Airflow": Airflow,
            "Snore": Snore,
            "SpO2": SpO2,
            "HR": HR,
            "TimeIn": TimeIn,
            "TimeOut": TimeOut,
        }

        return data_dict2

    else:
        return "row data does not exist"


# Retrieve a specific row based on the provided UserID
input_userID = "OYKKRb"
retrieved_data = retrieve_row_by_userID(input_userID)

# Process the retrieved data
if retrieved_data is not None:
    print("Retrieved Data Successfully")
else:
    print("No data found for the provided UserID.")

# Close the connection
conn.close()

Retrieved Data Successfully


### Turn Sensor Data from Str to Lists

In [21]:
import ast

In [37]:
therm_data = ast.literal_eval(retrieved_data["Therm"])

In [39]:
type(therm_data), len(therm_data)

(list, 691200)

### Sending to the Backend to save something in the database

Do a post request and see if it will go through

In [1]:
import requests

In [46]:
url = "http://127.0.0.1:5000/data"  # Replace with your backend URL

data = {"color": "red"}  # Replace with your data

response = requests.post(url, json=data)
print(response.status_code, response.reason, response.text)

200 OK Data received and processed


Send Data to the database using expected sensor readings

In [49]:
data_dict_2 = data_dict.copy()

In [50]:
data_dict_2["UserID"] = "ACC123"
data_dict_2["UserID"]

'ACC123'

In [8]:
url = "http://127.0.0.1:5000/insert"  # Replace with your backend URL

data = data_dict_2  # Replace with your data

response = requests.post(url, json=data)
print(response.status_code, response.reason, response.text)

200 OK


### (JOSHUA) Retrieve data from using backend

* the following retreival process has been editted to allow the following scenerios:
    * see the UserID and all the readings associated with userID. each readings will be differentiated by the difference in time in and time out values
    * allow the user to choose which recording is to be taken (one at a time)
    
* Future updates
    * allow the user to retreive all the data associated with a certain UserID

In [1]:
import requests
import ast

In [3]:
url = "http://192.168.1.103:5000/retrieveUserData"  # endpoint to retreive all the specific records of a certain user
data = {"UserID": "Beb123"}  # Replace with your data

response = requests.post(url, json=data)

if response.status_code == 200:
    data_dict2 = response.json()
    print(response.status_code, response.reason)
    print(data_dict2)
else:
    print("Error:", response.status_code, response.reason, response.text)

200 OK
{'Time': [{'TimeIn': '2023-06-20 08:00:00', 'TimeOut': '2023-06-20 16:00:00'}], 'UserID': 'Beb123'}


Code above retreives all the timeins and timeouts associated with a single user. Also works if user does not exist but you will get an error message

In [5]:
url = "http://192.168.1.103:5000/retrieveUserInstance"  # endpoint to retreive all the specific records of a certain user
data3 = {
    "UserID": "Beb123",
    "TimeIn": "2023-06-20 08:00:00",
    "TimeOut": "2023-06-20 16:00:00",
}  # Replace with your data after retreive the data associated with the user

response3 = requests.post(url, json=data3)

if response3.status_code == 200:
    data_dict3 = response3.json()
    print(response3.status_code, response3.reason)
else:
    print("Error:", response3.status_code, response3.reason, response3.text)

200 OK


In [6]:
data_dict3.keys()

dict_keys(['AHI', 'Airflow', 'Apnea', 'ECG', 'HR', 'Hypopnea', 'Normal', 'Severity', 'Snore', 'SpO2', 'Therm', 'TimeIn', 'TimeOut', 'UserID'])

In [57]:
(data_dict3["TimeIn"])

'2023-06-20 08:00:00'

In [58]:
data_dict3["Therm"][0:100]

'[-2.35, -2.35, -2.35, -2.35, -2.35, -2.35, -2.35, -2.35, -2.35, -2.35, -2.35, -2.35, -2.35, -2.35, -'

In [62]:
for i in data_dict3.keys():
    if i not in [
        "UserID",
        "TimeIn",
        "TimeOut",
        "AHI",
        "Apnea",
        "Hypopnea",
        "Normal",
        "Severity",
    ]:
        data_dict3[i] = ast.literal_eval(data_dict3[i])

In [63]:
for i in data_dict3.keys():
    print(i, type(data_dict3[i]))

AHI <class 'int'>
Airflow <class 'list'>
Apnea <class 'int'>
ECG <class 'list'>
HR <class 'list'>
Hypopnea <class 'int'>
Normal <class 'int'>
Severity <class 'str'>
Snore <class 'list'>
SpO2 <class 'list'>
Therm <class 'list'>
TimeIn <class 'str'>
TimeOut <class 'str'>
UserID <class 'str'>


# Pipeline to get data from the sqlite and make prediction
* take data from database
* turn data into data to be accepted with minirocket
    * turn data into pandas series and create csv out of them
    * upsample the data into 34 hz per second
* use MiniRocket 
* Use the SVC pipeline

In [1]:
import requests
import ast
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy.interpolate import interp1d
import numba
import pickle
import sktime
import joblib
import pandas as pd

In [2]:
url = "http://localhost:5000/retrieve"  # Replace with your backend URL

data = {"UserID": "OYKKRb"}  # Replace with your data

response = requests.post(url, json=data)

if response.status_code == 200:
    data_dict2 = response.json()
    print(response.status_code, response.reason)
else:
    print("Error:", response.status_code, response.reason, response.text)

200 OK


In [4]:
AHI_table = {
    "Severity": None,
    "AHI": None,
    "TimeIn": data_dict2["TimeIn"],
    "TimeOut": data_dict2["TimeOut"],
    "UserID": data_dict2["UserID"],
    "Normal": 0,
    "Apnea": 0,
    "Hypopnea": 0,
}

In [8]:
for i in data_dict2.keys():
    if i not in ["UserID", "TimeIn", "TimeOut"]:
        data_dict2[i] = ast.literal_eval(data_dict2[i])

In [9]:
for i in data_dict2.keys():
    print(i, type(data_dict2[i]))

Airflow <class 'list'>
ECG <class 'list'>
HR <class 'list'>
Snore <class 'list'>
SpO2 <class 'list'>
Therm <class 'list'>
TimeIn <class 'str'>
TimeOut <class 'str'>
UserID <class 'str'>


In [10]:
def get_df_from_signals_dict(signals_list):
    all_df = []
    for i in range(len(signals_list)):
        signals = signals_list[i]
        pd_dict = {}
        signals_keys = list(signals.keys())
        epochs = int(len(signals[list(signals.keys())[0]]) / 24 / 30)
        for i in range(len(signals_keys)):
            pd_list = []
            curr_col = signals_keys[i]
            samples = int(len(signals[curr_col]) / epochs)
            for i in range(0, epochs):
                pd_list.append(
                    pd.Series(signals[curr_col][samples * i : samples * i + samples])
                )
            pd_dict[curr_col] = pd_list

        pd_dict = pd.DataFrame(pd_dict)
        # for i in range(pd_dict.columns.size):
        #   curr_col = pd_dict.columns[i]
        #   for x in pd_dict.index:
        #     pd_dict[curr_col][x]= pd_dict[curr_col][x].mean()
        all_df.append(pd_dict)

    final_df = pd.concat(all_df, ignore_index=True)
    return final_df

In [11]:
# create another dictionary to copy data_dict2 but not include UserID, TimeIn, TimeOut

excluded_columns = ["UserID", "TimeOut", "TimeIn"]  # Columns to exclude from the copy

new_dict = {
    key: value for key, value in data_dict2.items() if key not in excluded_columns
}

print(new_dict.keys())

dict_keys(['Airflow', 'ECG', 'HR', 'Snore', 'SpO2', 'Therm'])


In [12]:
df_inputs = get_df_from_signals_dict([new_dict])

In [13]:
df_inputs

Unnamed: 0,Airflow,ECG,HR,Snore,SpO2,Therm
0,0 106 1 196 2 50 3 375 4 ...,0 241 1 317 2 11 3 280 4 ...,0 159 1 381 2 436 3 365 4 ...,0 353 1 392 2 393 3 497 4 ...,0 299 1 134 2 98 3 327 4 ...,0 224 1 127 2 426 3 166 4 ...
1,0 369 1 78 2 248 3 177 4 ...,0 436 1 235 2 493 3 21 4 ...,0 454 1 7 2 62 3 260 4 ...,0 425 1 142 2 240 3 402 4 ...,0 422 1 62 2 33 3 314 4 ...,0 349 1 434 2 351 3 145 4 ...
2,0 202 1 217 2 266 3 158 4 ...,0 437 1 193 2 173 3 34 4 ...,0 367 1 399 2 165 3 404 4 ...,0 186 1 45 2 244 3 23 4 ...,0 191 1 410 2 188 3 28 4 ...,0 443 1 207 2 439 3 321 4 ...
3,0 247 1 256 2 125 3 237 4 ...,0 254 1 289 2 353 3 320 4 ...,0 493 1 411 2 14 3 267 4 ...,0 208 1 396 2 226 3 93 4 ...,0 51 1 454 2 190 3 253 4 ...,0 107 1 83 2 78 3 78 4 ...
4,0 380 1 446 2 197 3 457 4 ...,0 88 1 197 2 318 3 34 4 ...,0 334 1 8 2 355 3 311 4 ...,0 343 1 437 2 85 3 485 4 ...,0 446 1 237 2 388 3 114 4 ...,0 308 1 65 2 76 3 345 4 ...
...,...,...,...,...,...,...
955,0 174 1 427 2 154 3 380 4 ...,0 296 1 166 2 350 3 188 4 ...,0 497 1 68 2 373 3 180 4 ...,0 196 1 361 2 178 3 291 4 ...,0 259 1 37 2 41 3 446 4 ...,0 473 1 196 2 236 3 286 4 ...
956,0 372 1 456 2 180 3 428 4 ...,0 97 1 367 2 459 3 39 4 ...,0 191 1 259 2 421 3 312 4 ...,0 80 1 250 2 451 3 231 4 ...,0 447 1 58 2 415 3 211 4 ...,0 19 1 296 2 366 3 154 4 ...
957,0 81 1 158 2 494 3 498 4 ...,0 262 1 16 2 91 3 497 4 ...,0 333 1 187 2 151 3 23 4 ...,0 383 1 289 2 121 3 499 4 ...,0 500 1 487 2 356 3 131 4 ...,0 268 1 0 2 379 3 258 4 ...
958,0 131 1 371 2 253 3 248 4 ...,0 363 1 33 2 240 3 248 4 ...,0 399 1 85 2 74 3 451 4 ...,0 56 1 408 2 450 3 169 4 ...,0 410 1 441 2 39 3 94 4 ...,0 357 1 56 2 167 3 196 4 ...


In [14]:
signals_and_sample_rates = {}
for i in df_inputs.columns[:]:
    signals_and_sample_rates[i] = []
    for x in range(len(df_inputs)):
        if len(df_inputs.loc[x, i]) / 30 not in signals_and_sample_rates[i]:
            signals_and_sample_rates[i].append(len(df_inputs.loc[x, i]) / 30)
print(signals_and_sample_rates)

{'Airflow': [24.0], 'ECG': [24.0], 'HR': [24.0], 'Snore': [24.0], 'SpO2': [24.0], 'Therm': [24.0]}


In [15]:
# Fix their sample rates to have 34 samples per second
# FUNCTION FOR UPSAMPLING USING CUBIC SPLINE INTERPOLATION
def cubic_spline_interpolation(data):
    # Define the original 300 data point signal
    l = len(data)
    x = np.linspace(0, 30, l)
    y = data.values

    # Normalize the data
    scaler = StandardScaler()
    y_norm = scaler.fit_transform(y.reshape(-1, 1)).flatten()

    # Define the new time points for upsampling
    x_new = np.linspace(0, 30, 1020)

    # Upsample using cubic spline interpolation
    f_cubic = interp1d(x, y_norm, kind="cubic")
    y_cubic = f_cubic(x_new)

    # Denormalize the data
    y_rescaled = scaler.inverse_transform(y_cubic.reshape(-1, 1)).flatten()
    y_rescaled_series = pd.Series(y_rescaled)
    return y_rescaled_series

In [16]:
df_temp1 = df_inputs.copy()

In [17]:
# create a new dataframe with the same shape as the original dataframe
new_df = pd.DataFrame(index=df_temp1.index, columns=df_temp1.columns)

# iterate over each cell in the dataframe
for i in range(df_temp1.shape[0]):
    for j in range(df_temp1.shape[1]):
        cell_value = df_temp1.iloc[i, j]
        # check if the cell needs to be upsampled/downsampled
        if isinstance(cell_value, pd.Series) and len(cell_value) != 1020:
            # apply the cubic_spline_interpolation function to the cell value
            new_series = cubic_spline_interpolation(cell_value)
            # fill the new dataframe with the upsampled/downsampled values
            new_df.iloc[i, j] = new_series
        else:
            # if the cell doesn't need to be upsampled/downsampled, fill the new dataframe with the original value
            new_df.iloc[i, j] = cell_value

# replace the old dataframe with the new one
df_temp1 = new_df

In [18]:
signals_and_sample_rates = {}
for i in df_temp1.columns[:]:
    signals_and_sample_rates[i] = []
    for x in range(len(df_temp1)):
        if len(df_temp1.loc[x, i]) / 30 not in signals_and_sample_rates[i]:
            signals_and_sample_rates[i].append(len(df_temp1.loc[x, i]) / 30)
print(signals_and_sample_rates)

{'Airflow': [34.0], 'ECG': [34.0], 'HR': [34.0], 'Snore': [34.0], 'SpO2': [34.0], 'Therm': [34.0]}


In [19]:
df_temp1.tail(2)

Unnamed: 0,Airflow,ECG,HR,Snore,SpO2,Therm
958,"[131.0, 373.43917690138403, 320.18505119204775...","[363.0, 27.645378352257325, 112.49019751638068...","[399.0, 160.85451594413797, 24.886679640510295...","[56.0, 326.52585125323947, 475.55658303715313,...","[410.0, 525.3694447990622, 266.4647322047152, ...","[357.0, 69.35163643456059, 93.02755235868437, ..."
959,"[173.0, 208.3229580285688, 347.1726460972568, ...","[12.999999999999972, 395.3931049168309, 393.29...","[170.0, 451.56759613646807, 511.37713536926873...","[468.0, 285.79856881871865, 453.84142507015247...","[94.0, 504.3691752261143, 212.3485059663618, 6...","[239.0, 0.8394265068872642, 161.28955000941278..."


In [20]:
# Turn the cells from numpy arrays back into pd Series
df_series = df_temp1.applymap(lambda x: pd.Series(x.tolist()))

In [21]:
df_series.tail(2)

Unnamed: 0,Airflow,ECG,HR,Snore,SpO2,Therm
958,0 131.000000 1 373.439177 2 ...,0 363.000000 1 27.645378 2 ...,0 399.000000 1 160.854516 2 ...,0 56.000000 1 326.525851 2 ...,0 410.000000 1 525.369445 2 ...,0 357.000000 1 69.351636 2 ...
959,0 173.000000 1 208.322958 2 ...,0 13.000000 1 395.393105 2 ...,0 170.000000 1 451.567596 2 ...,0 468.000000 1 285.798569 2 ...,0 94.000000 1 504.369175 2 ...,0 239.000000 1 0.839427 2 ...


## Use minirocket to transform df_series into an input for our model

In [22]:
# load the minirocket in our local file system
# remove HR from the dataframe since it was not included in our training
df_series.drop("HR", axis=1, inplace=True)
df_series.head(2)

Unnamed: 0,Airflow,ECG,Snore,SpO2,Therm
0,0 106.000000 1 250.713451 2 ...,0 241.000000 1 400.365784 2 ...,0 353.000000 1 395.194174 2 ...,0 299.000000 1 193.067202 2 ...,0 224.000000 1 39.828052 2 ...
1,0 369.000000 1 65.726544 2 ...,0 436.000000 1 151.886849 2 ...,0 425.000000 1 172.074492 2 ...,0 422.000000 1 139.518724 2 ...,0 349.000000 1 413.041706 2 ...


In [23]:
# Change the names of the columns to match the names in the minirocket model
df_series.rename(
    columns={
        "Airflow": "Flow1",
        "ECG": "EKG",
        "Snore": "Snore",
        "SpO2": "SpO2",
        "Therm": "Flow2",
    },
    inplace=True,
)

# Verify the new column names
print(df_series.columns)

Index(['Flow1', 'EKG', 'Snore', 'SpO2', 'Flow2'], dtype='object')


In [24]:
# change the order of the columns to match the order in the minirocket model

desired_order = ["EKG", "Snore", "Flow1", "Flow2", "SpO2"]
df_series = df_series.reindex(columns=desired_order)

# Verify the new column order
print(df_series.columns)

Index(['EKG', 'Snore', 'Flow1', 'Flow2', 'SpO2'], dtype='object')


In [25]:
with open(
    r"C:\Users\Toshiba\Documents\PD2_john\modelsAndTransformers\MiniRV2_FitOnTrainingSetOnly_1020.pickle",
    "rb",
) as f:
    miniR = pickle.load(f)

In [26]:
sensor_readings = df_series.copy()

In [27]:
sensor_readings_transformed = miniR.transform(sensor_readings)

In [28]:
sensor_readings_transformed.shape

(960, 49980)

## Load the Model Pipeline and use it to predict

In [29]:
model = joblib.load(
    r"C:\Users\Toshiba\Documents\PD2_john\modelsAndTransformers\MiniR_pipeline2_1020_SVC_Recall_76_76_avg_75.joblib"
)

### try to predict and use the model first with our own data that we know comes from real sensors

In [63]:
import pandas as pd

# Specify the file path or URL of the CSV file
file_path = (
    r"C:\Users\Toshiba\Documents\PD2_john\modelsAndTransformers\XTestMiniRWithYtest.csv"
)

# Read the CSV file into a dataframe
df = pd.read_csv(file_path)

# Now you can work with the dataframe 'df'
# For example, you can display the first few rows using the head() method
print(df.head(2))

     EKG__0    EKG__1    EKG__2    EKG__3    EKG__4    EKG__5    EKG__6  \
0  0.100980  0.948039  0.000000  0.634314  1.000000  0.045098  0.857843   
1  0.363725  0.659804  0.212745  0.487255  0.704902  0.273529  0.622549   

     EKG__7    EKG__8    EKG__9  ...  SpO2__9987  SpO2__9988  SpO2__9989  \
0  0.000000  0.166667  0.984314  ...         0.0         0.0         1.0   
1  0.187255  0.408824  0.667647  ...         0.0         0.0         1.0   

   SpO2__9990  SpO2__9991  SpO2__9992  SpO2__9993  SpO2__9994  SpO2__9995  \
0    0.169608    0.626471    1.000000         0.0         1.0         0.0   
1    0.228431    0.583333    0.935294         0.0         1.0         0.0   

   Events  
0       0  
1       0  

[2 rows x 49981 columns]


In [70]:
df.shape

(292, 49980)

In [64]:
# Assuming your dataframe is called 'df' and you want to drop the column 'ColumnName'
df = df.drop("Events", axis=1)

# Verify the updated dataframe
print(df.head())

     EKG__0    EKG__1    EKG__2    EKG__3    EKG__4    EKG__5    EKG__6  \
0  0.100980  0.948039  0.000000  0.634314  1.000000  0.045098  0.857843   
1  0.363725  0.659804  0.212745  0.487255  0.704902  0.273529  0.622549   
2  0.335294  0.814706  0.006863  0.648039  0.976471  0.189216  0.737255   
3  0.439216  0.715686  0.251961  0.575490  0.787255  0.360784  0.625490   
4  0.334314  0.710784  0.134314  0.503922  0.877451  0.274510  0.600980   

     EKG__7    EKG__8    EKG__9  ...  SpO2__9986  SpO2__9987  SpO2__9988  \
0  0.000000  0.166667  0.984314  ...    0.736275         0.0         0.0   
1  0.187255  0.408824  0.667647  ...    0.670588         0.0         0.0   
2  0.004902  0.373529  0.925490  ...    0.626471         0.0         0.0   
3  0.163725  0.498039  0.746078  ...    0.635294         0.0         0.0   
4  0.049020  0.383333  0.767647  ...    0.750980         0.0         0.0   

   SpO2__9989  SpO2__9990  SpO2__9991  SpO2__9992  SpO2__9993  SpO2__9994  \
0         1.0  

In [66]:
# check what the model looks like
for step_name, step in model.steps:
    print("Step:", step_name)
    print("Type:", type(step))

    # Check if the step is a classifier
    if hasattr(step, "get_params"):
        print("Classifier Parameters:")
        print(step.get_params())

    # Check if the step is a StandardScaler
    if isinstance(step, StandardScaler):
        print("StandardScaler Parameters:")
        print(step.get_params())

    print("-----------------------")

Step: scaler
Type: <class 'sklearn.preprocessing._data.StandardScaler'>
Classifier Parameters:
{'copy': True, 'with_mean': True, 'with_std': True}
StandardScaler Parameters:
{'copy': True, 'with_mean': True, 'with_std': True}
-----------------------
Step: svc
Type: <class 'sklearn.svm._classes.SVC'>
Classifier Parameters:
{'C': 100, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'auto', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
-----------------------


In [30]:
df_predict = model.predict(sensor_readings_transformed)

In [35]:
from datetime import datetime


def create_integer_counts_dict(arr, AHI_table):
    for num in arr:
        if num == 0:
            AHI_table["Normal"] += 1
        elif num == 1:
            AHI_table["Apnea"] += 1
        elif num == 2:
            AHI_table["Hypopnea"] += 1
    from datetime import datetime

    # Convert the datetime strings into datetime objects
    datetime_str1 = AHI_table["TimeIn"]
    datetime_str2 = AHI_table["TimeOut"]
    datetime_format = "%Y-%m-%d %H:%M:%S"  # Format of the datetime strings

    datetime_obj1 = datetime.strptime(datetime_str1, datetime_format)
    datetime_obj2 = datetime.strptime(datetime_str2, datetime_format)

    # Calculate the time difference in hours
    time_difference = (datetime_obj2 - datetime_obj1).total_seconds() / 3600

    # Calculate the AHI
    AHI = (AHI_table["Apnea"] + AHI_table["Hypopnea"]) / time_difference

    # Add the AHI to the dictionary
    AHI_table["AHI"] = AHI

    # Add the severity to the dictionary

    if AHI < 5:
        AHI_table["Severity"] = "Normal"
    elif AHI >= 5 and AHI < 15:
        AHI_table["Severity"] = "Mild"
    elif AHI >= 15 and AHI < 30:
        AHI_table["Severity"] = "Moderate"
    elif AHI >= 30:
        AHI_table["Severity"] = "Severe"

    return AHI_table


AHI_table = create_integer_counts_dict(df_predict, AHI_table)

# Print the dictionary
print(AHI_table)

{'Severity': 'Normal', 'AHI': 0.0, 'TimeIn': '2023-06-12 08:00:00', 'TimeOut': '2023-06-12 16:00:00', 'UserID': 'OYKKRb', 'Normal': 1920, 'Apnea': 0, 'Hypopnea': 0}
