In [1]:
import pandas as pd
import os
import numpy as np
import pytz
import plotly.graph_objects as go
import plotly.express as px
import plotly.subplots as sp
import pytz as tz
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.discriminant_analysis import StandardScaler

In [2]:
RESPECK_FILE = '../../data/cheyne-stokes/CSR003 13.06.2025/Respeck_CSR003_35c5fa1801da28b5_D11B86C3EA87(6AL)_2025-06-13_decrypted.csv'
PSG_FILE = '../../data/cheyne-stokes/CSR003 13.06.2025/EDF CSR003 13.06.2025_csv.csv'
LABELS_FILE = '../../data/bishkek_csr/02_prepped/event_exports/26-04-2025_event_export.csv'

# --- Load Data ---
print("Loading data...")

respeck_df = pd.read_csv(RESPECK_FILE)
respeck_df['timestamp'] = pd.to_datetime(respeck_df['interpolatedPhoneTimestamp'], unit='ms')
tz = pytz.timezone('Asia/Bishkek')
respeck_df['timestamp'] = respeck_df['timestamp'].dt.tz_localize('UTC').dt.tz_convert(tz)
print(respeck_df['timestamp'])
respeck_df.set_index('timestamp', inplace=True, drop=False)

psg_df = pd.read_csv(PSG_FILE)
psg_df['timestamp'] = pd.to_datetime(psg_df['UnixTimestamp'], unit='ms')
tz = pytz.timezone('Asia/Bishkek')
psg_df['timestamp'] = psg_df['timestamp'].dt.tz_localize('UTC').dt.tz_convert(tz)

psg_df.set_index('timestamp', inplace=True, drop=False)

labels_df = pd.read_csv(LABELS_FILE)
labels_df['timestamp'] = pd.to_datetime(labels_df['UnixTimestamp'], unit='ms')
tz = pytz.timezone('Asia/Bishkek')
labels_df['timestamp'] = labels_df['timestamp'].dt.tz_localize('UTC').dt.tz_convert(tz)

labels_df.set_index('timestamp', inplace=True)

Loading data...
0        2025-06-13 22:58:47.365000+06:00
1        2025-06-13 22:58:47.445000+06:00
2        2025-06-13 22:58:47.525000+06:00
3        2025-06-13 22:58:47.605000+06:00
4        2025-06-13 22:58:47.685000+06:00
                       ...               
646267   2025-06-14 13:40:50.184000+06:00
646268   2025-06-14 13:40:50.264000+06:00
646269   2025-06-14 13:40:50.344000+06:00
646270   2025-06-14 13:40:50.424000+06:00
646271   2025-06-14 13:40:50.424000+06:00
Name: timestamp, Length: 646272, dtype: datetime64[ns, Asia/Bishkek]


In [3]:
# PSG TEST BREATHING RATE
psg_df

Unnamed: 0_level_0,time_s,UnixTimestamp,Resp nasal,Resp cpap,Resp chest,Resp abdomen,Position,Light,Pulse,SaO2 SpO2,Pulsewave,Sound microphone,Obstr,BR flow,timestamp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2025-06-13 23:04:54+06:00,0.00,1749834294000,1.541526e-14,-4.400000,100.000000,100.000000,153.000000,9990.000000,1.541526e-14,-5.395341e-14,-2.697671e-14,45.246052,-1.926908e-15,2.312289e-14,2025-06-13 23:04:54+06:00
2025-06-13 23:04:54.010000+06:00,0.01,1749834294010,5.325330e+00,-4.299280,95.417350,95.450053,152.992405,9988.337169,4.405123e-02,1.678427e-02,2.059395e+01,28.610666,3.176170e-02,-1.850294e+00,2025-06-13 23:04:54.010000+06:00
2025-06-13 23:04:54.020000+06:00,0.02,1749834294020,1.276932e+01,-4.057712,88.502670,88.562663,152.951257,9983.386373,1.135815e-01,6.414891e-02,5.586023e+01,21.647974,6.014809e-02,-3.179044e+00,2025-06-13 23:04:54.020000+06:00
2025-06-13 23:04:54.030000+06:00,0.03,1749834294030,2.205695e+01,-3.688625,79.534150,79.614568,152.876576,9975.150552,2.085642e-01,1.420653e-01,5.931480e+01,50.080110,8.242404e-02,-3.985859e+00,2025-06-13 23:04:54.030000+06:00
2025-06-13 23:04:54.040000+06:00,0.04,1749834294040,3.280459e+01,-3.212671,68.910070,69.002764,152.768403,9963.634588,3.289579e-01,2.504868e-01,3.828183e+01,66.665141,9.647325e-02,-4.299275e+00,2025-06-13 23:04:54.040000+06:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-06-14 04:27:06.950000+06:00,19332.95,1749853626950,5.804051e+00,-2.904465,88.007535,87.791754,152.535334,9949.028117,1.618016e-01,3.745182e-01,5.918939e+01,49.864958,-1.056035e-01,1.507757e+01,2025-06-14 04:27:06.950000+06:00
2025-06-14 04:27:06.960000+06:00,19332.96,1749853626960,8.905736e-01,-3.419565,94.366407,94.201134,152.695123,9963.781053,7.858777e-02,2.386051e-01,7.070420e+01,49.851225,-9.997579e-02,1.159630e+01,2025-06-14 04:27:06.960000+06:00
2025-06-14 04:27:06.970000+06:00,19332.97,1749853626970,-2.434560e+00,-3.848965,99.078634,98.959285,152.821553,9975.260527,2.075937e-02,1.331439e-01,8.834156e+01,49.944305,-8.465816e-02,8.235215e+00,2025-06-14 04:27:06.970000+06:00
2025-06-14 04:27:06.980000+06:00,19332.98,1749853626980,-3.829842e+00,-4.167159,101.753410,101.676739,152.914545,9983.459750,-1.163474e-02,5.819648e-02,7.421988e+01,50.151827,-6.123005e-02,5.114930e+00,2025-06-14 04:27:06.980000+06:00


In [4]:
mean_resp_nasal = psg_df['Resp nasal'].mean()
psg_df['Resp nasal'] = psg_df['Resp nasal'] - mean_resp_nasal
print(psg_df['Resp nasal'].mean())

-8.985805771770911e-15


In [5]:
# BREATHING FUNCTIONS

def getBreaths(df):
    minThreshold = 0.001
    mult = 1e-2
    
    signal = list(df.breathingSignal)
    
    time_diff = df['timestamp'].diff()
    time_diff.map(lambda x: x.total_seconds()).mean()
    
    window_size = int((30 / time_diff.dropna().apply(lambda x: x.total_seconds()).mean()) // 2)
    threshs = calculateThresholdLevels(list(signal), window_size, window_size, mult, False)
    posThresh = threshs[:, 0]
    negThresh = threshs[:, 1]

    times = calculateBreathTimes(list(signal), posThresh, negThresh, minThreshold, False)

    total = set()
    minBreathLength = float("inf")
    maxBreathLength = float("-inf")
    for i in range(0, len(times)):
        vals = times[i]
        for j in range(0, len(vals)-1):
            start, end = vals[j], vals[j+1]
            minBreathLength = min(minBreathLength, end-start+1)
            maxBreathLength = max(maxBreathLength, end-start+1)
            for k in range(start, end+1):
                total.add(k)

    f = list(df.breathingSignal.dropna())
    a = f"Uses Breath From {len(total)}/{len(f)} = {round((len(total)/len(f)) * 100, 2)}% Signal"
    b = f"Max Breath Length: {maxBreathLength} points. Min Breath Length: {minBreathLength} points"
    print(a)
    print(b)
        
    return times

def countLocalMaximas(values):
    count = 0
    if len(values) < 3:
        return 1
    if len(values) > 1 and values[0] > values[1]:
        count += 1
    if len(values) > 1 and values[-1] > values[-2]:
        count += 1
    for i in range(1, len(values) - 1):
        if values[i] > values[i - 1] and values[i] > values[i + 1]:
            count += 1
    return count

def countLocalMinimas(values):
    count = 0
    if len(values) < 3:
        return 1
    if len(values) > 1 and values[0] < values[1]:
        count += 1
    if len(values) > 1 and values[-1] < values[-2]:
        count += 1
    for i in range(1, len(values) - 1):
        if values[i] < values[i - 1] and values[i] < values[i + 1]:
            count += 1
    return count

def calculateBreathTimes(signal, posThresholds, negThresholds, minThreshold, zeroCrossingBreathStart):
    
    def breathTimes(startIndex, endIndex):

        def setInitialState(startValue, posThreshold, negThreshold):
            if startValue < negThreshold:
                state = LOW
            elif startValue > posThreshold:
                state = HIGH
            else:
                state = MID_UNKNOWN
            return state
    
        state = setInitialState(signal[startIndex], posThresholds[startIndex], negThresholds[startIndex])
        times = []
    
        for i in range(startIndex + 1, endIndex + 1):
            posThreshold = posThresholds[i]
            negThreshold = negThresholds[i]
            if state == LOW and signal[i] > negThreshold:
                state = MID_RISING
            elif state == HIGH and signal[i] < posThreshold:
                state = MID_FALLING
            elif (state == MID_RISING or state == MID_UNKNOWN) and signal[i] > posThreshold:
                state = HIGH
            elif (state == MID_FALLING or state == MID_UNKNOWN) and signal[i] < negThreshold:
                state = LOW
                times.append(i)

        if zeroCrossingBreathStart:
            zeroCrossingBreathTimes = []
            for t in times:
                for i in range(t,-1,-1):
                    if signal[i] >= 0:
                        zeroCrossingBreathTimes.append(i)
                        break
            return zeroCrossingBreathTimes
        else:
            return times

    LOW, MID_FALLING, MID_UNKNOWN, MID_RISING, HIGH = range(5)

    
    invalidated = np.ones(np.shape(signal), dtype=bool)
    for i in range(len(invalidated)):
        if posThresholds[i] > minThreshold or negThresholds[i] < -minThreshold:
            invalidated[i] = False
    

    minIslandLength = 0
    islandLimits = findIslandLimits(invalidated, minIslandLength)
    
    times = []
    for (start, end) in islandLimits:
        bt = breathTimes(start, end)
        if len(bt) > 0:
            times.append(bt)

    return times

def calculateThresholdLevels(signal, rmsBackwardLength, rmsForwardLength, rmsMultiplier, symmetrical):
    result = nans((len(signal), 2))
    
    if not symmetrical:
        
        #fill sum of squares buffers
        posValues = []
        negValues = []
        windowLength = rmsBackwardLength + rmsForwardLength
        if len(signal) < windowLength:
            return result
        
        lastBananaIndex = np.nan
            
        for i in range(windowLength - 1):
            if signal[i] >= 0:
                posValues.append(signal[i])
            elif signal[i] < 0:
                negValues.append(signal[i])
            else: # if nan
                lastBananaIndex = i
                
        posArray = np.array(posValues)
        negArray = np.array(negValues)
        
        sumOfSquaresPos = np.sum(posArray**2)
        posCount = len(posArray)
        sumOfSquaresNeg = np.sum(negArray**2)
        negCount = len(negArray)
        
        for i in range(0, len(signal)):
            if i < rmsBackwardLength or i >= len(signal) - rmsForwardLength:
                posResult = np.nan
                negResult = np.nan
            else:
                newValue = signal[i+rmsForwardLength-1]
                if np.isnan(newValue):
                    lastBananaIndex = i+rmsForwardLength-1
                else:
                    if newValue >= 0:
                        sumOfSquaresPos += newValue**2
                        posCount += 1
                    elif newValue < 0:
                        sumOfSquaresNeg += newValue**2
                        negCount += 1
                
                if not np.isnan(lastBananaIndex) and i - lastBananaIndex <= rmsBackwardLength:
                    posResult = np.nan
                    negResult = np.nan
                else:
                    posResult = np.sqrt(sumOfSquaresPos / posCount) * rmsMultiplier
                    negResult = -np.sqrt(sumOfSquaresNeg / negCount) * rmsMultiplier
                
                oldValue = signal[i-rmsBackwardLength]
                
                if oldValue >= 0:
                    sumOfSquaresPos -= oldValue**2
                    posCount -= 1
                elif oldValue < 0:
                    sumOfSquaresNeg -= oldValue**2
                    negCount -=1
            result[i,0] = posResult
            result[i,1] = negResult
            
        return result
    
    else:
        #fill sum of squares buffers
        allValues = []
        windowLength = rmsBackwardLength + rmsForwardLength
        if len(signal) < windowLength:
            return result
        
        #print "signal length: " + str(len(signal))
        #print "windowLength: " + str(windowLength)
        #print "backward length: " + str(rmsBackwardLength)
        #print "forward length: " + str(rmsForwardLength)
        
        lastBananaIndex = np.nan
        
        for i in range(windowLength - 1):
            if not np.isnan(signal[i]):
                allValues.append(signal[i])
            else:
                lastBananaIndex = i
        allArray = np.array(allValues)
        
        sumOfSquaresAll = np.sum(allArray**2)
        allCount = len(allArray)
        
        for i in range(0, len(signal)):
            if i < rmsBackwardLength or i >= len(signal) - rmsForwardLength:
                allResult = np.nan
            else:
                newValue = signal[i+rmsForwardLength-1]
                if np.isnan(newValue):
                    lastBananaIndex = i+rmsForwardLength-1
                else:
                    sumOfSquaresAll += newValue**2
                    allCount += 1
                
                if not np.isnan(lastBananaIndex) and i - lastBananaIndex <= rmsBackwardLength:
                    allResult = np.nan
                else:
                    allResult = np.sqrt(sumOfSquaresAll / allCount) * rmsMultiplier
                
                oldValue = signal[i-rmsBackwardLength]
                if not np.isnan(oldValue):
                    sumOfSquaresAll -= oldValue**2
                    allCount -= 1
                    
            result[i,0] = allResult
            result[i,1] = -allResult
        #figure()
        #plot(signal)
        #plot(result)
        #show()
        return result
    

def nans(dims):
    a = np.empty(dims)
    a[:] = np.nan
    return a

''' Find the RMS value of an input signal in array form. '''
def rms(signal):
    return np.sqrt(np.mean(signal**2))

def rmsHamming(signal):
    squares = signal**2
    weights = np.hamming(len(signal))
    weightedSum = 0.0
    weightsSum = 0.0

    for i in range(len(signal)):
        weightedSum += squares[i] * weights[i]
        weightsSum += weights[i]

    return np.sqrt(weightedSum / weightsSum)

''' Find islands of defined values in a signal that may contain NaNs. '''
def findIslandLimits(signal, minIslandLength=0, minIslandGap=0):

    islands = []

    start = None
    end = None
    foundIsland = False

    for i in range(len(signal)):
        if not signal[i]:
            if start == None:
                start = i
            else:
                end = i + 1
                if i == len(signal) - 1:
                    foundIsland = True
        else:
            if start != None:
                if end != None:
                    foundIsland = True
                else:
                    start = None

        if foundIsland:
            if (minIslandGap > 0) and (len(islands) > 0):
                prevIslandStart = islands[-1][0]
                prevIslandEnd = islands[-1][1]
                islandGap = start - prevIslandEnd - 1
                if islandGap < minIslandGap:
                    # merge the new island with the previous one
                    islands[-1] = ((prevIslandStart, end))
                else:
                    islands.append((start, end))
            else:    
                islands.append((start, end))

            start = None
            end = None
            foundIsland = False
            
    # now return only the islands that are long enough
    longIslands = []
    for island in islands:
        if (island[1] - island[0]) >= minIslandLength:
            longIslands.append(island)

    return longIslands


In [6]:
psg_df = psg_df.rename(columns={'Resp nasal': 'breathingSignal'})
breathLocations = getBreaths(psg_df)
total_breaths = 0
for island_times in breathLocations:
    total_breaths += len(island_times)
print(total_breaths)


  negResult = -np.sqrt(sumOfSquaresNeg / negCount) * rmsMultiplier
  posResult = np.sqrt(sumOfSquaresPos / posCount) * rmsMultiplier
  posResult = np.sqrt(sumOfSquaresPos / posCount) * rmsMultiplier


Uses Breath From 1922244/1933300 = 99.43% Signal
Max Breath Length: 35970 points. Min Breath Length: 11 points
9757


In [7]:

# Breathing rate
psg_df = psg_df.rename_axis(None)
df_temp = psg_df.reset_index()
df_temp.rename(columns={'index': 'row_index'}, inplace=True)  # Use a different name

# Now extract both the row indices and timestamps
all_breath_indices = [idx for sublist in breathLocations for idx in sublist]

# Create a clean dataframe with row indices and datetime from the index
breath_time_data = pd.DataFrame({
    'row_index': all_breath_indices,
    'datetime': psg_df.index[all_breath_indices]
})


window_size = 30

# Set datetime as index for resampling
breaths_df_indexed = breath_time_data.set_index('datetime')

# Resample to count breaths in 30-second windows
breathing_rate_windows = breaths_df_indexed.resample('60S').size()

# Create summary dataframe
breathing_rate_summary_df = pd.DataFrame({
    'Start_Time': breathing_rate_windows.index,
    'Breaths_per_30_seconds': breathing_rate_windows.values
})

print(breathing_rate_summary_df)


                   Start_Time  Breaths_per_30_seconds
0   2025-06-13 23:06:00+06:00                      18
1   2025-06-13 23:07:00+06:00                      33
2   2025-06-13 23:08:00+06:00                      36
3   2025-06-13 23:09:00+06:00                      32
4   2025-06-13 23:10:00+06:00                      33
..                        ...                     ...
316 2025-06-14 04:22:00+06:00                      30
317 2025-06-14 04:23:00+06:00                      28
318 2025-06-14 04:24:00+06:00                      28
319 2025-06-14 04:25:00+06:00                      26
320 2025-06-14 04:26:00+06:00                      23

[321 rows x 2 columns]


  breathing_rate_windows = breaths_df_indexed.resample('60S').size()


In [8]:

fig = px.line(breathing_rate_summary_df, 
              x='Start_Time', 
              y='Breaths_per_30_seconds',
              title='Breathing Rate Over Time (PSG)',
              labels={
                  'Start_Time': 'Time',
                  'Breaths_per_30_seconds': 'Breaths per 30 seconds'
              })

fig.update_traces(line_color='#2E86AB', line_width=2)
fig.update_layout(template='plotly_white')
fig.show()

In [9]:
# respeck breathing rate
respeck_df = respeck_df.rename(columns={'Resp nasal': 'breathingSignal'})
breathLocations_respeck = getBreaths(respeck_df)
total_breaths_resepck = 0
for island_times in breathLocations_respeck:
    total_breaths_resepck += len(island_times)
print(total_breaths_resepck)



Uses Breath From 120344/638626 = 18.84% Signal
Max Breath Length: 232 points. Min Breath Length: 5 points
4152


In [10]:
# Breathing rate
respck_df = respeck_df.rename_axis(None)
r_temp = respck_df.reset_index()
r_temp.rename(columns={'index': 'row_index'}, inplace=True)  # Use a different name

# Now extract both the row indices and timestamps
all_breath_indices = [idx for sublist in breathLocations_respeck for idx in sublist]

# Create a clean dataframe with row indices and datetime from the index
breath_time_data_respeck = pd.DataFrame({
    'row_index': all_breath_indices,
    'datetime': psg_df.index[all_breath_indices]
})


window_size = 30

# Set datetime as index for resampling
breaths_df_indexed_respeck = breath_time_data_respeck.set_index('datetime')

# Resample to count breaths in 30-second windows
breathing_rate_windows = breaths_df_indexed_respeck.resample('60S').size()

# Create summary dataframe
breathing_rate_summary_df_respeck = pd.DataFrame({
    'Start_Time': breathing_rate_windows.index,
    'Breaths_per_30_seconds': breathing_rate_windows.values
})
print(breathing_rate_summary_df_respeck)

                   Start_Time  Breaths_per_30_seconds
0   2025-06-13 23:05:00+06:00                      59
1   2025-06-13 23:06:00+06:00                     180
2   2025-06-13 23:07:00+06:00                     233
3   2025-06-13 23:08:00+06:00                     178
4   2025-06-13 23:09:00+06:00                      82
..                        ...                     ...
101 2025-06-14 00:46:00+06:00                       0
102 2025-06-14 00:47:00+06:00                       0
103 2025-06-14 00:48:00+06:00                       0
104 2025-06-14 00:49:00+06:00                       0
105 2025-06-14 00:50:00+06:00                       5

[106 rows x 2 columns]



'S' is deprecated and will be removed in a future version, please use 's' instead.



In [11]:
fig = px.line(breathing_rate_summary_df_respeck, 
              x='Start_Time', 
              y='Breaths_per_30_seconds',
              title='Breathing Rate Over Time (resepck)',
              labels={
                  'Start_Time': 'Time',
                  'Breaths_per_30_seconds': 'Breaths per 30 seconds'
              })

fig.update_traces(line_color='#2E86AB', line_width=2)
fig.update_layout(template='plotly_white')
fig.show()

**Plotting new Cheyne-Stokes data**

In [12]:
# Load Cheyne-stokes

RESPECK = '../../data/cheyne-stokes/CSR003 13.06.2025/Respeck_CSR003_35c5fa1801da28b5_D11B86C3EA87(6AL)_2025-06-13_decrypted.csv'
PSG = '../../data/cheyne-stokes/CSR003 13.06.2025/EDF CSR003 13.06.2025_csv.csv'

# --- Load Data ---
print("Loading data...")

respeck_cs = pd.read_csv(RESPECK)
respeck_cs['timestamp'] = pd.to_datetime(respeck_cs['interpolatedPhoneTimestamp'], unit='ms')
tz = pytz.timezone('Asia/Bishkek')
respeck_cs['timestamp'] = respeck_cs['timestamp'].dt.tz_localize('UTC').dt.tz_convert(tz)
print(respeck_cs['timestamp'])
respeck_cs.set_index('timestamp', inplace=True, drop=False)

psg_df_cs = pd.read_csv(PSG_FILE)
psg_df_cs['timestamp'] = pd.to_datetime(psg_df_cs['UnixTimestamp'], unit='ms')
tz = pytz.timezone('Asia/Bishkek')
psg_df_cs['timestamp'] = psg_df_cs['timestamp'].dt.tz_localize('UTC').dt.tz_convert(tz)
print(psg_df_cs['timestamp'])
psg_df_cs.set_index('timestamp', inplace=True, drop=False)


Loading data...
0        2025-06-13 22:58:47.365000+06:00
1        2025-06-13 22:58:47.445000+06:00
2        2025-06-13 22:58:47.525000+06:00
3        2025-06-13 22:58:47.605000+06:00
4        2025-06-13 22:58:47.685000+06:00
                       ...               
646267   2025-06-14 13:40:50.184000+06:00
646268   2025-06-14 13:40:50.264000+06:00
646269   2025-06-14 13:40:50.344000+06:00
646270   2025-06-14 13:40:50.424000+06:00
646271   2025-06-14 13:40:50.424000+06:00
Name: timestamp, Length: 646272, dtype: datetime64[ns, Asia/Bishkek]
0                2025-06-13 23:04:54+06:00
1         2025-06-13 23:04:54.010000+06:00
2         2025-06-13 23:04:54.020000+06:00
3         2025-06-13 23:04:54.030000+06:00
4         2025-06-13 23:04:54.040000+06:00
                        ...               
1933295   2025-06-14 04:27:06.950000+06:00
1933296   2025-06-14 04:27:06.960000+06:00
1933297   2025-06-14 04:27:06.970000+06:00
1933298   2025-06-14 04:27:06.980000+06:00
1933299   2025-06-14 04

In [13]:
overlap_start = max(respeck_cs['timestamp'].min(), psg_df_cs['timestamp'].min())
overlap_end = min(respeck_cs['timestamp'].max(), psg_df_cs['timestamp'].max())
respeck_overlap = respeck_cs[
    (respeck_cs['timestamp'] <= overlap_end)
]

psg_overlap = psg_df_cs[
   (psg_df_cs['timestamp'] <= overlap_end)
]
fig = go.Figure()

# --- Add Traces from the First DataFrame ---
# Add the 'breathingSignal' from df1 and assign it to the secondary y-axis
fig.add_trace(go.Scatter(
    x=respeck_overlap['timestamp'],
    y=respeck_overlap['breathingSignal'],
    mode='lines',
    name='Breathing Signal - Respeck',
    yaxis='y2' # <--- This is the key change for this trace
))
fig.add_trace(go.Scatter(
    x=respeck_overlap['timestamp'],
    y=respeck_overlap['x'],
    mode='lines',
    name='Breathing Signal - Respeck X'
))
fig.add_trace(go.Scatter(
    x=respeck_overlap['timestamp'],
    y=respeck_overlap['y'],
    mode='lines',
    name='Breathing Signal - Respeck Y'
))
fig.add_trace(go.Scatter(
    x=respeck_overlap['timestamp'],
    y=respeck_overlap['z'],
    mode='lines',
    name='Breathing Signal - Respeck Z'
))
# Add the 'Resp nasal' from df1
fig.add_trace(go.Scatter(
    x=psg_overlap['timestamp'],
    y=psg_overlap['Resp nasal'],
    mode='lines',
    name='Resp Nasal - PSG'
))


# --- Update the Layout for a Professional Look with a Second Y-Axis ---
fig.update_layout(
    title='Comparison of Breathing Signals and Nasal Respiration',
    xaxis_title='Timestamp',
    yaxis_title='Signal Amplitude (Primary Axis)', # Updated left axis title
    legend_title='Data Source',
    hovermode='x unified', # For better tooltip experience
    # --- This section creates and styles the secondary y-axis ---
    yaxis2=dict(
        title='Breathing Signal Amplitude (Secondary Axis)',
        overlaying='y',
        side='right'
    )
    # ----------------------------------------------------------------
)

# --- Show the Figure ---
fig.show()
fig.write_html("pre-alignment.html")

In [14]:
target_tz = 'Asia/Bishkek'

start_window = pd.to_datetime('2025-06-13 22:55:00').tz_localize(target_tz)
end_window = pd.to_datetime('2025-06-13 23:05:00').tz_localize(target_tz)


respeck_zoom = respeck_cs[
    (respeck_cs['timestamp'] >= start_window) & (respeck_cs['timestamp'] <= end_window)
]

spike_row = respeck_zoom.loc[respeck_zoom['breathingSignal'].idxmax()]
respeck_spike_timestamp = spike_row['timestamp']

print(f"Respeck Spike Timestamp found at: {respeck_spike_timestamp}")


=== BREATHING RATE ALIGNMENT ANALYSIS ===

1. DATA OVERVIEW:
PSG data: 321 time windows
   Time range: 2025-06-13 23:06:00+06:00 to 2025-06-14 04:26:00+06:00
   Breathing rate range: 0-61

Respeck data: 106 time windows
   Time range: 2025-06-13 23:05:00+06:00 to 2025-06-14 00:50:00+06:00
   Breathing rate range: 0-233

2. TIME OVERLAP:
Overlapping period: 2025-06-13 23:06:00+06:00 to 2025-06-14 00:50:00+06:00
Overlap duration: 1.73 hours

3. DATA PREPARATION:
Valid data points: 209 out of 209

4. CROSS-CORRELATION RESULTS:
Best correlation coefficient: 62.475
Best time lag: -6.00 minutes
Interpretation: PSG leads Respeck by 6.00 minutes
Alignment quality: Excellent



'S' is deprecated and will be removed in a future version, please use 's' instead.




=== SUMMARY ===
Time lag between sensors: -6.00 minutes
Correlation strength: 62.475
Alignment quality: Excellent
⚠️  Large time lag detected - check sensor synchronization!


In [15]:
from scipy.signal import find_peaks

# Create a zoomed-in dataframe for the PSG data
psg_zoom = psg_df_cs[
    (psg_df_cs['timestamp'] <= end_window)
]
# Find peaks in the PSG signal. You will need to adjust the 'height' value.
# Look at your graph: the normal signal seems to be between 0 and 100.
# The disturbance might go slightly higher. Start with a height around 100 and adjust.
psg_peaks_indices, _ = find_peaks(psg_zoom['Resp nasal'])

# Get the timestamp of the FIRST peak that meets the criteria
psg_spike_index = psg_zoom.index[psg_peaks_indices[0]]
psg_spike_timestamp = psg_zoom.loc[psg_spike_index, 'timestamp']

print(f"PSG Spike Timestamp found at: {psg_spike_timestamp}")

                   Start_Time  Breaths_per_30_seconds
0   2025-06-13 23:05:00+06:00                      59
1   2025-06-13 23:06:00+06:00                     180
2   2025-06-13 23:07:00+06:00                     233
3   2025-06-13 23:08:00+06:00                     178
4   2025-06-13 23:09:00+06:00                      82
..                        ...                     ...
101 2025-06-14 00:46:00+06:00                       0
102 2025-06-14 00:47:00+06:00                       0
103 2025-06-14 00:48:00+06:00                       0
104 2025-06-14 00:49:00+06:00                       0
105 2025-06-14 00:50:00+06:00                       5

[106 rows x 2 columns]



'S' is deprecated and will be removed in a future version, please use 's' instead.



In [None]:
time_offset = psg_spike_timestamp - respeck_spike_timestamp
print(time_offset)

In [16]:
respeck_cs['timestamp_aligned'] = respeck_cs['timestamp'] + time_offset
print("Created 'timestamp_aligned' column in the Respeck DataFrame.")
print(respeck_cs)

SyntaxError: invalid syntax (1965335765.py, line 1)

In [None]:
overlap_start = max(respeck_cs['timestamp_aligned'].min(), psg_df_cs['timestamp'].min())
overlap_end = min(psg_df_cs['timestamp'].max(), psg_df_cs['timestamp'].max())
respeck_overlap = respeck_cs[
    (respeck_cs['timestamp_aligned'] <= overlap_end)
]

psg_overlap = psg_df_cs[
   (psg_df_cs['timestamp'] <= overlap_end)
]
fig = go.Figure()

# --- Add Traces from the First DataFrame ---
# Add the 'breathingSignal' from df1 and assign it to the secondary y-axis
fig.add_trace(go.Scatter(
    x=respeck_overlap['timestamp_aligned'],
    y=respeck_overlap['breathingSignal'],
    mode='lines',
    name='Breathing Signal - Respeck',
    yaxis='y2' # <--- This is the key change for this trace
))
fig.add_trace(go.Scatter(
    x=respeck_overlap['timestamp_aligned'],
    y=respeck_overlap['x'],
    mode='lines',
    name='Breathing Signal - Respeck X'
))
fig.add_trace(go.Scatter(
    x=respeck_overlap['timestamp_aligned'],
    y=respeck_overlap['y'],
    mode='lines',
    name='Breathing Signal - Respeck Y'
))
fig.add_trace(go.Scatter(
    x=respeck_overlap['timestamp_aligned'],
    y=respeck_overlap['z'],
    mode='lines',
    name='Breathing Signal - Respeck Z'
))
# Add the 'Resp nasal' from df1
fig.add_trace(go.Scatter(
    x=psg_overlap['timestamp'],
    y=psg_overlap['Resp nasal'],
    mode='lines',
    name='Resp Nasal - PSG'
))


# --- Update the Layout for a Professional Look with a Second Y-Axis ---
fig.update_layout(
    title='Comparison of Breathing Signals and Nasal Respiration (aligned)',
    xaxis_title='Timestamp',
    yaxis_title='Signal Amplitude (Primary Axis)', # Updated left axis title
    legend_title='Data Source',
    hovermode='x unified', # For better tooltip experience
    # --- This section creates and styles the secondary y-axis ---
    yaxis2=dict(
        title='Breathing Signal Amplitude (Secondary Axis)',
        overlaying='y',
        side='right'
    )
    # ----------------------------------------------------------------
)

# --- Show the Figure ---
fig.show()
fig.write_html("post_alignment.html")