In [4]:
import pandas as pd
import pytz
from datetime import datetime, timedelta

In [12]:
RESPECK_FILE = '../data/bishkek_csr/03_train_ready/CSR005/16-06-2025_respeck.csv'
PSG_FILE = '../data/bishkek_csr/03_train_ready/CSR005/16-06-2025_nasal.csv'
LABELS_FILE = '../data/bishkek_csr/03_train_ready/CSR005/16-06-2025_event_export.csv'

# Every recording is displayed in the same local zone, so build the tz object once.
tz = pytz.timezone('Asia/Bishkek')


def _with_local_timestamp(df, ms_column, local_tz):
    """Add a tz-aware 'timestamp' column derived from an epoch-milliseconds column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend; the 'timestamp' column is written onto this object.
    ms_column : str
        Name of the column holding milliseconds since the Unix epoch.
    local_tz : tzinfo or str
        Zone the timestamps are converted to after being read as UTC.

    Returns
    -------
    pandas.DataFrame
        The same frame, with 'timestamp' added (or replaced).
    """
    # Epoch values are UTC by definition: parse them as UTC, then convert for display.
    df['timestamp'] = pd.to_datetime(df[ms_column], unit='ms', utc=True).dt.tz_convert(local_tz)
    return df


# --- Load Data ---
print("Loading data...")

# 'timestamp' is kept as a regular column in all three frames (it is not set as the index).
respeck_df = pd.read_csv(RESPECK_FILE)
# Explicit 64-bit width: epoch milliseconds do not fit in 32 bits, and plain `int`
# leaves the width to the platform's default integer type.
respeck_df['interpolatedPhoneTimestamp'] = respeck_df['interpolatedPhoneTimestamp'].astype('int64')
respeck_df = _with_local_timestamp(respeck_df, 'interpolatedPhoneTimestamp', tz)
print(respeck_df['timestamp'])

psg_df = _with_local_timestamp(pd.read_csv(PSG_FILE), 'UnixTimestamp', tz)
print(psg_df['timestamp'])

labels_df = _with_local_timestamp(pd.read_csv(LABELS_FILE), 'UnixTimestamp', tz)
print(labels_df['timestamp'])


Loading data...
0        2025-06-16 20:22:48.889000+06:00
1        2025-06-16 20:22:48.969000+06:00
2        2025-06-16 20:22:49.049000+06:00
3        2025-06-16 20:22:49.129000+06:00
4        2025-06-16 20:22:49.209000+06:00
                       ...               
493344   2025-06-17 07:41:53.084000+06:00
493345   2025-06-17 07:41:53.102000+06:00
493346   2025-06-17 07:41:53.120000+06:00
493347   2025-06-17 07:41:53.138000+06:00
493348   2025-06-17 07:41:53.157000+06:00
Name: timestamp, Length: 493349, dtype: datetime64[ns, Asia/Bishkek]
0                2025-06-16 23:00:01+06:00
1         2025-06-16 23:00:01.010000+06:00
2         2025-06-16 23:00:01.020000+06:00
3         2025-06-16 23:00:01.030000+06:00
4         2025-06-16 23:00:01.040000+06:00
                        ...               
2879995   2025-06-17 07:00:00.950000+06:00
2879996   2025-06-17 07:00:00.960000+06:00
2879997   2025-06-17 07:00:00.970000+06:00
2879998   2025-06-17 07:00:00.980000+06:00
2879999   2025-06-17 07

In [8]:
def add_timestamp_export(csv_path: str, tz_name: str = 'Asia/Bishkek') -> None:
    """
    Add a 'UnixTimestamp' column (milliseconds since epoch) to an event-export CSV.

    Reads a CSV file with a 'Time' column in 'DD.MM.YYYY HH:MM:SS' format,
    interprets each value as wall-clock time in the specified timezone, computes
    integer milliseconds since the Unix epoch, inserts them as 'UnixTimestamp'
    immediately after the 'Duration' column, and overwrites the original file.

    Parameters
    ----------
    csv_path : str
        Path to the CSV file to read and overwrite.
    tz_name : str, optional
        Timezone name (default: 'Asia/Bishkek').

    Raises
    ------
    KeyError
        If the 'Time' or 'Duration' column is missing.
    ValueError
        If a 'Time' value does not match the expected format, or if the file
        already contains a 'UnixTimestamp' column (for example because this
        function was already run on it).
    """
    df = pd.read_csv(csv_path)

    # The explicit format already fixes day-before-month order, so no dayfirst flag is needed.
    local_dt = pd.to_datetime(df['Time'], format='%d.%m.%Y %H:%M:%S').dt.tz_localize(tz_name)

    # Floor-dividing the offset from the epoch by a 1 ms Timedelta gives milliseconds
    # regardless of the datetime resolution pandas picked, unlike dividing raw
    # integer values, which silently assumes nanoseconds.
    epoch = pd.Timestamp('1970-01-01', tz='UTC')
    elapsed = local_dt.dt.tz_convert('UTC') - epoch
    ts_series = (elapsed // pd.Timedelta(milliseconds=1)).astype('int64')

    duration_idx = df.columns.get_loc('Duration')
    df.insert(duration_idx + 1, 'UnixTimestamp', ts_series)

    df.to_csv(csv_path, index=False)


def add_timestamp_comments(
    csv_path: str,
    date_str: str,
    tz_name: str = 'Asia/Bishkek',
    unit: str = 's',
) -> None:
    """
    Add a 'UnixTimestamp' column to a comments CSV whose 'Time' column has no date.

    Reads a CSV file with a 'Time' column in 'HH:MM:SS' format (and other columns),
    combines each time with the provided date (YYYY-MM-DD), interprets the result
    as wall-clock time in the specified timezone, computes integer time since the
    Unix epoch in the requested unit, inserts it as 'UnixTimestamp' immediately
    after the 'Time' column, and overwrites the original file.

    Every row receives the same calendar date: a time that falls after midnight
    is NOT advanced to the following day.

    Parameters
    ----------
    csv_path : str
        Path to the CSV file to read and overwrite.
    date_str : str
        Date string in 'YYYY-MM-DD' format to combine with each time.
    tz_name : str, optional
        Timezone name (default: 'Asia/Bishkek').
    unit : {'s', 'ms'}, optional
        Unit of the written timestamps. The default 's' (seconds) keeps the
        historical output of this function; pass 'ms' to produce milliseconds.

    Raises
    ------
    ValueError
        If ``unit`` is not 's' or 'ms', if a combined date/time string does not
        match the expected format, or if the file already contains a
        'UnixTimestamp' column.
    KeyError
        If the 'Time' column is missing.
    """
    step_by_unit = {
        's': pd.Timedelta(seconds=1),
        'ms': pd.Timedelta(milliseconds=1),
    }
    if unit not in step_by_unit:
        raise ValueError(f"unit must be 's' or 'ms', got {unit!r}")

    df = pd.read_csv(csv_path)

    local_dt = pd.to_datetime(
        date_str + ' ' + df['Time'], format='%Y-%m-%d %H:%M:%S'
    ).dt.tz_localize(tz_name)

    # Timedelta floor-division is independent of the datetime resolution pandas
    # chose, unlike dividing raw integer values by a hard-coded power of ten.
    epoch = pd.Timestamp('1970-01-01', tz='UTC')
    elapsed = local_dt.dt.tz_convert('UTC') - epoch
    ts_series = (elapsed // step_by_unit[unit]).astype('int64')

    time_idx = df.columns.get_loc('Time')
    df.insert(time_idx + 1, 'UnixTimestamp', ts_series)
    df.to_csv(csv_path, index=False)

def add_timestamp_nasal(
    csv_path: str,
    start_datetime_str: str,
    tz_name: str = "Asia/Bishkek"
) -> None:
    """
    Add a 'UnixTimestamp' column (milliseconds since epoch) to an elapsed-time CSV.

    Reads a CSV whose first column is elapsed time in seconds (float or int; the
    column name is arbitrary), adds that many seconds onto start_datetime_str
    interpreted as wall-clock time in the specified timezone, converts the result
    to integer milliseconds since the Unix epoch, and inserts it as 'UnixTimestamp'
    right after the elapsed-seconds column. Finally, overwrites the CSV in place.

    Parameters
    ----------
    csv_path : str
        Path to the CSV file to read and overwrite.
    start_datetime_str : str
        The starting point (date + time) in 'YYYY-MM-DD HH:MM:SS' format,
        expressed as local time in ``tz_name``.
    tz_name : str, optional
        Timezone name (default: 'Asia/Bishkek').

    Raises
    ------
    ValueError
        If ``start_datetime_str`` does not match the expected format, or if the
        file already contains a 'UnixTimestamp' column.

    Notes
    -----
    The start time is localized with pandas, which raises if the wall-clock time
    is ambiguous or non-existent in ``tz_name`` (DST transitions).
    """
    df = pd.read_csv(csv_path)

    elapsed_col = df.columns[0]

    base_dt = pd.to_datetime(
        start_datetime_str, format="%Y-%m-%d %H:%M:%S"
    ).tz_localize(tz_name)
    elapsed_td = pd.to_timedelta(df[elapsed_col], unit="s")

    dt_series = base_dt + elapsed_td

    # Series.view is deprecated; Timedelta floor-division also avoids assuming
    # the datetime values are stored at nanosecond resolution.
    epoch = pd.Timestamp("1970-01-01", tz="UTC")
    since_epoch = dt_series.dt.tz_convert("UTC") - epoch
    unix_series = (since_epoch // pd.Timedelta(milliseconds=1)).astype("int64")

    # Position 1 is directly after the elapsed-seconds column (position 0).
    df.insert(1, "UnixTimestamp", unix_series)
    df.to_csv(csv_path, index=False)


In [5]:

def remove_col(file, col_name):
    """Drop ``col_name`` from the CSV at ``file`` and rewrite that file in place.

    The file is written back without the DataFrame index. A missing column
    makes the underlying pandas ``drop`` call raise.
    """
    frame = pd.read_csv(file)
    trimmed = frame.drop(columns=col_name)
    trimmed.to_csv(file, index=False)


In [11]:
# One-off cleanup: strip the 'timestamp' column from the 25-04-2025 respeck file.
# The file is rewritten in place, so this is meant to be run once per file.
remove_col(
    file='../data/bishkek_csr/03_train_ready/respeck/25-04-2025_respeck.csv',
    col_name='timestamp',
)
# add_timestamp_nasal('../data/bishkek_csr/03_train_ready/CSR005/16-06-2025_nasal.csv', '2025-06-16 23:00:01')
