In [7]:
import polars as pl
import duckdb
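# TODO: Fix this import so get_device_as_pl_df comes from the databasing package; until then it is redefined in this notebook.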
# from databasing.database_calls import get_device_as_pl_df

In [8]:
# Open the duckdb database file; read_only=True is passed so this connection is opened in read-only mode.
con = duckdb.connect(
    database='/media/shortterm_ssd/Clay/databases/duckdb/rcs-db.duckdb',
    read_only=True,
)

In [9]:
def get_device_as_pl_df(device, db_con, lazy=False, time_zone='America/Los_Angeles'):
    """
    Accesses duckdb database and returns columns of interest, labeled reasonably with session identifiers cast as categoricals.

    Selects DerivedTime, localTime, every column matching ``^Session|TD_|Power_Band`` and SleepStage from
    ``overnight.r{device}``, casts the ``Session*`` columns to Categorical, sorts by localTime and shrinks
    every column's dtype.

    Side effect: issues ``SET TIMEZONE`` on ``db_con`` before running the query.

    :param device: device name (str) (e.g. '02L'). It is spliced into the table name, so only letters,
        digits and underscores are accepted.
    :param db_con: duckdb connection object
    :param lazy: if True, return a polars LazyFrame. The duckdb result is still converted to a polars
        frame first; only the polars-side cast/sort/shrink steps are deferred.
    :param time_zone: time zone name passed to ``SET TIMEZONE`` on the connection
    :return: polars DataFrame, or polars LazyFrame when ``lazy`` is True
    :raises ValueError: if ``device`` contains anything other than letters, digits and underscores
    """
    import re

    # The table name cannot be bound as a query parameter, so validate it before
    # touching the connection to keep arbitrary SQL out of the query text.
    device_name = str(device)
    if re.fullmatch(r'[A-Za-z0-9_]+', device_name) is None:
        raise ValueError(
            f"Invalid device name {device!r}: expected only letters, digits and underscores"
        )

    # Double any single quote so the value cannot terminate the SQL string literal early.
    tz_literal = str(time_zone).replace("'", "''")
    db_con.sql(f"SET TIMEZONE = '{tz_literal}'")

    query = (
        "select DerivedTime, columns('localTime'), columns('^Session|TD_|Power_Band'), SleepStage "
        f"from overnight.r{device_name}"
    )
    frame = db_con.sql(query).pl()
    if lazy:
        frame = frame.lazy()

    # NOTE(review): shrink_dtype is applied to every column, including DerivedTime. If that column
    # holds epoch-millisecond floats, a 32-bit float cannot represent them exactly -- confirm that
    # callers do not rely on DerivedTime precision.
    return frame.with_columns(
        pl.col('^Session.*$').cast(pl.Categorical)
    ).sort('localTime').select(pl.all().shrink_dtype())

In [10]:
# Load the table for device '02L' as an eager polars DataFrame.
df = get_device_as_pl_df(device='02L', db_con=con, lazy=False)

FloatProgress(value=0.0, layout=Layout(width='100%'), style=ProgressStyle(bar_color='black'))

In [11]:
# Preview the first five rows of the loaded frame.
df.head(5)

DerivedTime,localTime,TD_BG,TD_key2,TD_key3,Power_Band1,Power_Band2,Power_Band5,Power_Band6,Power_Band7,Power_Band8,Session#,SessionIdentity,SleepStage
f32,"datetime[μs, America/Los_Angeles]",f32,f32,f32,f32,f32,f32,f32,f32,f32,cat,cat,f32
1650500000000.0,2022-04-20 23:14:10.001 PDT,,,,,,,,,,"""Session1650521…","""02L_04-21-22""",6.0
1650500000000.0,2022-04-20 23:14:10.003 PDT,,,,,,,,,,"""Session1650521…","""02L_04-21-22""",6.0
1650500000000.0,2022-04-20 23:14:10.005 PDT,,,,,,,,,,"""Session1650521…","""02L_04-21-22""",6.0
1650500000000.0,2022-04-20 23:14:10.007 PDT,,,,,,,,,,"""Session1650521…","""02L_04-21-22""",6.0
1650500000000.0,2022-04-20 23:14:10.009 PDT,,,,,,,,,,"""Session1650521…","""02L_04-21-22""",6.0


In [14]:
# Keep every column except DerivedTime and the Power_Band* columns.
df = df.select(pl.exclude('^DerivedTime$|^Power_Band.*$'))

In [19]:
# Collapses the SleepStage codes into a binary label: codes 2 and 3 become 1, codes 1, 4, 5 and 6 become 0.
# NOTE(review): the meaning of each SleepStage code is not visible in this notebook -- confirm against the data dictionary.
sleep_stage_mapping = {1: 0, 2: 1, 3: 1, 4: 0, 5: 0, 6: 0}

df_session = (
    df
    # Restrict to the single recording session of interest.
    .filter(pl.col('SessionIdentity') == '02L_04-21-22')
    # The Session* identifier columns are constant after the filter, so drop them.
    .select(pl.all().exclude('^Session.*$'))
    .with_columns(
        pl.col('TD_BG').alias('TD_BG_copy'),  # duplicate of TD_BG under a second name
        pl.lit(500).alias('samplerate'),  # constant column
        pl.lit(2.7).alias('stim'),  # constant column
        pl.col("SleepStage").map_dict(sleep_stage_mapping).alias("SleepStageBinary"),
    )
    # Discard rows that have no TD_BG sample.
    .filter(pl.col('TD_BG').is_not_null())
)

In [20]:
# Preview the first five rows of the single-session frame.
df_session.head(5)

localTime,TD_BG,TD_key2,TD_key3,SleepStage,TD_BG_copy,samplerate,stim,SleepStageBinary
"datetime[μs, America/Los_Angeles]",f32,f32,f32,f32,f32,i32,f64,i64
2022-04-20 23:14:10.101 PDT,-0.860712,2.581962,2.417944,6.0,-0.860712,500,2.7,0
2022-04-20 23:14:10.103 PDT,-1.307446,2.581962,2.318603,6.0,-1.307446,500,2.7,0
2022-04-20 23:14:10.105 PDT,-1.138362,2.581962,2.123734,6.0,-1.138362,500,2.7,0
2022-04-20 23:14:10.107 PDT,-0.70204,2.581962,2.242573,6.0,-0.70204,500,2.7,0
2022-04-20 23:14:10.109 PDT,-0.901275,2.581962,2.371215,6.0,-0.901275,500,2.7,0


In [25]:
# Arrange the columns for export: the TD_* channels are renamed key0..key3
# (key1 is the TD_BG copy), followed by stim, samplerate and the SleepStage* columns.
# This cell reassigns df_session, so it expects the frame produced by the previous
# transform cell and cannot be re-run on its own output (TD_BG no longer exists).
df_session = df_session.select(
    'localTime',
    pl.col('TD_BG').alias('key0'),
    pl.col('TD_BG_copy').alias('key1'),
    pl.col('TD_key2').alias('key2'),
    pl.col('TD_key3').alias('key3'),
    'stim',
    'samplerate',
    pl.col('^SleepStage.*$'),
)

In [22]:
# Write the export-layout session frame to CSV.
df_session.write_csv(
    '/media/longterm_hdd/Clay/02L_04-21-22.csv'
)