In [2]:
# Installs
!pip install polars
!pip install lets-plot

# Imports
import statistics
import optuna
import plotly
import polars as pl
import numpy as np
import os
import plotly.figure_factory as ff

from lets_plot import *
from lets_plot.bistro.corr import *
from lets_plot.mapping import as_discrete
from os import listdir

# Prepare both plotting libraries for notebook output:
# lets-plot via its HTML setup, plotly via its offline notebook mode.
LetsPlot.setup_html()
plotly.offline.init_notebook_mode(connected=True)

Collecting lets-plot
  Downloading lets_plot-4.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.9 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.9/2.9 MB 21.0 MB/s eta 0:00:00
Collecting pypng
  Downloading pypng-0.20220715.0-py3-none-any.whl (58 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 58.1/58.1 kB 6.4 MB/s eta 0:00:00
Installing collected packages: pypng, lets-plot
Successfully installed lets-plot-4.1.0 pypng-0.20220715.0

  shapely_geos_version, geos_capi_version_string


<a id="section-one"></a>
<p style="font-family: monospace; 
          font-weight: bold; 
          letter-spacing: 2px; 
          color: black; 
          font-size: 200%; 
          text-align: left;
          padding: 0px; 
          border-bottom: 4px solid #78D0AF" >Data and Cohort Characteristics</p>

In [4]:
# Read in the data
# The dataset root is kept in one place so it only has to be changed once
# if the notebook is run outside this Kaggle input layout.
DATA_DIR = "/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction"

df_defog_meta = pl.read_csv(os.path.join(DATA_DIR, "defog_metadata.csv"))
df_daily_meta = pl.read_csv(os.path.join(DATA_DIR, "daily_metadata.csv"))
df_subjects = pl.read_csv(os.path.join(DATA_DIR, "subjects.csv"))
df_events = pl.read_csv(os.path.join(DATA_DIR, "events.csv"))
df_tasks = pl.read_csv(os.path.join(DATA_DIR, "tasks.csv"))

In [5]:
# Overview of the subjects table: row/column counts, dtypes and the first values per column
print("Subjects data characteristics:")
df_subjects.glimpse()

Subjects data characteristics:
Rows: 173
Columns: 8
$ Subject      <str> 00f674, 00f674, 02bc69, 040587, 040587, 056372, 07285e, 082f01, 08de77, 08de77
$ Visit        <i64> 2, 1, None, 2, 1, 2, None, None, 1, 2
$ Age          <i64> 63, 63, 69, 75, 75, 69, 58, 64, 71, 71
$ Sex          <str> M, M, M, M, M, M, M, M, F, F
$ YearsSinceDx <f64> 27.0, 27.0, 4.0, 26.0, 26.0, 13.0, 1.0, 17.0, 16.0, 16.0
$ UPDRSIII_On  <i64> 43, 31, 21, 52, 47, 44, 18, 45, 47, 38
$ UPDRSIII_Off <i64> 49, 30, None, 69, 75, 50, 26, 57, 54, 49
$ NFOGQ        <i64> 24, 26, 22, 21, 24, 22, 10, 24, 17, 22



In [6]:
# Overview of the events table: row/column counts, dtypes and the first values per column
print("Events data characteristics:")
df_events.glimpse()

Events data characteristics:
Rows: 3544
Columns: 5
$ Id         <str> 003f117e14, 009ee11563, 009ee11563, 011322847a, 01d0fe7266, 03bb0c0c46, 03bb0c0c46, 03c9d8dbfa, 03c9d8dbfa, 03c9d8dbfa
$ Init       <f64> 8.61312, 11.3847, 54.6647, 28.0966, 30.3184, 16.0447, 19.6047, 16.4894, 34.8494, 37.2894
$ Completion <f64> 14.7731, 41.1847, 58.7847, 30.2966, 31.8784, 17.2447, 20.6047, 17.6094, 36.9694, 39.8494
$ Type       <str> Turn, Turn, Turn, Turn, Turn, Turn, Turn, Turn, Turn, Turn
$ Kinetic    <i64> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1



In [7]:
# Overview of the tasks table: row/column counts, dtypes and the first values per column
print("Tasks data characteristics:")
df_tasks.glimpse()

Tasks data characteristics:
Rows: 2817
Columns: 4
$ Id    <str> 02ab235146, 02ab235146, 02ab235146, 02ab235146, 02ab235146, 02ab235146, 02ab235146, 02ab235146, 02ab235146, 02ab235146
$ Begin <f64> 10.0, 211.24, 505.88, 577.96, 701.32, 738.32, 747.08, 775.64, 803.68, 829.44
$ End   <f64> 190.48, 271.56, 522.4, 594.64, 715.28, 743.16, 752.16, 787.56, 813.56, 831.296
$ Task  <str> Rest1, Rest2, 4MW, 4MW-C, MB1, MB2a, MB2b, MB3-R, MB3-R, MB3-L



In [10]:
import seaborn as sns

# Print the hex codes of seaborn's "pastel" palette; the `colors` lists in
# the plotting cells below are the first five entries of this output.
pastel_palette = sns.color_palette("pastel")
print(pastel_palette.as_hex())

['#a1c9f4', '#ffb482', '#8de5a1', '#ff9f9b', '#d0bbff', '#debb9b', '#fab0e4', '#cfcfcf', '#fffea3', '#b9f2f0']


In [14]:
# First five hex codes of seaborn's "pastel" palette
colors = ['#a1c9f4', '#ffb482', '#8de5a1', '#ff9f9b', '#d0bbff']

# Horizontal bar chart of the number of rows per sex.
# NOTE: this counts rows of df_subjects, and the glimpse output shows the same
# Subject id appearing once per Visit, so a subject may be counted more than once.
plt1 = (
    ggplot(df_subjects) +
    geom_bar(aes(x=as_discrete('Sex'), fill='Sex'), size=0.5) +
    scale_fill_manual(values=colors) +
    theme(plot_title=element_text(hjust=0.5)) +
    labs(y='Count', title='Males and Females') +
    coord_flip()
)

# Build the bunch with a statement-style add_plot call (the usage shown in the
# lets-plot docs) instead of assigning add_plot's return value to `bunch`,
# then render it explicitly so the cell actually displays the figure.
bunch = GGBunch()
bunch.add_plot(plt1, 0, 0, 700, 250)
bunch.show()


In [12]:
# First five hex codes of seaborn's "pastel" palette
colors = ['#a1c9f4', '#ffb482', '#8de5a1', '#ff9f9b', '#d0bbff']

# Age as recorded in df_subjects
plt1 = (
    ggplot(df_subjects) +
    geom_histogram(aes(x='Age'), fill=colors[2], color='gray') +
    theme(plot_title=element_text(hjust=0.5)) +
    xlim(20, 100) +
    labs(y='Count', x='', title='Age')
)

# Age at diagnosis, derived as Age - YearsSinceDx; same x-limits as the Age panel
df_age_at_dx = df_subjects.with_columns([(pl.col('Age') - pl.col('YearsSinceDx')).alias('AgeAtDx')])
plt2 = (
    ggplot(df_age_at_dx) +
    geom_histogram(aes(x='AgeAtDx'), fill=colors[3], color='gray') +
    theme(plot_title=element_text(hjust=0.5)) +
    xlim(20, 100) +
    labs(y='Count', x='', title='Age at Diagnosis')
)

# Years since diagnosis (no explicit x-limits on this panel)
plt3 = (
    ggplot(df_subjects) +
    geom_histogram(aes(x='YearsSinceDx'), fill=colors[4], color='gray') +
    theme(plot_title=element_text(hjust=0.5)) +
    labs(y='Count', x='', title='Years Since Diagnosis')
)

# Stack the three panels vertically: each is 700x300 px, placed every 320 px.
# add_plot is called as a statement on the bunch (the usage shown in the
# lets-plot docs) rather than chained on its return value, and the bunch is
# rendered explicitly so the cell displays the figure.
bunch = GGBunch()
for row, panel in enumerate([plt1, plt2, plt3]):
    bunch.add_plot(panel, 0, 320 * row, 700, 300)
bunch.show()


In [13]:
# Stack the on- and off-medication UPDRS-III scores into one long column so
# they can be plotted together and split by medication status or by sex.
updrs_on = df_subjects['UPDRSIII_On'].to_list()
updrs_off = df_subjects['UPDRSIII_Off'].to_list()

df_updrs_combined = pl.DataFrame({
    'UPDRSIII_Combined': updrs_on + updrs_off,
    'UPDRSIII_OnOff': ["On"] * len(updrs_on) + ["Off"] * len(updrs_off),
    # Sex is repeated once per half so every stacked score keeps its row's sex
    'Sex': df_subjects['Sex'].to_list() * 2
})

# Initializing colors
color_combined, color_on, color_off = '#80C680', '#D3BDF9', '#FFB266'
# Fill colours for the by-sex panels, defined here so the cell runs on a fresh
# kernel. These are the first two pastel colours, i.e. the ones the
# 'Males and Females' bar chart ends up using for its Sex fill.
sex_colors = ['#a1c9f4', '#ffb482']

# Combined
plt1 = (
    ggplot(df_updrs_combined) +
    geom_histogram(aes(x='UPDRSIII_Combined'), fill=color_combined, color='black') +
    theme(plot_title=element_text(hjust=0.5)) +
    xlim(0, 90) +
    labs(x='UPDRS Score', y='Count', title='UPDRS Score')
)

# By medication status
plt2 = (
    ggplot(df_updrs_combined) +
    geom_density(aes(x='UPDRSIII_Combined', fill='UPDRSIII_OnOff'), color='gray', alpha=0.5) +
    scale_fill_manual(values=[color_on, color_off]) +
    theme(legend_position='top') +
    xlim(0, 90) +
    labs(y='Density', x='UPDRS Score', title="", fill='Medication Status')
)

plt3 = (
    ggplot(df_updrs_combined) +
    geom_boxplot(aes(y='UPDRSIII_Combined', x='UPDRSIII_OnOff', fill=as_discrete('UPDRSIII_OnOff')), outlier_shape=21) +
    scale_fill_manual(values=[color_on, color_off]) +
    theme(legend_position='top', legend_title=element_blank(), axis_line_x=element_blank()) +
    ylim(0, 90) +
    coord_flip() +
    labs(x='Medication Status', y='UPDRS Score', title="", fill='Medication Status')
)

# By Sex
plt4 = (
    ggplot(df_updrs_combined) +
    geom_density(aes(x='UPDRSIII_Combined', fill='Sex'), color='gray', alpha=0.5) +
    scale_fill_manual(values=sex_colors) +
    theme(legend_position='top') +
    xlim(0, 90) +
    labs(y='Density', x='UPDRS Score', title="", fill='Sex')
)

plt5 = (
    ggplot(df_updrs_combined) +
    geom_boxplot(aes(y='UPDRSIII_Combined', x='Sex', fill=as_discrete('Sex')), outlier_shape=21) +
    scale_fill_manual(values=sex_colors) +
    theme(legend_position='top', legend_title=element_blank(), axis_line_x=element_blank()) +
    ylim(0, 90) +
    coord_flip() +
    labs(x='Sex', y='UPDRS Score', title="", fill='Sex')
)

# Bunch: five 700x250 px panels stacked vertically, one every 250 px.
# add_plot is called as a statement on the bunch (the usage shown in the
# lets-plot docs) rather than chained on its return value, and the bunch is
# rendered explicitly so the cell displays the figure.
bunch = GGBunch()
for row, panel in enumerate([plt1, plt2, plt3, plt4, plt5]):
    bunch.add_plot(panel, 0, 250 * row, 700, 250)
bunch.show()
