![image](https://i.pinimg.com/originals/05/46/ca/0546caeb51e42b4227d0f91a8b6d3e4f.gif)

[Image Source](https://i.pinimg.com/originals/05/46/ca/0546caeb51e42b4227d0f91a8b6d3e4f.gif)

# <div style="color:blue;display:inline-block;border-radius:5px;background-color:#E6FFE6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:blue;overflow:hidden;font-size:90%;letter-spacing:0.5px;margin:0"><b> </b> Import Modules</p></div>

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

import dask.dataframe as dd

import warnings
warnings.filterwarnings("ignore")

import math

# Shared seaborn theme: pale-green canvas, black axis furniture, serif text
# and a faint grid. Keys that share a colour are grouped so the value is
# written once.
rc = {
    **dict.fromkeys(("axes.facecolor", "figure.facecolor"), "#E6FFE6"),
    **dict.fromkeys(
        ("axes.edgecolor", "axes.labelcolor", "xtick.color", "ytick.color"),
        "#000000",
    ),
    "grid.color": "#EBEBE7",
    "grid.alpha": 0.4,
    "font.family": "serif",
}

sns.set(rc=rc)

from colorama import Style, Fore
# Bright ANSI colour prefixes for console output, plus the reset sequence.
red, blu, mgt, gld = (
    Style.BRIGHT + tint
    for tint in (Fore.RED, Fore.BLUE, Fore.MAGENTA, Fore.YELLOW)
)
res = Style.RESET_ALL

# <div style="color:blue;display:inline-block;border-radius:5px;background-color:#E6FFE6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:blue;overflow:hidden;font-size:90%;letter-spacing:0.5px;margin:0"><b> </b>Load the Datasets</p></div>


In [None]:
# Load datasets
# Event annotations are read straight into a pandas DataFrame.
train_events = pd.read_csv('/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv')
# The series parquet is opened through dask (dd) instead of pandas; a dask
# frame needs .compute() before it becomes a pandas DataFrame.
# NOTE(review): presumably dask is used because the file is too large to load
# eagerly - confirm.
train_series = dd.read_parquet('/kaggle/input/child-mind-institute-detect-sleep-states/train_series.parquet')

# <div style="color:blue;display:inline-block;border-radius:5px;background-color:#E6FFE6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:blue;overflow:hidden;font-size:90%;letter-spacing:0.5px;margin:0"><b> </b>Data Cleaning and Preprocessing</p></div>



In [None]:
# For every series_id, flag whether at least one of its event rows has a
# missing step; the trailing expression displays how many series fall in
# each group.
series_has_NaN = train_events['step'].isnull().groupby(train_events['series_id']).any()
series_has_NaN.value_counts()

In [None]:
# Keep only the series ids whose events have no missing step.
no_NaN_series = [sid for sid, has_nan in series_has_NaN.items() if not has_nan]

In [None]:
# Drop the two series whose event logs are truncated.
# The list is filtered in place instead of calling list.remove(): remove()
# raises ValueError when an id is no longer present, which made this cell
# crash when it was re-run.
no_NaN_series[:] = [
    sid for sid in no_NaN_series
    if sid not in ('31011ade7c0a', 'a596ad0b82aa')
]

# <div style="color:blue;display:inline-block;border-radius:5px;background-color:#E6FFE6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:blue;overflow:hidden;font-size:90%;letter-spacing:0.5px;margin:0"><b> </b>Exploratory Data Analysis (EDA)</p></div>



In [None]:
# Bar chart counting the rows of train_events per value of the 'event'
# column; the figure is written to disk and then displayed.
plt.figure(figsize=(8, 6))
sns.countplot(x='event', data=train_events).set_title(
    "Distribution of Events in Train Data",
    fontsize=14,
    fontweight='bold',
    color='darkgreen',
)
plt.savefig('Distribution of Events in Train Data.png')
plt.show()

In [None]:
def get_train_series(series):
    """Return the rows of one series with an integer ``awake`` column attached.

    The series rows are read from the parquet file (filtered to ``series``)
    and the matching events from the CSV. Events with missing values are
    dropped; each remaining event is coded 1 for ``onset`` and 0 for
    ``wakeup`` and left-joined onto the series rows by ``step``. The code is
    then back-filled, so every row takes the code of the next event at or
    after it; rows after the last event are set to 1 (awake).

    Args:
        series: the ``series_id`` value to load.

    Returns:
        A pandas DataFrame holding the series rows plus ``awake`` (int).
    """
    # Open the parquet lazily, restricted to the requested series.
    lazy_signal = dd.read_parquet(
        "/kaggle/input/child-mind-institute-detect-sleep-states/train_series.parquet",
        filters=[('series_id', '=', series)],
    )
    events = pd.read_csv(
        "/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv"
    ).query('series_id == @series')

    # Discard incomplete event rows, then code each event numerically.
    events = events.dropna()
    events["step"] = events["step"].astype("int")
    events["awake"] = events["event"].replace({"onset": 1, "wakeup": 0})

    # Materialise the dask frame as a pandas DataFrame.
    signal = lazy_signal.compute()

    labelled = pd.merge(signal, events[['step', 'awake']], on='step', how='left')
    # Back-fill from the next event; whatever is still empty counts as awake.
    labelled["awake"] = (
        labelled["awake"].bfill(axis='rows').fillna(1).astype("int")
    )
    return labelled


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

def visualize_awake_state(train):
    """Plot the ``awake`` column of ``train`` against ``step`` as a line.

    The figure is saved as 'Awake State Over Time.png' and then shown.

    Args:
        train: frame providing the ``step`` and ``awake`` columns.
    """
    label_style = {'fontsize': 12, 'fontweight': 'bold'}

    fig, axes = plt.subplots(figsize=(20, 3))
    sns.lineplot(x="step", y="awake", data=train, color='blue', ax=axes)

    axes.set_title('Awake State Over Time', fontsize=14, fontweight='bold', color='darkgreen')
    axes.set_xlabel('Step', color='darkblue', **label_style)
    axes.set_ylabel('Awake State', color='blue', **label_style)

    # Restrict the major x ticks to whole-number step values.
    axes.xaxis.set_major_locator(MaxNLocator(integer=True))

    fig.savefig('Awake State Over Time.png')
    plt.show()

# Build the labelled frame for one series and plot its awake label over time.
train = get_train_series('08db4255286f')  # series_id to inspect; change it to look at another series

visualize_awake_state(train)


In [None]:
def visualize_anglez_enmo(train, filename='anglez vs. enmo with Awake State.png'):
    """Scatter ``enmo`` against ``anglez``, coloured by the ``awake`` label.

    Points with ``awake == 0`` are drawn red and points with ``awake == 1``
    blue. The figure is saved to ``filename`` and then shown.

    Args:
        train: frame providing the ``anglez``, ``enmo`` and ``awake`` columns.
        filename: path the figure is written to. It defaults to the name this
            function always used, so existing calls are unaffected; pass a
            different name to plot another series without overwriting the
            earlier image.
    """
    plt.figure(figsize=(20, 3))
    sns.scatterplot(data=train, x='anglez', y='enmo', hue='awake', palette={0: 'red', 1: 'blue'})
    plt.title('anglez vs. enmo with Awake State', fontsize=14, fontweight='bold', color='darkgreen')
    plt.savefig(filename)
    plt.show()

# Rebuild the labelled frame for series 08db4255286f and draw the
# anglez/enmo scatter for it.
train = get_train_series('08db4255286f')  # series_id to inspect; change it to look at another series

visualize_anglez_enmo(train)

In [None]:
# enmo along the step axis for the current `train` frame, one line colour per
# awake label; saved as step-enmo.png.
fig, ax = plt.subplots(figsize=(20, 3))
sns.lineplot(x="step", y="enmo", hue="awake", data=train, linewidth=0.5, ax=ax)
fig.savefig('step-enmo.png')
plt.show()

In [None]:
# anglez along the step axis for the current `train` frame, one line colour
# per awake label; saved as step-anglez.png.
fig, ax = plt.subplots(figsize=(20, 3))
sns.lineplot(x="step", y="anglez", hue="awake", data=train, linewidth=0.5, ax=ax)
fig.savefig('step-anglez.png')
plt.show()

In [None]:
def visualize_anglez_enmo(train, filename='anglez vs. enmo with Awake State-67f5fc60e494.png'):
    """Scatter ``enmo`` against ``anglez``, coloured by the ``awake`` label.

    Points with ``awake == 0`` are drawn red and points with ``awake == 1``
    blue. The figure is saved to ``filename`` and then shown.

    Args:
        train: frame providing the ``anglez``, ``enmo`` and ``awake`` columns.
        filename: path the figure is written to. It defaults to the name this
            definition always used (suffixed with series id 67f5fc60e494), so
            existing calls are unaffected; pass a different name when plotting
            another series.
    """
    plt.figure(figsize=(20, 3))
    sns.scatterplot(data=train, x='anglez', y='enmo', hue='awake', palette={0: 'red', 1: 'blue'})
    plt.title('anglez vs. enmo with Awake State', fontsize=14, fontweight='bold', color='darkgreen')
    plt.savefig(filename)
    plt.show()

# Switch the global `train` frame to series 67f5fc60e494 and draw the
# anglez/enmo scatter for it.
train = get_train_series('67f5fc60e494')  # series_id to inspect; change it to look at another series

visualize_anglez_enmo(train)


In [None]:
# enmo along the step axis for the current `train` frame, one line colour per
# awake label; displayed only, not saved.
fig, ax = plt.subplots(figsize=(20, 3))
sns.lineplot(x="step", y="enmo", hue="awake", data=train, linewidth=0.5, ax=ax)
plt.show()

In [None]:
# anglez along the step axis for the current `train` frame, one line colour
# per awake label; displayed only, not saved.
fig, ax = plt.subplots(figsize=(20, 3))
sns.lineplot(x="step", y="anglez", hue="awake", data=train, linewidth=0.5, ax=ax)
plt.show()

In [None]:
import matplotlib.pyplot as plt

def hexbin_density_plot(train):
    """Draw a hexagonal-bin density map of ``enmo`` against ``anglez``.

    The figure is saved as 'Hexbin Density Plot for anglez and enmo.png' and
    then shown.

    Args:
        train: frame providing the ``anglez`` and ``enmo`` columns.
    """
    plt.figure(figsize=(10, 6))
    # mincnt=1 leaves hexagons that contain no points undrawn.
    hexes = plt.hexbin(x=train['anglez'], y=train['enmo'], gridsize=50, cmap='inferno', mincnt=1)
    plt.colorbar(hexes, label='Density')

    plt.xlabel('anglez')
    plt.ylabel('enmo')
    plt.title(
        'Hexbin Density Plot for anglez and enmo',
        fontsize=14,
        fontweight='bold',
        color='darkgreen',
    )

    plt.savefig('Hexbin Density Plot for anglez and enmo.png')
    plt.show()

# Reload series 67f5fc60e494 into the global `train` frame and draw its
# anglez/enmo density map.
train = get_train_series('67f5fc60e494')  # series_id to inspect; change it to look at another series

hexbin_density_plot(train)


In [None]:
def polar_plot(train):
    """Plot ``enmo`` as the radius against ``anglez`` as the angle on polar axes.

    The figure is saved as 'Polar Plot of anglez and enmo.png' and then shown.

    Args:
        train: frame providing the ``anglez`` column (converted here from
            degrees to radians) and the ``enmo`` column.

    Raises:
        ValueError: if ``enmo`` is empty, because no maximum radius exists.
    """
    # Only anglez is an angle; enmo is used unchanged as the radial coordinate.
    anglez_rad = np.deg2rad(train['anglez'])
    enmo = train['enmo']

    # Create polar plot
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111, polar=True)

    # Plot the data
    ax.plot(anglez_rad, enmo, 'o', color='blue')
    # Leave one unit of headroom above the largest radius. np.nanmax is
    # vectorised and skips NaN, whereas the builtin max() walks the column
    # element by element and gives an order-dependent answer when NaN is
    # present.
    ax.set_rmax(np.nanmax(enmo) + 1)

    plt.title('Polar Plot of anglez vs. enmo', fontsize=14, fontweight='bold', color='darkgreen')
    plt.savefig('Polar Plot of anglez and enmo.png')
    plt.show()

# Reload series 67f5fc60e494 into the global `train` frame and draw its
# polar anglez/enmo plot.
train = get_train_series('67f5fc60e494')  # series_id to inspect; change it to look at another series

polar_plot(train)


<div class="alert alert-block alert-info">Your positive feedback and upvotes mean a lot! They motivate me to create more valuable content and help others discover it too. Let's build a thriving community of knowledge-sharing. Thank you for your support! 😊</div>