In [1]:
import pandas as  pd
import numpy as np

In [2]:
#Load the organized sleep data (Jan-May) CSV into a DataFrame
sleep = pd.read_csv(filepath_or_buffer='Organized_Sleep_Data(Jan-May).csv')

In [3]:
#Show the first five rows as a quick sanity check of the load
sleep.head(n=5)

Unnamed: 0,Start Time,End Time,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep
0,2020-01-01 1:16AM,2020-01-01 8:28AM,334,98,20,432,47.0,242.0,45.0
1,2020-01-01 11:00PM,2020-01-02 7:27AM,414,93,33,507,50.0,346.0,18.0
2,2020-01-02 11:03PM,2020-01-03 5:32AM,331,58,27,389,31.0,278.0,22.0
3,2020-01-03 8:53PM,2020-01-04 6:06AM,464,89,36,553,84.0,341.0,39.0
4,2020-01-04 8:55PM,2020-01-05 7:47AM,526,126,46,652,79.0,401.0,46.0


In [4]:
#Print the column summary: dtype and non-null count per column (reveals missing values)
sleep.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 153 entries, 0 to 152
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Start Time            153 non-null    object 
 1   End Time              153 non-null    object 
 2   Minutes Asleep        153 non-null    int64  
 3   Minutes Awake         153 non-null    int64  
 4   Number of Awakenings  153 non-null    int64  
 5   Time in Bed           153 non-null    object 
 6   Minutes REM Sleep     145 non-null    float64
 7   Minutes Light Sleep   145 non-null    float64
 8   Minutes Deep Sleep    145 non-null    float64
dtypes: float64(3), int64(3), object(3)
memory usage: 10.9+ KB


In [5]:
#Parse the Start Time & End Time strings into datetime values
for time_column in ('Start Time', 'End Time'):
    sleep[time_column] = pd.to_datetime(sleep[time_column])

In [6]:
#Confirm Start Time & End Time now have a datetime dtype
sleep.dtypes

Start Time              datetime64[ns]
End Time                datetime64[ns]
Minutes Asleep                   int64
Minutes Awake                    int64
Number of Awakenings             int64
Time in Bed                     object
Minutes REM Sleep              float64
Minutes Light Sleep            float64
Minutes Deep Sleep             float64
dtype: object

In [7]:
#This export has Time in Bed stored as text (object dtype) because some values
#contain commas (presumably thousands separators, e.g. 1,150).
#Strip the commas as literal characters, then convert the column to integer.
#astype(str) comes first so the cell is safe to re-run after the column is already numeric.
#An explicit int64 is used because plain int maps to a platform-dependent width
#(it produced int32 here), which would not match the other int64 columns.
sleep['Time in Bed'] = (
    sleep['Time in Bed']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype('int64')
)

In [8]:
#Verify Time in Bed is now an integer dtype
sleep.dtypes

Start Time              datetime64[ns]
End Time                datetime64[ns]
Minutes Asleep                   int64
Minutes Awake                    int64
Number of Awakenings             int64
Time in Bed                      int32
Minutes REM Sleep              float64
Minutes Light Sleep            float64
Minutes Deep Sleep             float64
dtype: object

In [9]:
#List every row that has at least one missing value
sleep.loc[sleep.isna().any(axis='columns')]

Unnamed: 0,Start Time,End Time,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep
33,2020-02-02 09:20:00,2020-02-02 11:15:00,105,10,5,115,,,
93,2020-04-02 11:30:00,2020-04-03 06:40:00,474,292,64,1150,,,
104,2020-04-13 04:22:00,2020-04-13 07:20:00,166,12,1,178,,,
117,2020-04-28 05:28:00,2020-04-28 07:37:00,126,3,1,129,,,
137,2020-05-18 22:54:00,2020-05-19 00:32:00,79,18,4,97,,,
139,2020-05-20 00:34:00,2020-05-20 03:28:00,150,24,2,174,,,
140,2020-05-20 05:37:00,2020-05-20 07:08:00,79,12,1,91,,,
146,2020-05-26 03:42:00,2020-05-26 05:17:00,88,7,2,95,,,


In [10]:
#Fill missing sleep-stage minutes with the median of each column's observed values
#NOTE(review): the rows missing stage data look like mostly short sleeps, so a
#whole-column median may exceed their Minutes Asleep - confirm this is intended
stage_columns = ['Minutes REM Sleep', 'Minutes Light Sleep', 'Minutes Deep Sleep']
for column in stage_columns:
    observed_minutes = sleep[column].dropna().astype('int')
    column_median = round(observed_minutes.median(), 2)
    sleep[column] = sleep[column].fillna(column_median)

In [11]:
#Confirm no rows with missing values remain after the median fill (expect an empty frame)
sleep.loc[sleep.isna().any(axis='columns')]

Unnamed: 0,Start Time,End Time,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep


In [12]:
#Display the cleaned dataset (pandas truncates the middle rows of a long frame)
sleep

Unnamed: 0,Start Time,End Time,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep
0,2020-01-01 01:16:00,2020-01-01 08:28:00,334,98,20,432,47.0,242.0,45.0
1,2020-01-01 23:00:00,2020-01-02 07:27:00,414,93,33,507,50.0,346.0,18.0
2,2020-01-02 23:03:00,2020-01-03 05:32:00,331,58,27,389,31.0,278.0,22.0
3,2020-01-03 20:53:00,2020-01-04 06:06:00,464,89,36,553,84.0,341.0,39.0
4,2020-01-04 20:55:00,2020-01-05 07:47:00,526,126,46,652,79.0,401.0,46.0
...,...,...,...,...,...,...,...,...,...
148,2020-05-27 22:55:00,2020-05-28 05:21:00,313,73,23,386,40.0,226.0,47.0
149,2020-05-28 22:25:00,2020-05-29 04:38:00,325,48,22,373,56.0,259.0,10.0
150,2020-05-29 21:32:00,2020-05-30 05:18:00,394,72,27,466,52.0,285.0,57.0
151,2020-05-30 19:52:00,2020-05-31 05:00:00,441,107,32,548,71.0,320.0,50.0


In [13]:
#Write the cleaned data to a new CSV, leaving out the DataFrame index
sleep.to_csv(path_or_buf='Clean_Sleep_Data(Jan-May).csv', index=False)