In [1]:
import pandas as  pd
import numpy as np

In [2]:
sleep = pd.read_csv('Organized_Sleep_Data.csv')

### Preview first 10 rows of data and display data types

In [3]:
sleep.head(10)

Unnamed: 0,Start Time,End Time,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep
0,2020-01-01 1:16AM,2020-01-01 8:28AM,334,98,20,432,47.0,242.0,45.0
1,2020-01-01 11:00PM,2020-01-02 7:27AM,414,93,33,507,50.0,346.0,18.0
2,2020-01-02 11:03PM,2020-01-03 5:32AM,331,58,27,389,31.0,278.0,22.0
3,2020-01-03 8:53PM,2020-01-04 6:06AM,464,89,36,553,84.0,341.0,39.0
4,2020-01-04 8:55PM,2020-01-05 7:47AM,526,126,46,652,79.0,401.0,46.0
5,2020-01-05 10:11PM,2020-01-06 5:31AM,366,74,23,440,59.0,262.0,45.0
6,2020-01-06 8:47PM,2020-01-07 5:48AM,442,99,38,541,65.0,285.0,92.0
7,2020-01-07 11:19PM,2020-01-08 3:38AM,211,48,15,259,36.0,138.0,37.0
8,2020-01-08 9:51PM,2020-01-09 4:38AM,337,70,26,407,34.0,244.0,59.0
9,2020-01-09 10:55PM,2020-01-10 6:01AM,325,101,26,426,29.0,270.0,26.0


In [4]:
sleep.dtypes

Start Time               object
End Time                 object
Minutes Asleep            int64
Minutes Awake             int64
Number of Awakenings      int64
Time in Bed              object
Minutes REM Sleep       float64
Minutes Light Sleep     float64
Minutes Deep Sleep      float64
dtype: object

### Converting the **Start and End Time** to datetime format

In [5]:
sleep['Start Time'] = pd.to_datetime(sleep['Start Time'])
sleep['End Time'] = pd.to_datetime(sleep['End Time'])

### **Time in Bed** is still an object so we can convert that to a float, but we need to first replace the comma

In [6]:
sleep['Time in Bed'] = sleep['Time in Bed'].replace(',','', regex=True)
sleep['Time in Bed'] = sleep['Time in Bed'].astype(str).astype(int)

In [7]:
sleep.dtypes

Start Time              datetime64[ns]
End Time                datetime64[ns]
Minutes Asleep                   int64
Minutes Awake                    int64
Number of Awakenings             int64
Time in Bed                      int32
Minutes REM Sleep              float64
Minutes Light Sleep            float64
Minutes Deep Sleep             float64
dtype: object

### Check for any missing values

In [8]:
sleep[sleep.isnull().any(axis=1)]

Unnamed: 0,Start Time,End Time,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep
33,2020-02-02 09:20:00,2020-02-02 11:15:00,105,10,5,115,,,
93,2020-04-02 11:30:00,2020-04-03 06:40:00,474,292,64,1150,,,
104,2020-04-13 04:22:00,2020-04-13 07:20:00,166,12,1,178,,,
117,2020-04-28 05:28:00,2020-04-28 07:37:00,126,3,1,129,,,
137,2020-05-18 22:54:00,2020-05-19 00:32:00,79,18,4,97,,,
139,2020-05-20 00:34:00,2020-05-20 03:28:00,150,24,2,174,,,
140,2020-05-20 05:37:00,2020-05-20 07:08:00,79,12,1,91,,,
146,2020-05-26 03:42:00,2020-05-26 05:17:00,88,7,2,95,,,
159,2020-06-10 09:39:00,2020-06-10 10:44:00,61,1,0,65,,,
174,2020-06-23 22:47:00,2020-06-24 01:33:00,151,15,6,166,,,


## Replace the missing data with respective column mean

In [9]:
for i in ['Minutes REM Sleep', 'Minutes Light Sleep', 'Minutes Deep Sleep']:
    sleep[i] = sleep[i].fillna(round(sleep[i].dropna().astype('float64').mean(),2))

Verify all missing data has been removed

In [10]:
sleep[sleep.isnull().any(axis=1)]

Unnamed: 0,Start Time,End Time,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep


In [11]:
sleep

Unnamed: 0,Start Time,End Time,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep
0,2020-01-01 01:16:00,2020-01-01 08:28:00,334,98,20,432,47.0,242.0,45.0
1,2020-01-01 23:00:00,2020-01-02 07:27:00,414,93,33,507,50.0,346.0,18.0
2,2020-01-02 23:03:00,2020-01-03 05:32:00,331,58,27,389,31.0,278.0,22.0
3,2020-01-03 20:53:00,2020-01-04 06:06:00,464,89,36,553,84.0,341.0,39.0
4,2020-01-04 20:55:00,2020-01-05 07:47:00,526,126,46,652,79.0,401.0,46.0
...,...,...,...,...,...,...,...,...,...
208,2020-07-26 22:14:00,2020-07-27 06:09:00,406,69,24,475,89.0,264.0,53.0
209,2020-07-28 00:26:00,2020-07-28 07:21:00,367,48,24,415,79.0,237.0,51.0
210,2020-07-28 22:13:00,2020-07-29 06:46:00,457,56,28,513,99.0,267.0,91.0
211,2020-07-29 22:24:00,2020-07-30 06:04:00,399,61,29,460,66.0,289.0,44.0


### **Save to CSV**

In [12]:
sleep.to_csv('Clean_Sleep_Data.csv', index=False)