# This Code
##### Data Cleanup
* Find/address dates missing values in "Awake" table
* Find/remove isolated dates in "Awake" table
* Find/address null values or duplicated rows in "Awake" and "Feedings" tables
##### Data Transformation
* Transform the "Sleep" information to create an "Awake" table.
* Estimate ounces by breast feeding time
* Using fives files, "Sleep_Adjusted", "Feedings_Adjusted", "Awake", "Notes" and "Calendar", create one dataframe to use for our analysis

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta # two classes from datetime module: datetime for the format, and timedelta for duration

# Set the file name for the files I've already dowloaded and stored within this project's folder
sleep_file = "Sleep.xlsx"
feeding_file = "Feedings.xlsx"
calendar_file = "Calendar.xlsx"
weight_file = "Weight.xlsx"
notes_file = "Notes.xlsx"

# Function with two inputs/parameters
def load_with_datetime(path, datetime_cols):
    # uses pandas to read excel file located at path and stores in df
    df = pd.read_excel(path)
    # for-loop: datetime_cols is a list of column names that should be treated as dates and times
    for col in datetime_cols:
        # df[col] selects a specificc olumn from the df
        df[col] = pd.to_datetime(df[col])
    # necessary. returns adjusted df to be used
    return df

# Load the excel files into Pandas
calendar_df = load_with_datetime(calendar_file,['Date'])
sleep_df = load_with_datetime(sleep_file, ["Time_Start", "Time_End"])
feeding_df = load_with_datetime(feeding_file, ["Date"])
weight_df = load_with_datetime(weight_file, ["Date"])
notes_df = load_with_datetime(notes_file, ["Date"])

# Data Cleanup
## Data Cleanup 1.) How many dates have only 3, 2, 1 occurrences in our sleep df?

In [2]:
# Count the number of rows per group
group_sizes = sleep_df.groupby("Time_Start_Date").size()

# Count how many dates have exactly 1, 2, or 3 entries
count_1 = (group_sizes == 1).sum()
count_2 = (group_sizes == 2).sum()
count_3 = (group_sizes == 3).sum()

print(f"Number of unique dates with 3 occurrences: {count_3}")
print(f"Number of unique dates with 2 occurrences: {count_2}")
print(f"Number of unique dates with 1 occurrence: {count_1}")

Number of unique dates with 3 occurrences: 5
Number of unique dates with 2 occurrences: 0
Number of unique dates with 1 occurrence: 0


##### I reviewed these dates in the file against my notebooks.
##### Manually added in sythetic rows that matched patterns for days before and after.
##### Did this for Dates:
* 9/27
* 10/12
* 10/14
* 12/7
* 1/22
* 1/28
* 2/4
##### The remainder of days (5) with only 3 sleep occurrences are accurate. 

## Data Cleanup 2.) How many dates (if any) are isolated, without a day before or after in our sleep df?

In [3]:
# Assign unique dates to an object
all_dates = set(sleep_df["Time_Start_Date"].unique())

# Define our date list
isolated_dates = []

# loop through our list and if condition is met, append to our date list
for date in all_dates:
    prev_day = date - timedelta(days=1)
    next_day = date + timedelta(days=1)
    if prev_day not in all_dates and next_day not in all_dates:
        isolated_dates.append(date)

# Step 4: Print result
print(f"Isolated dates: {isolated_dates}")

Isolated dates: [Timestamp('2025-02-19 00:00:00')]


In [4]:
dates_to_drop = [pd.to_datetime("2025-02-19")]

# Drop rows
sleep_df = sleep_df[~sleep_df["Time_Start_Date"].isin(dates_to_drop)]

# Save to Excel
sleep_df.to_excel("Sleep_Adjusted.xlsx", index=False)

## Data Cleanup 3.) How are we on null or duplicate values?

In [5]:
# Quick looksie for duplicate rows
duplicates_sleep = sleep_df[sleep_df.duplicated()]
duplicates_feeding = feeding_df[feeding_df.duplicated()]

duplicates_sleep

Unnamed: 0,Time_Start,Time_End,Duration,Time_Start_Date,Occurrence


In [6]:
duplicates_feeding

Unnamed: 0,Date,Feed_Number,Time,Ounces,Bottle_Breast,Breast_Time,Weeks_Old


In [7]:
# Quick looksie for null values and number of rows in sleep df
sleep_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 887 entries, 0 to 886
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Time_Start       887 non-null    datetime64[ns]
 1   Time_End         887 non-null    datetime64[ns]
 2   Duration         887 non-null    object        
 3   Time_Start_Date  887 non-null    datetime64[ns]
 4   Occurrence       887 non-null    int64         
dtypes: datetime64[ns](3), int64(1), object(1)
memory usage: 41.6+ KB


In [8]:
# Quick looksie for null values and number of rows in feeding df
# I expect null values for Ounces and Breast_Time, but not for both within the same row
feeding_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 895 entries, 0 to 894
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           895 non-null    datetime64[ns]
 1   Feed_Number    895 non-null    int64         
 2   Time           895 non-null    object        
 3   Ounces         786 non-null    float64       
 4   Bottle_Breast  895 non-null    object        
 5   Breast_Time    142 non-null    float64       
 6   Weeks_Old      895 non-null    int64         
dtypes: datetime64[ns](1), float64(2), int64(2), object(2)
memory usage: 49.1+ KB


In [9]:
# Find rows where both Ounces and Breast_Time are missing
missing_both = feeding_df[feeding_df["Ounces"].isna() & feeding_df["Breast_Time"].isna()]

# Display the result
print(f"Number of entries with both missing: {len(missing_both)}")
missing_both

Number of entries with both missing: 0


Unnamed: 0,Date,Feed_Number,Time,Ounces,Bottle_Breast,Breast_Time,Weeks_Old


In [10]:
# Removing rows where both Ounces and Breast_Time are missing
feeding_df = feeding_df[~(feeding_df["Ounces"].isna() & feeding_df["Breast_Time"].isna())]

# Resetting index
feeding_df.reset_index(drop=True, inplace=True)

In [11]:
# Just double checking... did we remove the row missing both?
missing_both = feeding_df[feeding_df["Ounces"].isna() & feeding_df["Breast_Time"].isna()]

# Display the result
print(f"Number of entries with both missing: {len(missing_both)}")

Number of entries with both missing: 0


# Transformations
## Transform 1.): Create an "Awake" dataframe based on "Sleep"

In [12]:
# Sort by Time_Start just in case
sleep_df = sleep_df.sort_values(by='Time_Start').reset_index(drop=True)

# List to hold the awake periods
awake_periods = []

# Go through each sleep period and calculate the awake period in between
for i in range(len(sleep_df) - 1):
    awake_start = sleep_df.loc[i, 'Time_End']
    awake_end = sleep_df.loc[i + 1, 'Time_Start']
    
    # Calculate duration
    duration = awake_end - awake_start
    
    # Exclude any durations greater than 13 hours
    if duration <= timedelta(hours=13):
        awake_periods.append({
            'Time_Start': awake_start,
            'Time_End': awake_end,
            'Duration': duration,
            'Time_Start_Date': awake_start.date(),
            'Occurrence': sleep_df.loc[i, 'Occurrence']
        })

# Convert to DataFrame
awake_df = pd.DataFrame(awake_periods)

# Save to Excel
awake_df.to_excel("Awake.xlsx", index=False)

awake_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 883 entries, 0 to 882
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype          
---  ------           --------------  -----          
 0   Time_Start       883 non-null    datetime64[ns] 
 1   Time_End         883 non-null    datetime64[ns] 
 2   Duration         883 non-null    timedelta64[ns]
 3   Time_Start_Date  883 non-null    object         
 4   Occurrence       883 non-null    int64          
dtypes: datetime64[ns](2), int64(1), object(1), timedelta64[ns](1)
memory usage: 34.6+ KB


## Transform 2.): Estimate ounces by breast feeding time

In [13]:
# Step 1: Calculate average feeding time per week
weekly_avg = feeding_df.groupby('Weeks_Old')['Breast_Time'].transform('mean')
feeding_df['AvgFeedingTimePerWeek'] = weekly_avg

# Step 2: Estimate ounces based on time relative to weekly average
OuncesAtAvg = 2  
feeding_df['Estimated_Ounces'] = (feeding_df['Breast_Time'].fillna(0) / feeding_df['AvgFeedingTimePerWeek']) * OuncesAtAvg
feeding_df['Estimated_Ounces'] = feeding_df['Estimated_Ounces'].clip(upper=5)  # Cap at 5 ounces

# Step 3: Combine with actual ounces (e.g., bottle-fed)
feeding_df['Combined_Ounces'] = feeding_df['Ounces'].fillna(0)  + feeding_df['Estimated_Ounces'].fillna(0)

# Save to Excel
feeding_df.to_excel("Feedings_Adjusted.xlsx", index=False)

# View the final result
feeding_df

Unnamed: 0,Date,Feed_Number,Time,Ounces,Bottle_Breast,Breast_Time,Weeks_Old,AvgFeedingTimePerWeek,Estimated_Ounces,Combined_Ounces
0,2024-09-03,1,00:30:00,,Breast,30.0,2,21.52381,2.787611,2.787611
1,2024-09-03,2,04:30:00,,Breast,13.0,2,21.52381,1.207965,1.207965
2,2024-09-03,3,06:00:00,,Breast,13.0,2,21.52381,1.207965,1.207965
3,2024-09-03,4,09:00:00,,Breast,40.0,2,21.52381,3.716814,3.716814
4,2024-09-03,5,12:30:00,,Breast,22.0,2,21.52381,2.044248,2.044248
...,...,...,...,...,...,...,...,...,...,...
890,2025-02-19,1,07:00:00,6.0,Bottle,,26,,,6.000000
891,2025-02-19,2,09:45:00,6.0,Bottle,,26,,,6.000000
892,2025-02-19,3,13:45:00,6.0,Bottle,,26,,,6.000000
893,2025-02-19,4,16:35:00,6.0,Bottle,,26,,,6.000000


## Transform 3): Create one dataframe using five files
##### One row per day with columns
* Date
* Birth_Week
* Birth_Month
* PrevDayOunces
* CurDayOunces
* FeedType
* DaytimeSleep
* NumbofDayNaps
* NighttimeSleep
* Weight
* ColdYN

## Calendar + Feedings

In [14]:
def classify_feeding_type(values):
    total = len(values)
    breast_count = sum(v == "Breast" for v in values)
    bottle_count = sum(v == "Bottle" for v in values)
    
    breast_pct = breast_count / total
    bottle_pct = bottle_count / total
    
    if breast_pct >= 0.65:
        return "Breast"
    elif bottle_pct >= 0.65:
        return "Bottle"
    else:
        return "Both"

feeding_type = feeding_df.groupby('Date')['Bottle_Breast'].agg(classify_feeding_type).reset_index()
feeding_type.rename(columns={'Bottle_Breast': 'Feeding_Type'}, inplace=True)

# Step 2: Summarize Feedings by Date
feedings_summary = feeding_df.groupby('Date')['Combined_Ounces'].sum().reset_index()
feedings_summary.rename(columns={'Combined_Ounces': 'CurDayOunces'}, inplace=True)

# Step 3: Calculate Previous Day's Ounces
feedings_summary['PrevDay'] = feedings_summary['Date'] + pd.Timedelta(days=1)
feedings_with_prev = feedings_summary.merge(
    feedings_summary[['Date', 'CurDayOunces']].rename(columns={
        'Date': 'PrevDay', 'CurDayOunces': 'PrevDayOunces'
    }),
    on='PrevDay',
    how='left')

feedings_with_prev

Unnamed: 0,Date,CurDayOunces,PrevDay,PrevDayOunces
0,2024-09-03,17.610619,2024-09-04,17.615044
1,2024-09-04,17.615044,2024-09-05,19.774336
2,2024-09-05,19.774336,2024-09-06,20.223404
3,2024-09-06,20.223404,2024-09-07,18.755319
4,2024-09-07,18.755319,2024-09-08,16.085106
...,...,...,...,...
150,2025-02-03,26.500000,2025-02-04,27.000000
151,2025-02-04,27.000000,2025-02-05,27.000000
152,2025-02-05,27.000000,2025-02-06,
153,2025-02-07,28.000000,2025-02-08,


In [15]:
# Step 4: Merge with Calendar to include only one row per date
calendar_filtered = calendar_df[['Date', 'Birth_Week', 'Birth_Month']]
draft_df = calendar_filtered.merge(
    feedings_with_prev[['Date', 'CurDayOunces', 'PrevDayOunces']],
    on='Date',
    how='left')

# Step 5: Merge with Feeding_Type
draft_df = draft_df.merge(feeding_type, on='Date', how='left')

draft_df.info(); draft_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191 entries, 0 to 190
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           191 non-null    datetime64[ns]
 1   Birth_Week     191 non-null    int64         
 2   Birth_Month    191 non-null    int64         
 3   CurDayOunces   155 non-null    float64       
 4   PrevDayOunces  150 non-null    float64       
 5   Feeding_Type   155 non-null    object        
dtypes: datetime64[ns](1), float64(2), int64(2), object(1)
memory usage: 9.1+ KB


Unnamed: 0,Date,Birth_Week,Birth_Month,CurDayOunces,PrevDayOunces,Feeding_Type
0,2024-08-23,1,1,,,
1,2024-08-24,1,1,,,
2,2024-08-25,1,1,,,
3,2024-08-26,1,1,,,
4,2024-08-27,1,1,,,


## Add Wakings

In [16]:
# Filter naps starting between 6 AM and 6 PM
daytime_mask = (awake_df['Time_Start'].dt.hour >= 6) & (awake_df['Time_Start'].dt.hour < 17)
daytime_naps = awake_df[daytime_mask]

# Aggregate total daytime sleep duration and count of naps per date
daytime_summary = daytime_naps.groupby('Time_Start_Date').agg(
    NumbofDayNaps=('Duration', 'count')
).reset_index()

daytime_summary['Time_Start_Date'] = pd.to_datetime(daytime_summary['Time_Start_Date'])

daytime_summary.head()

Unnamed: 0,Time_Start_Date,NumbofDayNaps
0,2024-09-02,2
1,2024-09-03,3
2,2024-09-04,3
3,2024-09-05,3
4,2024-09-06,4


In [17]:
# Merge with daytime summary
draft_df = draft_df.merge(
    daytime_summary,
    left_on='Date',
    right_on='Time_Start_Date',
    how='left'
).drop(columns='Time_Start_Date')

draft_df

Unnamed: 0,Date,Birth_Week,Birth_Month,CurDayOunces,PrevDayOunces,Feeding_Type,NumbofDayNaps
0,2024-08-23,1,1,,,,
1,2024-08-24,1,1,,,,
2,2024-08-25,1,1,,,,
3,2024-08-26,1,1,,,,
4,2024-08-27,1,1,,,,
...,...,...,...,...,...,...,...
186,2025-02-25,27,7,,,,
187,2025-02-26,27,7,,,,
188,2025-02-27,27,7,,,,
189,2025-02-28,28,7,,,,


## Weight

In [18]:
# Merge measured weights into the draft dataframe
draft_df = draft_df.merge(weight_df, on='Date', how='left')

# Interpolate missing weights linearly
draft_df['Weight_Pounds'] = draft_df['Weight_Pounds'].interpolate(method='linear')

draft_df.head(15)

Unnamed: 0,Date,Birth_Week,Birth_Month,CurDayOunces,PrevDayOunces,Feeding_Type,NumbofDayNaps,Weight_Pounds
0,2024-08-23,1,1,,,,,7.9
1,2024-08-24,1,1,,,,,7.938542
2,2024-08-25,1,1,,,,,7.977083
3,2024-08-26,1,1,,,,,8.015625
4,2024-08-27,1,1,,,,,8.054167
5,2024-08-28,1,1,,,,,8.092708
6,2024-08-29,1,1,,,,,8.13125
7,2024-08-30,2,1,,,,,8.169792
8,2024-08-31,2,1,,,,,8.208333
9,2024-09-01,2,1,,,,,8.246875


## Sleep

In [19]:
sleep_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 887 entries, 0 to 886
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Time_Start       887 non-null    datetime64[ns]
 1   Time_End         887 non-null    datetime64[ns]
 2   Duration         887 non-null    object        
 3   Time_Start_Date  887 non-null    datetime64[ns]
 4   Occurrence       887 non-null    int64         
dtypes: datetime64[ns](3), int64(1), object(1)
memory usage: 34.8+ KB


In [20]:
# Complete processing from raw sleep_df to a combined daily summary with Daytime and Nighttime sleep

# Step 1: Initialize accumulators
night_blocks = []
daytime_sleep_hours = []

# Step 2: Loop through each row in sleep_df to calculate both Nighttime and Daytime sleep
for _, row in sleep_df.iterrows():
    start = row['Time_Start']
    end = row['Time_End']
    total_daytime_sleep = 0

    for offset in range(-1, 2):
        anchor_date = (start + pd.Timedelta(days=offset)).normalize()

        # Nighttime block: 7:00 PM to 7:00 AM next day
        night_start = anchor_date + pd.Timedelta(hours=19)
        night_end = night_start + pd.Timedelta(hours=12)

        night_overlap_start = max(start, night_start)
        night_overlap_end = min(end, night_end)

        if night_overlap_start < night_overlap_end:
            night_duration = (night_overlap_end - night_overlap_start).total_seconds() / 3600
            night_blocks.append({
                'Date': night_start.date(),
                'Sleep_Hours': night_duration
            })

        # Daytime block: 7:00 AM to 7:00 PM
        day_start = anchor_date + pd.Timedelta(hours=7)
        day_end = day_start + pd.Timedelta(hours=12)

        day_overlap_start = max(start, day_start)
        day_overlap_end = min(end, day_end)

        if day_overlap_start < day_overlap_end:
            day_duration = (day_overlap_end - day_overlap_start).total_seconds() / 3600
            total_daytime_sleep += day_duration

    daytime_sleep_hours.append(round(total_daytime_sleep, 2))

# Step 3: Add DaytimeSleep to sleep_df
sleep_df['DaytimeSleep'] = daytime_sleep_hours
sleep_df['Date'] = sleep_df['Time_Start'].dt.date

# Step 4: Aggregate both Daytime and Nighttime sleep per date
night_df = pd.DataFrame(night_blocks)
night_df = night_df.groupby('Date')['Sleep_Hours'].sum().reset_index()
night_df.rename(columns={'Sleep_Hours': 'NighttimeSleep'}, inplace=True)

day_df = sleep_df.groupby('Date')['DaytimeSleep'].sum().reset_index()

# Step 5: Merge Daytime and Nighttime into one final daily summary
daily_sleep_df = pd.merge(night_df, day_df, on='Date', how='outer').fillna(0)

In [21]:
sleep_df

Unnamed: 0,Time_Start,Time_End,Duration,Time_Start_Date,Occurrence,DaytimeSleep,Date
0,2024-09-02 00:45:00,2024-09-02 01:15:00,00:30:00,2024-09-02,7,0.00,2024-09-02
1,2024-09-02 02:00:00,2024-09-02 04:45:00,02:45:00,2024-09-02,7,0.00,2024-09-02
2,2024-09-02 06:10:00,2024-09-02 09:00:00,02:50:00,2024-09-02,7,2.00,2024-09-02
3,2024-09-02 10:30:00,2024-09-02 12:00:00,01:30:00,2024-09-02,7,1.50,2024-09-02
4,2024-09-02 15:00:00,2024-09-02 17:45:00,02:45:00,2024-09-02,7,2.75,2024-09-02
...,...,...,...,...,...,...,...
882,2025-02-04 19:00:00,2025-02-05 06:45:00,11:45:00,2025-02-04,5,0.00,2025-02-04
883,2025-02-05 09:15:00,2025-02-05 11:00:00,01:45:00,2025-02-05,4,1.75,2025-02-05
884,2025-02-05 13:45:00,2025-02-05 14:15:00,00:30:00,2025-02-05,4,0.50,2025-02-05
885,2025-02-05 15:45:00,2025-02-05 16:15:00,00:30:00,2025-02-05,4,0.50,2025-02-05


In [22]:
night_df

Unnamed: 0,Date,NighttimeSleep
0,2024-09-01,4.083333
1,2024-09-02,8.000000
2,2024-09-03,8.750000
3,2024-09-04,7.000000
4,2024-09-05,7.750000
...,...,...
143,2025-02-01,11.250000
144,2025-02-02,11.500000
145,2025-02-03,11.750000
146,2025-02-04,11.750000


In [23]:
daily_sleep_df

Unnamed: 0,Date,NighttimeSleep,DaytimeSleep
0,2024-09-01,4.083333,0.00
1,2024-09-02,8.000000,6.75
2,2024-09-03,8.750000,7.00
3,2024-09-04,7.000000,6.75
4,2024-09-05,7.750000,5.50
...,...,...,...
143,2025-02-01,11.250000,3.00
144,2025-02-02,11.500000,3.08
145,2025-02-03,11.750000,3.25
146,2025-02-04,11.750000,3.00


In [24]:
# Convert night_df Date to datetime64[ns] for compatibility
daily_sleep_df['Date'] = pd.to_datetime(daily_sleep_df['Date'])

# Merge to draft
draft_df = draft_df.merge(daily_sleep_df, on='Date', how='left')

# Round nighttime sleep values
draft_df['NighttimeSleep'] = draft_df['NighttimeSleep'].round(2)
draft_df['DaytimeSleep'] = draft_df['DaytimeSleep'].round(2)

draft_df

Unnamed: 0,Date,Birth_Week,Birth_Month,CurDayOunces,PrevDayOunces,Feeding_Type,NumbofDayNaps,Weight_Pounds,NighttimeSleep,DaytimeSleep
0,2024-08-23,1,1,,,,,7.900000,,
1,2024-08-24,1,1,,,,,7.938542,,
2,2024-08-25,1,1,,,,,7.977083,,
3,2024-08-26,1,1,,,,,8.015625,,
4,2024-08-27,1,1,,,,,8.054167,,
...,...,...,...,...,...,...,...,...,...,...
186,2025-02-25,27,7,,,,,14.281250,,
187,2025-02-26,27,7,,,,,14.281250,,
188,2025-02-27,27,7,,,,,14.281250,,
189,2025-02-28,28,7,,,,,14.281250,,


In [25]:
# Remove rows where NighttimeSleep is less than 5
draft_df = draft_df[draft_df["NighttimeSleep"] >= 5].copy()

draft_df

Unnamed: 0,Date,Birth_Week,Birth_Month,CurDayOunces,PrevDayOunces,Feeding_Type,NumbofDayNaps,Weight_Pounds,NighttimeSleep,DaytimeSleep
10,2024-09-02,2,1,,,,2.0,8.285417,8.00,6.75
11,2024-09-03,2,1,17.610619,17.615044,Breast,3.0,8.323958,8.75,7.00
12,2024-09-04,2,1,17.615044,19.774336,Breast,3.0,8.362500,7.00,6.75
13,2024-09-05,2,1,19.774336,20.223404,Breast,3.0,8.401042,7.75,5.50
14,2024-09-06,3,1,20.223404,18.755319,Both,4.0,8.439583,7.75,6.42
...,...,...,...,...,...,...,...,...,...,...
161,2025-01-31,24,6,24.000000,30.000000,Bottle,3.0,14.281250,11.50,3.75
162,2025-02-01,24,6,30.000000,28.000000,Bottle,3.0,14.281250,11.25,3.00
163,2025-02-02,24,6,28.000000,26.500000,Bottle,3.0,14.281250,11.50,3.08
164,2025-02-03,24,6,26.500000,27.000000,Bottle,3.0,14.281250,11.75,3.25


In [26]:
# Removing any remaining null values
draft_df.dropna(inplace=True)

draft_df

Unnamed: 0,Date,Birth_Week,Birth_Month,CurDayOunces,PrevDayOunces,Feeding_Type,NumbofDayNaps,Weight_Pounds,NighttimeSleep,DaytimeSleep
11,2024-09-03,2,1,17.610619,17.615044,Breast,3.0,8.323958,8.75,7.00
12,2024-09-04,2,1,17.615044,19.774336,Breast,3.0,8.362500,7.00,6.75
13,2024-09-05,2,1,19.774336,20.223404,Breast,3.0,8.401042,7.75,5.50
14,2024-09-06,3,1,20.223404,18.755319,Both,4.0,8.439583,7.75,6.42
15,2024-09-07,3,1,18.755319,16.085106,Both,4.0,8.478125,6.50,6.50
...,...,...,...,...,...,...,...,...,...,...
161,2025-01-31,24,6,24.000000,30.000000,Bottle,3.0,14.281250,11.50,3.75
162,2025-02-01,24,6,30.000000,28.000000,Bottle,3.0,14.281250,11.25,3.00
163,2025-02-02,24,6,28.000000,26.500000,Bottle,3.0,14.281250,11.50,3.08
164,2025-02-03,24,6,26.500000,27.000000,Bottle,3.0,14.281250,11.75,3.25


In [27]:
# Create a dichotomous variable 'GoodBadSleep' for later stats work
draft_df['GoodBadSleep'] = (draft_df['NighttimeSleep'] >= 11).astype(int).copy()
draft_df

Unnamed: 0,Date,Birth_Week,Birth_Month,CurDayOunces,PrevDayOunces,Feeding_Type,NumbofDayNaps,Weight_Pounds,NighttimeSleep,DaytimeSleep,GoodBadSleep
11,2024-09-03,2,1,17.610619,17.615044,Breast,3.0,8.323958,8.75,7.00,0
12,2024-09-04,2,1,17.615044,19.774336,Breast,3.0,8.362500,7.00,6.75,0
13,2024-09-05,2,1,19.774336,20.223404,Breast,3.0,8.401042,7.75,5.50,0
14,2024-09-06,3,1,20.223404,18.755319,Both,4.0,8.439583,7.75,6.42,0
15,2024-09-07,3,1,18.755319,16.085106,Both,4.0,8.478125,6.50,6.50,0
...,...,...,...,...,...,...,...,...,...,...,...
161,2025-01-31,24,6,24.000000,30.000000,Bottle,3.0,14.281250,11.50,3.75,1
162,2025-02-01,24,6,30.000000,28.000000,Bottle,3.0,14.281250,11.25,3.00,1
163,2025-02-02,24,6,28.000000,26.500000,Bottle,3.0,14.281250,11.50,3.08,1
164,2025-02-03,24,6,26.500000,27.000000,Bottle,3.0,14.281250,11.75,3.25,1


In [28]:
# Save to Excel
draft_df.to_excel("finaldataset.xlsx", index=False)