# Youtube Data Cleaning

This file was used to clean the Youtube data obtained from Google Takeout. The cleaned data and results are saved as a csv file to be used in sleep_inference.ipynb.

In [702]:
# Import necessary libraries
import pandas as pd
import json
import numpy as np
from datetime import datetime, timedelta

## Clean the watch history data

In [703]:
# REPLACE WITH YOUR FILE PATH
filename = "../CB_data/watch-history.json"

with open(filename, 'r', encoding='utf-8') as file:
    data = json.load(file)

# List to store extracted data
extracted_data = []

# Loop through each entry in the JSON data and extract the relevant fields
for entry in data:
    title = entry.get("title", None)
    url = entry.get("titleUrl", None)
    time_str = entry.get("time", None)

    # Try to parse the date (you can adjust the format if needed)
    date_time = None
    if time_str:
        try:
            date_time = datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%S.%fZ")
        except ValueError:
            date_time = time_str  # Keep original string if parsing fails

    # Only append if there is a valid title (to avoid extra records)
    if title:
        extracted_data.append({"title": title, "url": url, "date_time": date_time})

# Convert the extracted data to a DataFrame
df_watch = pd.DataFrame(extracted_data)
df_watch.head()


Unnamed: 0,title,url,date_time
0,Watched DCD-The-2530,https://www.youtube.com/watch?v=dO1MRlcmj60,2025-02-23 15:56:16.942
1,Watched HALF HORSE HALF MAN | OFFICIAL VIDEO,https://www.youtube.com/watch?v=6v_R180kIGs,2025-02-13 04:53:21.691
2,Watched [Solo-Leveling]SymphonicSuite-Lv.1 → L...,https://www.youtube.com/watch?v=lmajFEi1Hdk,2025-02-11 01:54:30.969
3,Watched 【MV】『SHADOWBORN』 Hiroyuki SAWANO feat....,https://www.youtube.com/watch?v=qUFRPDHs1Q8,2025-02-11 01:51:10.881
4,Watched DCD-The-2530,https://www.youtube.com/watch?v=dO1MRlcmj60,2025-02-11 01:43:41.407


In [704]:
# Convert date_time to datetime
df_watch['date_time'] = pd.to_datetime(df_watch['date_time'], errors='coerce', utc=True)

# Convert from UTC to US/Eastern timezone
df_watch['date_time'] = df_watch['date_time'].dt.tz_convert('US/Eastern')

# Extract the date and time separately
df_watch['date'] = df_watch['date_time'].dt.date
df_watch['time'] = df_watch['date_time'].dt.time
df_watch.head(2)

Unnamed: 0,title,url,date_time,date,time
0,Watched DCD-The-2530,https://www.youtube.com/watch?v=dO1MRlcmj60,2025-02-23 10:56:16.942000-05:00,2025-02-23,10:56:16.942000
1,Watched HALF HORSE HALF MAN | OFFICIAL VIDEO,https://www.youtube.com/watch?v=6v_R180kIGs,2025-02-12 23:53:21.691000-05:00,2025-02-12,23:53:21.691000


In [705]:
# Extract title
pattern = r'^(Watched)\s+(.*)$'

df_watch[["type", "title"]] = df_watch["title"].str.extract(pattern)
df_watch.head(2)

Unnamed: 0,title,url,date_time,date,time,type
0,DCD-The-2530,https://www.youtube.com/watch?v=dO1MRlcmj60,2025-02-23 10:56:16.942000-05:00,2025-02-23,10:56:16.942000,Watched
1,HALF HORSE HALF MAN | OFFICIAL VIDEO,https://www.youtube.com/watch?v=6v_R180kIGs,2025-02-12 23:53:21.691000-05:00,2025-02-12,23:53:21.691000,Watched


In [706]:
# Add videos per day
videos_per_day = df_watch.groupby('date').size().reset_index(name='total_videos_watched')
videos_per_day['date'] = pd.to_datetime(videos_per_day['date'], errors='coerce')
start_date = "2025-01-01"
end_date = "2025-02-17"
start_date = pd.to_datetime(start_date).date()
start_date = pd.Timestamp(start_date)
end_date = pd.to_datetime(end_date).date()
end_date = pd.Timestamp(end_date)
videos_per_day = videos_per_day[(videos_per_day["date"] >= start_date) & (videos_per_day["date"] <= end_date)]

videos_per_day.head()

Unnamed: 0,date,total_videos_watched
66,2025-01-07,20
67,2025-01-09,4
68,2025-01-22,6
69,2025-02-06,5
70,2025-02-08,2


## Clean the search history data

In [707]:
# REPLACE WITH YOUR FILE PATH
filename = "../CB_data/search-history.json"

with open(filename, 'r', encoding='utf-8') as file:
    data = json.load(file)

# List to store extracted data
extracted_data = []

# Loop through each entry in the JSON data and extract the relevant fields
for entry in data:
    title = entry.get("title", None)
    url = entry.get("titleUrl", None)
    time_str = entry.get("time", None)

    # Try to parse the date (you can adjust the format if needed)
    date_time = None
    if time_str:
        try:
            date_time = datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%S.%fZ")
        except ValueError:
            date_time = time_str  # Keep original string if parsing fails

    # Only append if there is a valid title (to avoid extra records)
    if title:
        extracted_data.append({"title": title, "url": url, "date_time": date_time})

# Convert the extracted data to a DataFrame
df_search = pd.DataFrame(extracted_data)
df_search.head()

Unnamed: 0,title,url,date_time
0,Searched for half horse half man song,https://www.youtube.com/results?search_query=h...,2025-02-13 04:53:16.321
1,Watched Gymshark - Lift Seamless USP,https://www.youtube.com/watch?v=yJuD5y55kZ0,2025-02-11 01:43:09.379
2,Watched lululemon | FAWI Seasonal 2024 | 15s |...,https://www.youtube.com/watch?v=kpb97e0nYmY,2025-02-11 01:38:02.964
3,Watched (2/18) Now Taxes is Free in the App -...,https://www.youtube.com/watch?v=Ua9ffbSO-20,2025-02-08 05:10:47.120
4,Searched for boston junior toi,https://www.youtube.com/results?search_query=b...,2025-02-08 05:10:27.476


In [708]:
# Separate the search history and watch history using regex
pattern = r'^(Searched|Watched)\s+(.*)$'

df_search[["type", "title"]] = df_search["title"].str.extract(pattern)
df_search.head()

Unnamed: 0,title,url,date_time,type
0,for half horse half man song,https://www.youtube.com/results?search_query=h...,2025-02-13 04:53:16.321,Searched
1,Gymshark - Lift Seamless USP,https://www.youtube.com/watch?v=yJuD5y55kZ0,2025-02-11 01:43:09.379,Watched
2,lululemon | FAWI Seasonal 2024 | 15s | YOGAwCO...,https://www.youtube.com/watch?v=kpb97e0nYmY,2025-02-11 01:38:02.964,Watched
3,(2/18) Now Taxes is Free in the App - TurboTa...,https://www.youtube.com/watch?v=Ua9ffbSO-20,2025-02-08 05:10:47.120,Watched
4,for boston junior toi,https://www.youtube.com/results?search_query=b...,2025-02-08 05:10:27.476,Searched


In [709]:
# Convert date_time to datetime
df_search['date_time'] = pd.to_datetime(df_search['date_time'], errors='coerce', utc=True)

# Convert from UTC to US/Eastern timezone
df_search['date_time'] = df_search['date_time'].dt.tz_convert('US/Eastern')

# Extract the date and time separately
df_search['date'] = df_search['date_time'].dt.date
df_search['time'] = df_search['date_time'].dt.time
df_search.head(2)

Unnamed: 0,title,url,date_time,type,date,time
0,for half horse half man song,https://www.youtube.com/results?search_query=h...,2025-02-12 23:53:16.321000-05:00,Searched,2025-02-12,23:53:16.321000
1,Gymshark - Lift Seamless USP,https://www.youtube.com/watch?v=yJuD5y55kZ0,2025-02-10 20:43:09.379000-05:00,Watched,2025-02-10,20:43:09.379000


## Combine the search and watch history data exported from Google Takeout

In [710]:
# Combine the dataframes
df = pd.concat([df_search, df_watch], axis=0)

# Remove duplicate records
df = df.drop_duplicates()

# Sort by date and time and filter out time range
start_date = "2025-01-01"
end_date = "2025-02-17"
start_date = pd.to_datetime(start_date).date()
end_date = pd.to_datetime(end_date).date()
df = df[(df["date"] >= start_date) & (df["date"] <= end_date)]
df = df.sort_values("date_time")

# Extract the day of the week
df['day_of_week'] = df['date_time'].dt.day_name()

df.head()

Unnamed: 0,title,url,date_time,type,date,time,day_of_week
50,Kid-Sized,https://www.youtube.com/watch?v=5hLPYkJ5YOU,2025-01-07 13:59:03.039000-05:00,Watched,2025-01-07,13:59:03.039000,Tuesday
49,Jing-bu Bells🎶🎅🏻🔔,https://www.youtube.com/watch?v=ZdRV7zkXG6w,2025-01-07 13:59:07.318000-05:00,Watched,2025-01-07,13:59:07.318000,Tuesday
48,Ready for a New Chapter? Start Your Therapy Jo...,https://www.youtube.com/watch?v=wNVfY_GisCY,2025-01-07 13:59:11.033000-05:00,Watched,2025-01-07,13:59:11.033000,Tuesday
47,Stranger Things: The First Shadow — Coming to ...,https://www.youtube.com/watch?v=KQ6Mmd8Txww,2025-01-07 13:59:14.595000-05:00,Watched,2025-01-07,13:59:14.595000,Tuesday
46,SHADOWBORN,https://www.youtube.com/watch?v=N6xMg5CzJLI,2025-01-07 13:59:16.828000-05:00,Watched,2025-01-07,13:59:16.828000,Tuesday


## Create summary for analyzing Youtube summary statistics

In [711]:
# Define a threshold for a new session; events more than 30 minutes apart indicate a new session
session_gap = timedelta(minutes=30)

# Compute the difference in time between consecutive events
df['time_diff'] = df['date_time'].diff()

# Create a boolean column indicating if an event starts a new session
df['new_session'] = (df['time_diff'] > session_gap) | (df['time_diff'].isna())

# Create a session ID by cumulatively summing the new_session markers
df['session_id'] = df['new_session'].cumsum()

# Calculate session start, end, and duration by grouping on session_id
session_stats = df.groupby('session_id')['date_time'].agg(['min', 'max'])
session_stats['session_duration'] = session_stats['max'] - session_stats['min']
session_stats['date'] = session_stats['max'].dt.date

session_stats.head()

Unnamed: 0_level_0,min,max,session_duration,date
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2025-01-07 13:59:03.039000-05:00,2025-01-07 14:08:47.753000-05:00,0 days 00:09:44.714000,2025-01-07
2,2025-01-07 19:05:57.340000-05:00,2025-01-07 19:12:33.446000-05:00,0 days 00:06:36.106000,2025-01-07
3,2025-01-07 19:49:05.812000-05:00,2025-01-07 20:04:03.239000-05:00,0 days 00:14:57.427000,2025-01-07
4,2025-01-09 22:23:22.028000-05:00,2025-01-09 22:23:44.782000-05:00,0 days 00:00:22.754000,2025-01-09
5,2025-01-22 20:55:16.692000-05:00,2025-01-22 21:00:32.489000-05:00,0 days 00:05:15.797000,2025-01-22


In [712]:
# Extract the date from the session's end time (max)
session_stats['date'] = session_stats['max'].dt.date

# For each date, identify the session with the latest end time ('max')
last_sessions = session_stats.loc[session_stats.groupby('date')['max'].idxmax()]

# Get the session IDs corresponding to the last session of each day
last_session_ids = last_sessions.index

# Filter the original DataFrame to include only the records from these last sessions
df_last_sessions = df[df['session_id'].isin(last_session_ids)]

# Now, for each of these last sessions, pick only the final record (i.e. the one with the max timestamp)
last_record_per_session = df_last_sessions.groupby('session_id', as_index=False).last()

last_session_df = last_record_per_session[['date_time', 'session_id', 'time_diff', 'date']]
last_session_df.head()

Unnamed: 0,date_time,session_id,time_diff,date
0,2025-01-07 20:04:03.239000-05:00,3,0 days 00:00:36.968000,2025-01-07
1,2025-01-09 22:23:44.782000-05:00,4,0 days 00:00:09.109000,2025-01-09
2,2025-01-22 21:00:32.489000-05:00,5,0 days 00:00:03.347000,2025-01-22
3,2025-02-06 01:48:59.895000-05:00,6,0 days 00:00:07.418000,2025-02-06
4,2025-02-08 00:10:47.120000-05:00,7,0 days 00:00:11.137000,2025-02-08


## Create summary for overall Youtube activity

In [713]:
# Function for getting the first and last visits on Youtube 
def get_first_and_last_visits(df):
    # find first and last visited websites per date
    first_visits = df.loc[df.groupby("date")["time"].idxmin()]
    last_visits = df.loc[df.groupby("date")["time"].idxmax()]

    # select relevant columns
    first_visits = first_visits[["date", "time", "url", "title", "type"]].rename(columns={
        "time": "time_first",
        "url": "url_first",
        "title": "title_first",
        "type": "type_first"
    })
    
    last_visits = last_visits[["date", "time", "url", "title", "type"]].rename(columns={
        "time": "time_last",
        "url": "url_last",
        "title": "title_last",
        "type": "type_last"
    })

    # merge both DataFrames on visit_date to get one row per date
    youtube_summary = pd.merge(first_visits, last_visits, on="date")
    return youtube_summary

youtube_summary_df = get_first_and_last_visits(df)

youtube_summary_df.head()

Unnamed: 0,date,time_first,url_first,title_first,type_first,time_last,url_last,title_last,type_last
0,2025-01-07,13:59:03.039000,https://www.youtube.com/watch?v=5hLPYkJ5YOU,Kid-Sized,Watched,20:04:03.239000,https://www.youtube.com/watch?v=eTvs8ldNJ7g,[KR] BROMANCE KOREAN DRAMA TRAILER | Let Me Be...,Watched
1,2025-01-09,22:23:22.028000,https://www.youtube.com/watch?v=yddr6F34UjQ,First Time | Airbnb Setup,Watched,22:23:44.782000,https://www.youtube.com/watch?v=MLWcgJ1wLww,Love Your Skin | Ulta Beauty,Watched
2,2025-01-22,20:55:16.692000,https://www.youtube.com/results?search_query=u...,for us figure skating championships 2025,Searched,21:00:32.489000,https://www.youtube.com/watch?v=kk0C6B_UlA0,Saya Carpenter - Jon Maravilla | Junior Pairs ...,Watched
3,2025-02-10,20:38:13.461000,https://www.youtube.com/watch?v=wtX_lGSuD3M,DARK ARIA ＜LV2＞ x Vogel Im Käfig | Solo Leveli...,Watched,20:43:40.666000,https://www.youtube.com/watch?v=STH343uvhVo,Technology I GEL-NIMBUS® 27,Watched
4,2025-02-10,20:38:13.461000,https://www.youtube.com/watch?v=wtX_lGSuD3M,DARK ARIA ＜LV2＞ x Vogel Im Käfig | Solo Leveli...,Watched,20:51:10.881000,https://www.youtube.com/watch?v=qUFRPDHs1Q8,【MV】『SHADOWBORN』 Hiroyuki SAWANO feat. Benjami...,Watched


In [714]:
# Add the data for the last session each night
youtube_summary_df = pd.merge(youtube_summary_df, last_session_df, on="date", how="inner")
youtube_summary_df["had_session"] = np.where(youtube_summary_df["session_id"].isna(), False, True)
youtube_summary_df = youtube_summary_df.drop_duplicates(subset=['date'])

# Rename column for time_diff
youtube_summary_df = youtube_summary_df.rename(columns={"time_diff": "session_duration"})

youtube_summary_df.head()

Unnamed: 0,date,time_first,url_first,title_first,type_first,time_last,url_last,title_last,type_last,date_time,session_id,session_duration,had_session
0,2025-01-07,13:59:03.039000,https://www.youtube.com/watch?v=5hLPYkJ5YOU,Kid-Sized,Watched,20:04:03.239000,https://www.youtube.com/watch?v=eTvs8ldNJ7g,[KR] BROMANCE KOREAN DRAMA TRAILER | Let Me Be...,Watched,2025-01-07 20:04:03.239000-05:00,3,0 days 00:00:36.968000,True
1,2025-01-09,22:23:22.028000,https://www.youtube.com/watch?v=yddr6F34UjQ,First Time | Airbnb Setup,Watched,22:23:44.782000,https://www.youtube.com/watch?v=MLWcgJ1wLww,Love Your Skin | Ulta Beauty,Watched,2025-01-09 22:23:44.782000-05:00,4,0 days 00:00:09.109000,True
2,2025-01-22,20:55:16.692000,https://www.youtube.com/results?search_query=u...,for us figure skating championships 2025,Searched,21:00:32.489000,https://www.youtube.com/watch?v=kk0C6B_UlA0,Saya Carpenter - Jon Maravilla | Junior Pairs ...,Watched,2025-01-22 21:00:32.489000-05:00,5,0 days 00:00:03.347000,True
3,2025-02-10,20:38:13.461000,https://www.youtube.com/watch?v=wtX_lGSuD3M,DARK ARIA ＜LV2＞ x Vogel Im Käfig | Solo Leveli...,Watched,20:43:40.666000,https://www.youtube.com/watch?v=STH343uvhVo,Technology I GEL-NIMBUS® 27,Watched,2025-02-10 20:54:30.969000-05:00,9,0 days 00:03:20.088000,True
8,2025-02-06,01:17:28.203000,https://www.youtube.com/watch?v=om_POD45fCs,doing the club penguin dance,Watched,01:48:59.895000,https://www.youtube.com/watch?v=HwStBkyH738,HighlightingMore :13 16:9,Watched,2025-02-06 01:48:59.895000-05:00,6,0 days 00:00:07.418000,True


## Combine Youtube summary with sleep data

In [715]:
# Import sleep data

# CHANGE THIS TO YOUR FILE PATH
sleep_filename = "../cheryl_sleep_data.csv"

sleep_df = pd.read_csv(sleep_filename)
sleep_df["Date"] = pd.to_datetime(sleep_df["Date"])
sleep_df = sleep_df.rename(columns={"Date": "date"})
sleep_df = sleep_df.drop(columns=["Unnamed: 0"])
sleep_df.tail()

Unnamed: 0,date,total_sleep_hours,sleep_start,sleep_end
778,2025-02-13,8.241667,2025-02-13 01:18:33-05:00,2025-02-13 09:33:33-05:00
779,2025-02-14,5.233333,2025-02-14 00:58:44-05:00,2025-02-14 06:25:14-05:00
780,2025-02-15,7.575,2025-02-14 23:41:46-05:00,2025-02-15 17:14:49-05:00
781,2025-02-16,8.016667,2025-02-15 23:03:10-05:00,2025-02-16 14:40:19-05:00
782,2025-02-17,7.35,2025-02-17 01:12:58-05:00,2025-02-17 08:39:28-05:00


In [716]:
youtube_summary_df["date"] = pd.to_datetime(youtube_summary_df["date"])
df = pd.merge(sleep_df, youtube_summary_df, on="date", how="right")
df.head()

Unnamed: 0,date,total_sleep_hours,sleep_start,sleep_end,time_first,url_first,title_first,type_first,time_last,url_last,title_last,type_last,date_time,session_id,session_duration,had_session
0,2025-01-07,8.816667,2025-01-07 00:51:00-05:00,2025-01-07 10:00:00-05:00,13:59:03.039000,https://www.youtube.com/watch?v=5hLPYkJ5YOU,Kid-Sized,Watched,20:04:03.239000,https://www.youtube.com/watch?v=eTvs8ldNJ7g,[KR] BROMANCE KOREAN DRAMA TRAILER | Let Me Be...,Watched,2025-01-07 20:04:03.239000-05:00,3,0 days 00:00:36.968000,True
1,2025-01-09,6.2,2025-01-09 00:47:51-05:00,2025-01-09 07:44:51-05:00,22:23:22.028000,https://www.youtube.com/watch?v=yddr6F34UjQ,First Time | Airbnb Setup,Watched,22:23:44.782000,https://www.youtube.com/watch?v=MLWcgJ1wLww,Love Your Skin | Ulta Beauty,Watched,2025-01-09 22:23:44.782000-05:00,4,0 days 00:00:09.109000,True
2,2025-01-22,7.025,2025-01-22 01:54:51-05:00,2025-01-22 08:59:51-05:00,20:55:16.692000,https://www.youtube.com/results?search_query=u...,for us figure skating championships 2025,Searched,21:00:32.489000,https://www.youtube.com/watch?v=kk0C6B_UlA0,Saya Carpenter - Jon Maravilla | Junior Pairs ...,Watched,2025-01-22 21:00:32.489000-05:00,5,0 days 00:00:03.347000,True
3,2025-02-10,7.366667,2025-02-10 01:21:08-05:00,2025-02-10 08:44:38-05:00,20:38:13.461000,https://www.youtube.com/watch?v=wtX_lGSuD3M,DARK ARIA ＜LV2＞ x Vogel Im Käfig | Solo Leveli...,Watched,20:43:40.666000,https://www.youtube.com/watch?v=STH343uvhVo,Technology I GEL-NIMBUS® 27,Watched,2025-02-10 20:54:30.969000-05:00,9,0 days 00:03:20.088000,True
4,2025-02-06,6.883333,2025-02-06 03:05:30-05:00,2025-02-06 10:00:00-05:00,01:17:28.203000,https://www.youtube.com/watch?v=om_POD45fCs,doing the club penguin dance,Watched,01:48:59.895000,https://www.youtube.com/watch?v=HwStBkyH738,HighlightingMore :13 16:9,Watched,2025-02-06 01:48:59.895000-05:00,6,0 days 00:00:07.418000,True


In [717]:
# Add videos per day
df = pd.merge(df, videos_per_day,  on="date", how="right")
df = df[df['total_videos_watched'] <= 180]
df.head()

Unnamed: 0,date,total_sleep_hours,sleep_start,sleep_end,time_first,url_first,title_first,type_first,time_last,url_last,title_last,type_last,date_time,session_id,session_duration,had_session,total_videos_watched
0,2025-01-07,8.816667,2025-01-07 00:51:00-05:00,2025-01-07 10:00:00-05:00,13:59:03.039000,https://www.youtube.com/watch?v=5hLPYkJ5YOU,Kid-Sized,Watched,20:04:03.239000,https://www.youtube.com/watch?v=eTvs8ldNJ7g,[KR] BROMANCE KOREAN DRAMA TRAILER | Let Me Be...,Watched,2025-01-07 20:04:03.239000-05:00,3,0 days 00:00:36.968000,True,20
1,2025-01-09,6.2,2025-01-09 00:47:51-05:00,2025-01-09 07:44:51-05:00,22:23:22.028000,https://www.youtube.com/watch?v=yddr6F34UjQ,First Time | Airbnb Setup,Watched,22:23:44.782000,https://www.youtube.com/watch?v=MLWcgJ1wLww,Love Your Skin | Ulta Beauty,Watched,2025-01-09 22:23:44.782000-05:00,4,0 days 00:00:09.109000,True,4
2,2025-01-22,7.025,2025-01-22 01:54:51-05:00,2025-01-22 08:59:51-05:00,20:55:16.692000,https://www.youtube.com/results?search_query=u...,for us figure skating championships 2025,Searched,21:00:32.489000,https://www.youtube.com/watch?v=kk0C6B_UlA0,Saya Carpenter - Jon Maravilla | Junior Pairs ...,Watched,2025-01-22 21:00:32.489000-05:00,5,0 days 00:00:03.347000,True,6
3,2025-02-06,6.883333,2025-02-06 03:05:30-05:00,2025-02-06 10:00:00-05:00,01:17:28.203000,https://www.youtube.com/watch?v=om_POD45fCs,doing the club penguin dance,Watched,01:48:59.895000,https://www.youtube.com/watch?v=HwStBkyH738,HighlightingMore :13 16:9,Watched,2025-02-06 01:48:59.895000-05:00,6,0 days 00:00:07.418000,True,5
4,2025-02-08,7.216667,2025-02-08 02:51:47-05:00,2025-02-08 12:48:25-05:00,00:10:27.476000,https://www.youtube.com/results?search_query=b...,for boston junior toi,Searched,00:10:47.120000,https://www.youtube.com/watch?v=Ua9ffbSO-20,(2/18) Now Taxes is Free in the App - TurboTa...,Watched,2025-02-08 00:10:47.120000-05:00,7,0 days 00:00:11.137000,True,2


In [718]:
# RENAME FILE AS APPROPRIATE
df.to_csv("../youtube_sleep_CB.csv")