In [1]:
import pandas as pd
import json

# Some info on operation types:

Liked - you liked a video

Disliked - you disliked a video

Subscribed - you subscribed to a channel

Watched - you watched a video or an ad

---

Answered - you answered in a questionnaire

Viewed - you viewed a community post

Voted - you voted in a poll

---

Saved - you saved a video to a playlist; the record layout is unusual:
```
"title": [only name of the video you saved]
"titleUrl": [playlist where you saved, URL]
"subtitles": [{
    "name": [Name of channel of the video saved],
    "url": [URL of channel of the video saved]
    }],
```

In [2]:
def split_operation_title(title):
    """Split an activity title into its leading operation and the remaining text.

    Args:
        title: Raw ``title`` value of an activity record, for example
            ``"Watched Double Skull Bowie"``. Non-string values (such as
            ``None`` for a missing title) are tolerated.

    Returns:
        list: ``[operation, cleaned_title]``. ``operation`` is the first entry
        of the known-operations list that ``title`` starts with, and
        ``cleaned_title`` is the rest of the title with surrounding whitespace
        stripped. Both elements are ``""`` when ``title`` is not a string or
        does not start with any known operation.
    """
    # Order matters: the full "removed" sentence must be tried before the
    # plain 'Watched' prefix, otherwise 'Watched' would match first.
    operations = ['Watched a video that has been removed', 'Answered', 'Disliked', 'Liked', 'Saved', 'Subscribed', 'Viewed', 'Voted', 'Watched']

    # A missing title cannot be split; report "no operation" instead of
    # failing on ``None.startswith``.
    if not isinstance(title, str):
        return ["", ""]

    for operation in operations:
        if title.startswith(operation):
            # Drop the operation prefix and any whitespace around the remainder.
            return [operation, title[len(operation):].strip()]

    return ["", ""]

In [3]:
def is_post(titleUrl):
    """Tell whether an activity URL points to a YouTube community post.

    Args:
        titleUrl: The record's ``titleUrl`` value, or a missing value.

    Returns:
        ``True`` if ``titleUrl`` contains ``"youtube.com/post/"``, ``False`` if
        it is a string that does not, and ``None`` when ``titleUrl`` is not a
        string (``None``, NaN, ...), i.e. the answer is unknown.
    """
    # Anything that is not a string (None, NaN, ...) carries no URL to inspect.
    if not isinstance(titleUrl, str):
        return None
    return "youtube.com/post/" in titleUrl
    
def is_removed(title):
    """Tell whether an activity title is the placeholder for a removed video.

    Args:
        title: The record's raw ``title`` value, or a missing value.

    Returns:
        ``True`` if ``title`` starts with
        ``"Watched a video that has been removed"``, ``False`` for any other
        string, and ``None`` when ``title`` is not a string (``None``, NaN, ...).
    """
    # Anything that is not a string (None, NaN, ...) cannot be classified.
    if not isinstance(title, str):
        return None

    # Match the placeholder as a prefix (the same way split_operation_title
    # recognises it) so a regular video whose own title merely quotes the
    # sentence is not reported as removed.
    return title.startswith("Watched a video that has been removed")
    

# Possible ad marker (not always reliable, so is_ad() is left unimplemented):
# def is_ad():
#     pass
#   "details": [{
#     "name": "From Google Ads"
#   }],

In [4]:
# Load point
# A forward slash is used as the separator: it is accepted on Windows and
# POSIX alike and avoids the invalid "\M" escape sequence of the backslash form.
# The file is decoded explicitly as UTF-8 (the JSON interchange encoding) so
# non-ASCII titles, e.g. emoji, do not depend on the platform default encoding.
with open('source_file/My Activity.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [5]:
# TEST and DEMO

def test_demo():
    """Print a readable dump of every record in the module-level ``data`` list.

    For each record this prints a separator line, the ``[operation, title]``
    split of its title, the title URL together with its ``is_post`` flag, the
    first subtitle's name and URL, and finally the timestamp. Optional fields
    that are absent are skipped silently.
    """
    for item in data:
        print("-----------------------------")
        # print(item) #single item
        print(split_operation_title(item['title']))
        # Some records carry no 'titleUrl', e.g. removed videos:
        # {'header': 'YouTube', 'title': 'Watched a video that has been removed',
        #  'time': '2023-09-25T16:37:43.195Z', 'products': ['YouTube'],
        #  'activityControls': ['YouTube watch history']}
        try:
            print(item['titleUrl'])
            print(is_post(item['titleUrl']))
        except (KeyError, TypeError):
            # Best effort: only a missing or malformed field is skipped; other
            # errors (and KeyboardInterrupt) are no longer swallowed.
            pass
        # Subscribe records have no 'subtitles'.
        try:
            print(item['subtitles'][0]['name'])  # channel name
            print(item['subtitles'][0]['url'])  # channel url
        except (KeyError, IndexError, TypeError):
            pass
        print(item['time'])
        
# test_demo()

In [6]:
def safe_get(item, key, default=None):
    """Return ``item[key]``, or ``default`` when the lookup is not possible.

    Works for mappings and sequences alike.

    Args:
        item: Object to index (dict, list, ...); may also be ``None``.
        key: Key or index to look up.
        default: Value returned when the lookup fails. Defaults to ``None``.

    Returns:
        ``item[key]`` on success; ``default`` if the key is missing, the index
        is out of range, or ``item`` does not support indexing.
    """
    try:
        return item[key]
    # LookupError covers both KeyError (mappings) and IndexError (sequences);
    # TypeError covers non-subscriptable values such as None.
    except (LookupError, TypeError):
        return default

# Split every title once and reuse the result for both the 'operation' and
# 'title' columns. A missing or null title is treated as an empty string so a
# single incomplete record cannot abort the whole build.
_split_titles = [split_operation_title(safe_get(item, 'title') or '') for item in data]

# First 'subtitles' entry of each record, or an empty dict when the key is
# absent or the list is empty (indexing an empty list would raise IndexError).
_first_subtitles = [(safe_get(item, 'subtitles') or [{}])[0] for item in data]

# Column-oriented view of the history: one list per column, one element per record.
dict_history = {
    'timestamp': [safe_get(item, 'time') for item in data],
    'operation': [safe_get(parts, 0) for parts in _split_titles],
    'title': [safe_get(parts, 1) for parts in _split_titles],
    'is_removed': [is_removed(safe_get(item, 'title')) for item in data],
    'title_url': [safe_get(item, 'titleUrl') for item in data],
    'is_post': [is_post(safe_get(item, 'titleUrl')) for item in data],
    'channel_name': [safe_get(subtitle, 'name') for subtitle in _first_subtitles],
    'channel_url': [safe_get(subtitle, 'url') for subtitle in _first_subtitles],
}

# Sanity check: every column must have the same length.
for key, value in dict_history.items():
    print(key, len(value))

timestamp 6
operation 6
title 6
is_removed 6
title_url 6
is_post 6
channel_name 6
channel_url 6


In [7]:
# get list of all possible operations

def possible_operations(list_of_full_titles=None):
    """Return the sorted, de-duplicated first words of the given titles.

    Args:
        list_of_full_titles: Iterable of raw title values. When omitted, the
            titles are read from the module-level ``data`` at call time, so
            the result always reflects the currently loaded data rather than
            whatever was loaded when the function was defined.

    Returns:
        list: Unique first words in ascending order. Entries that are not
        strings or contain no words are ignored.
    """
    if list_of_full_titles is None:
        list_of_full_titles = [safe_get(item, "title") for item in data]

    first_words = set()
    for title in list_of_full_titles:
        # Missing titles (None) have nothing to contribute.
        if not isinstance(title, str):
            continue
        words = title.split()
        # Empty or whitespace-only titles have no first word.
        if words:
            first_words.add(words[0])

    return sorted(first_words)

print(possible_operations())

['Disliked', 'Liked', 'Subscribed', 'Watched']


In [8]:
# Build the history DataFrame: each key of dict_history becomes a column.
df = pd.DataFrame(data=dict_history)
df

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url
0,2023-01-31T14:14:14.011Z,Watched,Double Skull Bowie,False,https://www.youtube.com/watch?v=nBKwJw3rO6U,False,jimmydiresta,https://www.youtube.com/channel/UCiEk4xHBbz0hZ...
1,2023-01-21T11:11:11.111Z,Watched a video that has been removed,,True,,,,
2,2023-09-21T20:20:30.333Z,Liked,This Video is in Reverse.,False,https://www.youtube.com/watch?v=g_a3TQ9L9cM,False,Eran Amir,https://www.youtube.com/channel/UC_lUpneuEUzHV...
3,2023-04-19T19:01:22.333Z,Watched,Secret d'acteurs,False,https://www.youtube.com/watch?v=vc_MIvoVyxs,False,,
4,2023-02-01T00:54:00.707Z,Disliked,🔱 If u needed a sign this is it 🔱 Check BIO,False,https://www.youtube.com/watch?v=_x_harT-ur8,False,Poseidon Originals,https://www.youtube.com/channel/UCKzQMVJWmvkMW...
5,2023-01-30T12:11:11.111Z,Subscribed,to Wrong Way!,False,https://www.youtube.com/channel/UC2RaB95OJ2j3-...,False,,


In [9]:
# get list of all liked videos

# Liked entries that are not community posts and have a known channel.
liked_videos = df.loc[
    df['operation'].eq('Liked')
    & df['is_post'].eq(False)
    & df['channel_name'].notna()
]

# Write one URL per line to output.txt; the file can be passed to yt-dlp as a batch file.
with open('output.txt', 'w') as batch_file:
    batch_file.write(liked_videos['title_url'].str.cat(sep='\n'))

display(liked_videos)

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url
2,2023-09-21T20:20:30.333Z,Liked,This Video is in Reverse.,False,https://www.youtube.com/watch?v=g_a3TQ9L9cM,False,Eran Amir,https://www.youtube.com/channel/UC_lUpneuEUzHV...


In [10]:
# Every record whose operation is a channel subscription
df[df['operation'].eq('Subscribed')]

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url
5,2023-01-30T12:11:11.111Z,Subscribed,to Wrong Way!,False,https://www.youtube.com/channel/UC2RaB95OJ2j3-...,False,,


In [11]:
# Records flagged as removed videos

df[df['is_removed'].eq(True)]

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url
1,2023-01-21T11:11:11.111Z,Watched a video that has been removed,,True,,,,


In [12]:
df[df['operation'].eq('Watched')]  # all watched entries, ads included

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url
0,2023-01-31T14:14:14.011Z,Watched,Double Skull Bowie,False,https://www.youtube.com/watch?v=nBKwJw3rO6U,False,jimmydiresta,https://www.youtube.com/channel/UCiEk4xHBbz0hZ...
3,2023-04-19T19:01:22.333Z,Watched,Secret d'acteurs,False,https://www.youtube.com/watch?v=vc_MIvoVyxs,False,,


In [13]:
df[df['operation'].eq('Watched') & df['is_post'].eq(False) & df['channel_name'].notna()]  # watched entries without ads

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url
0,2023-01-31T14:14:14.011Z,Watched,Double Skull Bowie,False,https://www.youtube.com/watch?v=nBKwJw3rO6U,False,jimmydiresta,https://www.youtube.com/channel/UCiEk4xHBbz0hZ...


In [14]:
df[df['operation'].eq('Watched') & df['is_post'].eq(False) & df['channel_name'].isna()].sort_values(by=['timestamp'])  # ads only, ordered by timestamp

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url
3,2023-04-19T19:01:22.333Z,Watched,Secret d'acteurs,False,https://www.youtube.com/watch?v=vc_MIvoVyxs,False,,


In [15]:
df[df['operation'].eq('Disliked')]  # disliked videos

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url
4,2023-02-01T00:54:00.707Z,Disliked,🔱 If u needed a sign this is it 🔱 Check BIO,False,https://www.youtube.com/watch?v=_x_harT-ur8,False,Poseidon Originals,https://www.youtube.com/channel/UCKzQMVJWmvkMW...


In [16]:
df[df['operation'].eq('Answered')]  # questionnaire answers

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url


In [17]:
display(df[df['operation'].eq('Viewed')])  # viewed community posts

# for item in df.loc[(df['operation'] == 'Viewed')]['title_url']:
#     print(item)

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url


In [18]:
df[df['operation'].eq('Voted')]  # poll votes

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url


In [19]:
df[df['operation'].eq('Saved')]  # videos saved to a playlist

Unnamed: 0,timestamp,operation,title,is_removed,title_url,is_post,channel_name,channel_url
