### Python script to calculate the total duration of all the videos in a playlist of a YouTube channel

#### Importing necessary packages

In [48]:
# Standard library
import os
import re
from datetime import timedelta

# Third-party: Google API client, used below to build the YouTube service object
from googleapiclient.discovery import build

# The API key is read from the `youtube_api` environment variable so that it
# never appears in the notebook; the value is None when the variable is unset.
youtube_api_key = os.environ.get('youtube_api')

#### Creating API service object using build() function

In [49]:
# Service object for version 3 of the YouTube API, authenticated with the API key.
youtube = build(
    'youtube',
    'v3',
    developerKey=youtube_api_key,
)

#### Getting channel id using channels() instance method

In [50]:
# Look the channel up by username, asking for its `contentDetails`
# (which holds the related playlists) and `statistics` parts.
channel_query = {
    'part': 'contentDetails, statistics',
    'forUsername': 'schafer5',
}
request = youtube.channels().list(**channel_query)

# The response is a plain dict; the channel id is under items[0]['id'].
response = request.execute()
print(response)

{'kind': 'youtube#channelListResponse', 'etag': 'CyJOm4sGTdgO3lE7MovTwcRHuBA', 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5}, 'items': [{'kind': 'youtube#channel', 'etag': 'qrRtuSN673A8dj8Dr2Q3RxYb9YU', 'id': 'UCCezIgC97PvUuR4_gbFUs5g', 'contentDetails': {'relatedPlaylists': {'likes': '', 'uploads': 'UUCezIgC97PvUuR4_gbFUs5g'}}, 'statistics': {'viewCount': '73461867', 'subscriberCount': '934000', 'hiddenSubscriberCount': False, 'videoCount': '230'}}]}


#### Listing first 5 playlists of the channel

In [10]:
# Ask for the playlists owned by the channel id found in the previous step.
# No page token is passed, so only the first page of results is returned.
pl_request = youtube.playlists().list(
    channelId='UCCezIgC97PvUuR4_gbFUs5g',
    part='contentDetails, snippet',
)
pl_response = pl_request.execute()

# Print each playlist resource followed by a blank line.
for item in pl_response['items']:
    print(item, end='\n\n')

{'kind': 'youtube#playlist', 'etag': 'qIdd8wCVgQOMhzTyggaBTFhGnvk', 'id': 'PL-osiE80TeTsWmV9i9c58mdDCSskIFdDS', 'snippet': {'publishedAt': '2020-01-08T16:44:09Z', 'channelId': 'UCCezIgC97PvUuR4_gbFUs5g', 'title': 'Pandas Tutorials', 'description': '', 'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/ZyhVh-qRZPA/default.jpg', 'width': 120, 'height': 90}, 'medium': {'url': 'https://i.ytimg.com/vi/ZyhVh-qRZPA/mqdefault.jpg', 'width': 320, 'height': 180}, 'high': {'url': 'https://i.ytimg.com/vi/ZyhVh-qRZPA/hqdefault.jpg', 'width': 480, 'height': 360}, 'standard': {'url': 'https://i.ytimg.com/vi/ZyhVh-qRZPA/sddefault.jpg', 'width': 640, 'height': 480}, 'maxres': {'url': 'https://i.ytimg.com/vi/ZyhVh-qRZPA/maxresdefault.jpg', 'width': 1280, 'height': 720}}, 'channelTitle': 'Corey Schafer', 'localized': {'title': 'Pandas Tutorials', 'description': ''}}, 'contentDetails': {'itemCount': 11}}

{'kind': 'youtube#playlist', 'etag': 'Zob8gLvnNo6qswxisYTrwVTRMR0', 'id': 'PL-osiE80TeTvipOqom

#### Getting video ids of first 5 videos in a particular playlist

In [57]:
# Request the first page of items in the chosen playlist.
pl_request2 = youtube.playlistItems().list(
    playlistId='PL-osiE80TeTsWmV9i9c58mdDCSskIFdDS',
    part='contentDetails',
)

# Collect the video id carried by every playlist item on that page.
vid_ids = []
pl_response2 = pl_request2.execute()
vid_ids.extend(entry['contentDetails']['videoId'] for entry in pl_response2['items'])

vid_ids

['ZyhVh-qRZPA', 'zmdjNSmRXF4', 'W9XjRYFkkyw', 'Lw2rlcxScZY', 'DCDe29sIKcE']

#### Creating a string out of list of video ids

In [58]:
# Preview of the comma-separated id string that is passed to videos().list below.
",".join(vid_ids)

'ZyhVh-qRZPA,zmdjNSmRXF4,W9XjRYFkkyw,Lw2rlcxScZY,DCDe29sIKcE'

#### Extracting playtime of each video

In [59]:
# Fetch the `contentDetails` of all collected videos with a single request;
# the ids are sent as one comma-separated string.
vid_request = youtube.videos().list(
    id=','.join(vid_ids),
    part='contentDetails',
)
vid_response = vid_request.execute()

# Print each video's raw duration string followed by a blank line.
for item in vid_response['items']:
    duration = item['contentDetails']['duration']
    print(duration, end='\n\n')

# The durations printed here look like PT23M1S, i.e. PT<minutes>M<seconds>S.

PT23M1S

PT33M35S

PT17M27S

PT23M4S

PT40M3S



#### Parsing hours, minutes and seconds from the PT(Minutes)M(Seconds)S duration format using the `re` module

In [29]:
# One compiled regex per unit: each captures the run of digits that sits
# immediately before the unit letter (H, M or S) in a duration string.
hours_pattern, minutes_pattern, seconds_pattern = map(
    re.compile,
    (r'(\d+)H', r'(\d+)M', r'(\d+)S'),
)

In [61]:
# Run the three unit regexes over every video's duration string and show
# the raw match objects they produce.
for item in vid_response['items']:
    duration = item['contentDetails']['duration']

    hours, minutes, seconds = (
        unit_pattern.search(duration)
        for unit_pattern in (hours_pattern, minutes_pattern, seconds_pattern)
    )

    print(hours, minutes, seconds, end='\n\n')

# `hours` is None for every video because none of these durations has an
# hour component; minutes and seconds come back as match objects.

None <re.Match object; span=(2, 5), match='23M'> <re.Match object; span=(5, 7), match='1S'>

None <re.Match object; span=(2, 5), match='33M'> <re.Match object; span=(5, 8), match='35S'>

None <re.Match object; span=(2, 5), match='17M'> <re.Match object; span=(5, 8), match='27S'>

None <re.Match object; span=(2, 5), match='23M'> <re.Match object; span=(5, 7), match='4S'>

None <re.Match object; span=(2, 5), match='40M'> <re.Match object; span=(5, 7), match='3S'>



#### Extracting numeric values from minutes and seconds and converting them to int

In [34]:
# Turn each unit's match into an int, falling back to 0 when the unit is
# absent from the duration string (the search returned None).
for item in vid_response['items']:
    duration = item['contentDetails']['duration']

    hours, minutes, seconds = (
        int(found.group(1)) if found else 0
        for found in (
            hours_pattern.search(duration),
            minutes_pattern.search(duration),
            seconds_pattern.search(duration),
        )
    )

    print(hours, minutes, seconds, end='\n\n')

0 23 1

0 33 35

0 17 27

0 23 4

0 40 3



#### Using the timedelta class of the datetime module to calculate the total duration of each video in seconds

In [36]:
# Convert every video's duration string into a total number of seconds.
for item in vid_response['items']:
    duration = item['contentDetails']['duration']

    # A unit missing from the string counts as 0.
    hours, minutes, seconds = (
        int(found.group(1)) if found else 0
        for found in (
            hours_pattern.search(duration),
            minutes_pattern.search(duration),
            seconds_pattern.search(duration),
        )
    )

    # timedelta does the unit arithmetic; total_seconds() returns a float.
    video_length = timedelta(hours=hours, minutes=minutes, seconds=seconds)
    video_seconds = video_length.total_seconds()

    print(video_seconds, end='\n\n')

1381.0

2015.0

1047.0

1384.0

2403.0



#### The combined Python script to calculate the total duration of any playlist of a channel

In [64]:
# Regexes that capture the number in front of each unit letter of a duration
# string such as 'PT23M1S' or 'PT1H2M3S'. A day component (e.g. 'P1DT2H3M4S')
# is parsed as well so that very long videos are not under-counted.
days_pattern = re.compile(r'(\d+)D')
hours_pattern = re.compile(r'(\d+)H')
minutes_pattern = re.compile(r'(\d+)M')
seconds_pattern = re.compile(r'(\d+)S')

total_seconds = 0  # running total of the playlist's duration, in seconds

nextPageToken = None  # None requests the first page of the playlist

# Walk the playlist page by page until the API stops returning a page token.
while True:

    # Request one page (at most 50 entries) of the playlist's items.
    pl_request2 = youtube.playlistItems().list(
        part='contentDetails',
        playlistId='PL-osiE80TeTsWmV9i9c58mdDCSskIFdDS',
        maxResults=50,
        pageToken=nextPageToken,
    )
    pl_response2 = pl_request2.execute()

    # Video ids of every playlist item on this page.
    vid_ids = [item['contentDetails']['videoId'] for item in pl_response2['items']]

    # Skip the lookup when the page holds no items.
    if vid_ids:
        # Fetch the content details of all videos on this page in one request.
        vid_request = youtube.videos().list(
            part='contentDetails',
            id=','.join(vid_ids),
        )
        vid_response = vid_request.execute()

        for item in vid_response['items']:
            duration = item['contentDetails']['duration']

            # Apply the unit patterns to the duration string.
            days = days_pattern.search(duration)
            hours = hours_pattern.search(duration)
            minutes = minutes_pattern.search(duration)
            seconds = seconds_pattern.search(duration)

            # A unit that is absent from the string (no match) counts as 0.
            days = int(days.group(1)) if days else 0
            hours = int(hours.group(1)) if hours else 0
            minutes = int(minutes.group(1)) if minutes else 0
            seconds = int(seconds.group(1)) if seconds else 0

            # Let timedelta convert the components into seconds.
            video_seconds = timedelta(
                days=days,
                hours=hours,
                minutes=minutes,
                seconds=seconds,
            ).total_seconds()

            total_seconds += video_seconds

    # The continuation token is stored under the literal key 'nextPageToken'.
    # Looking it up with the *variable* nextPageToken (initially None) always
    # returned None, which ended the loop after the first page and silently
    # ignored everything beyond the first 50 playlist items.
    nextPageToken = pl_response2.get('nextPageToken')

    if not nextPageToken:
        break

# total_seconds() yields floats; the breakdown below needs an int.
total_seconds = int(total_seconds)

# Split the total into hours, minutes and seconds.
minutes, seconds = divmod(total_seconds, 60)
hours, minutes = divmod(minutes, 60)

print(f"Pandas Playlist of Corey Schafer's Channel is {hours}:{minutes}:{seconds} long!")

Pandas Playlist of Corey Schafer's Channel is 5:19:11 long!
