In [15]:
import csv
import googleapiclient.discovery
from datetime import datetime
import os

# Read the YouTube Data API key from the environment instead of embedding it
# in the source, so the credential is never stored alongside the code.
# NOTE(review): a key was previously hard-coded here (and is echoed in request
# URLs inside HttpError messages); treat it as exposed and revoke/rotate it.
api_key = os.environ.get('YOUTUBE_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to your YouTube Data API key"
    )

# Create a YouTube Data API client
api_service_name = "youtube"
api_version = "v3"
youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=api_key)

# Read video IDs from the first column of andrea_ID.csv.
# The first row is treated as a header and skipped.
video_ids = []
# newline='' lets the csv module handle line endings itself, as its
# documentation recommends for file objects passed to csv.reader.
with open('andrea_ID.csv', 'r', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    # A default of None keeps an empty file from raising StopIteration.
    next(csv_reader, None)
    for row in csv_reader:
        # Blank lines come back as empty lists; skip them and empty first
        # cells rather than failing with IndexError or storing an empty ID.
        if not row:
            continue
        video_id = row[0].strip()
        if video_id:
            video_ids.append(video_id)

# Half-open window [start_index, end_index) of positions in video_ids
# that this run will process.
start_index = 0
end_index = 76

# Never let the upper bound run past the number of IDs that were loaded.
if end_index > len(video_ids):
    end_index = len(video_ids)

# Output CSV that the collected rows are appended to
csv_filename = "youtube_data.csv"

# Accumulator for the row dictionaries gathered during this run
data = []

# Column names, in the order they appear in the CSV
headers = [
    'comment_id',
    'video_id',
    'video_title',
    'channel_title',
    'subscribers',
    'username',
    'comment',
    'likes',
    'comment_published_at',
    'replies',
    'view_count',
    'like_count',
    'comment_count',
    'video_published_at',
    'comment_type',
    'parent_comment_id',
]

# On first use, create the file containing only the header row;
# an already existing file is left untouched.
output_missing = not os.path.isfile(csv_filename)
if output_missing:
    with open(csv_filename, 'w', newline='', encoding='utf-8') as new_file:
        csv.writer(new_file).writerow(headers)

# Collect comment rows for every video ID in the selected range.
def _format_timestamp(timestamp):
    """Convert an API timestamp such as '2023-01-02T03:04:05Z' to 'YYYY-MM-DD HH:MM:SS'.

    A variant carrying fractional seconds ('...05.000Z') is accepted as well,
    so that such a value does not raise ValueError.
    """
    try:
        parsed = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        parsed = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ")
    return parsed.strftime("%Y-%m-%d %H:%M:%S")


def _build_row(comment_id, comment_snippet, video_fields, replies, comment_type, parent_comment_id):
    """Return one output row (a dict keyed by the CSV column names).

    comment_snippet is the 'snippet' dict of a top-level comment or of a reply;
    video_fields holds the per-video values shared by every row of that video.
    Raises KeyError if the snippet lacks one of the fields that are read.
    """
    comment = comment_snippet['textDisplay']
    username = comment_snippet['authorDisplayName']
    likes = comment_snippet['likeCount']
    published_at = _format_timestamp(comment_snippet['publishedAt'])
    return {
        'comment_id': comment_id,
        'video_id': video_fields['video_id'],
        'video_title': video_fields['video_title'],
        'channel_title': video_fields['channel_title'],
        'subscribers': video_fields['subscribers'],
        'username': username,
        'comment': comment,
        'likes': likes,
        'comment_published_at': published_at,
        'replies': replies,
        'view_count': video_fields['view_count'],
        'like_count': video_fields['like_count'],
        'comment_count': video_fields['comment_count'],
        'video_published_at': video_fields['video_published_at'],
        'comment_type': comment_type,
        'parent_comment_id': parent_comment_id,
    }


for video_id in video_ids[start_index:end_index]:
    try:
        print(f"Processing video: {video_id}")

        # Video and channel metadata do not depend on the comment ordering,
        # so they are fetched once per video rather than once per ordering.
        video_response = youtube.videos().list(
            part="snippet,statistics,contentDetails",
            id=video_id
        ).execute()

        video_items = video_response.get('items')
        if not video_items:
            # Without this guard the metadata variables would be undefined, or
            # would still hold the previous video's values, for the rows below.
            print(f"No details returned for video {video_id}; skipping")
            continue

        video = video_items[0]
        video_snippet = video['snippet']
        video_stats = video['statistics']

        # Retrieve video channel information
        channel_response = youtube.channels().list(
            part='snippet,statistics',
            id=video_snippet['channelId']
        ).execute()
        channel_items = channel_response.get('items') or []
        if channel_items:
            channel_title = channel_items[0]['snippet']['title']
            # subscriberCount may be absent from the statistics
            # (presumably when the channel hides it -- confirm against the API docs);
            # record an empty value instead of losing the whole video.
            subscribers = channel_items[0]['statistics'].get('subscriberCount')
        else:
            # NOTE(review): falls back to the channelTitle field of the video
            # snippet when no channel item is returned -- confirm it is populated.
            channel_title = video_snippet.get('channelTitle')
            subscribers = None

        video_fields = {
            'video_id': video_id,
            'video_title': video_snippet['title'],
            'channel_title': channel_title,
            'subscribers': subscribers,
            'view_count': video_stats.get('viewCount', 0),  # Handle videos with no view count
            'like_count': video_stats.get('likeCount', 0),  # Handle videos with no likes
            'comment_count': video_stats.get('commentCount', 0),  # Handle videos with no comments
            'video_published_at': _format_timestamp(video_snippet['publishedAt']),
        }

        # Two passes over the comment threads: explicit relevance ordering
        # (comment_type 1) and no explicit ordering (comment_type 0).
        for order in ['relevance', None]:
            comment_type = 1 if order == 'relevance' else 0
            page_token = None  # Start from the first page
            all_comments = []

            while True:
                # Call the API to retrieve one page of comment threads
                comments_response = youtube.commentThreads().list(
                    part='snippet,replies',
                    videoId=video_id,
                    textFormat='plainText',
                    maxResults=100,  # Fetch up to 100 comments per page
                    order=order,
                    pageToken=page_token  # Use the page token to get the next page
                ).execute()

                all_comments.extend(comments_response.get('items', []))

                page_token = comments_response.get('nextPageToken')
                if not page_token:
                    break  # No more pages

            for item in all_comments:
                try:
                    comment_id = item['id']
                    replies = item['snippet'].get('totalReplyCount', 0)

                    # Row for the top-level comment (it has no parent)
                    data.append(_build_row(
                        comment_id,
                        item['snippet']['topLevelComment']['snippet'],
                        video_fields,
                        replies,
                        comment_type,
                        None,
                    ))

                    # Rows for the replies embedded in the thread.
                    # NOTE(review): only the replies present in this response
                    # are recorded; this may be fewer than totalReplyCount.
                    if replies > 0:
                        for reply in item.get('replies', {}).get('comments', []):
                            data.append(_build_row(
                                reply['id'],
                                reply['snippet'],
                                video_fields,
                                0,  # Replies to replies are not being collected here
                                comment_type,
                                comment_id,  # Parent comment ID for replies
                            ))

                except Exception as comment_error:
                    # Best effort: a malformed comment must not abort the video
                    print(f"Error processing comment for video {video_id}: {str(comment_error)}")

    except Exception as video_error:
        # Best effort: report the failure (e.g. comments disabled) and move on
        print(f"An error occurred while processing video {video_id}: {str(video_error)}")

# Open the output CSV in append mode and add every collected row after
# whatever the file already contains.
with open(csv_filename, mode='a', newline='', encoding='utf-8') as out_file:
    row_writer = csv.DictWriter(out_file, fieldnames=headers)

    # Nothing is written, and nothing is reported, when no rows were collected
    if data:
        for row in data:
            row_writer.writerow(row)
        print("Data appended to", csv_filename)

Processing video: JhemzxdFjIw
An error occurred while processing video JhemzxdFjIw: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet%2Creplies&videoId=JhemzxdFjIw&textFormat=plainText&maxResults=100&order=relevance&key=AIzaSyCGxK6OOPUk7DCODbiFy2jZ27bF3Mlxgu4&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.". Details: "[{'message': 'The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.', 'domain': 'youtube.commentThread', 'reason': 'commentsDisabled', 'location': 'videoId', 'locationType': 'parameter'}]">
Processing video: ZSwegyvjkGk
Processing video: SWHHSuOQw70
Processing video: l0KCoTb8UfI
Processing video: dTaRCOgYAIo
Processing video: jvqjC6ocdJg
Processing video: cwl2Ndp4zWI
Processing video: ryqLiBxBd2A
An error occurred while proces