In [8]:
# Import pandas as 'pd' and create an empty DataFrame with specified column names
import pandas as pd

columns = ['Search', 'video_id', 'comment_id', 'text', 'author', 'date', 'like_count', 'reply_count']
df = pd.DataFrame(columns=columns)


In [9]:
# This script uses the YouTube Data API to search for videos, retrieve comments, and store them in a Pandas DataFrame
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

api_key = 'AIzaSyA2_t4gEE2zwvMRhrQyvZx4mAVaucMapeE'  
youtube = build('youtube', 'v3', developerKey=api_key)

def search_videos(youtube, query, max_results):
    search_response = youtube.search().list(
        q=query,
        part='id',
        maxResults=max_results,
        type='video'
    ).execute()

    video_ids = [item['id']['videoId'] for item in search_response['items']]
    return video_ids

def get_comments(youtube, video_id):
    comments_data = []
    try:
        page_token = None
        while True:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                textFormat='plainText',
                maxResults=100,
                pageToken=page_token
            ).execute()

            for item in response['items']:
                top_comment = item['snippet']['topLevelComment']['snippet']
                comments_data.append({
                    'video_id': video_id,
                    'comment_id': item['id'],
                    'text': top_comment['textDisplay'],
                    'author': top_comment['authorDisplayName'],
                    'date': top_comment['publishedAt'],
                    'like_count': top_comment['likeCount'],
                    'reply_count': item['snippet']['totalReplyCount']
                })

            page_token = response.get('nextPageToken')
            if not page_token:
                break

    except HttpError as error:
        print(f"An error occurred: {error}")
    return comments_data

video_ids = search_videos(youtube, "iPhone", 100)

all_comments = []
for video_id in video_ids:
    print(f'Fetching comments for video {video_id}')
    comments = get_comments(youtube, video_id)
    all_comments.extend(comments)

temp_df = pd.DataFrame(all_comments)

temp_df = temp_df[temp_df['text'].str.contains("iPhone", case=False)]

temp_df = temp_df.dropna(subset=['text'])

temp_df['Search'] = 'iPhone'

df = pd.concat([df, temp_df], ignore_index=True)

df = df.dropna(subset=['text']).drop_duplicates(subset=['text'])

df.to_csv('youtube_comments.csv', index=False)

df


Fetching comments for video xqyUdNxWazA
An error occurred: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet%2Creplies&videoId=xqyUdNxWazA&textFormat=plainText&maxResults=100&key=AIzaSyA2_t4gEE2zwvMRhrQyvZx4mAVaucMapeE&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.">
Fetching comments for video pV0ud2B8WfQ
Fetching comments for video HmBo6V7wx8E
Fetching comments for video j3J9cIRXN9c
Fetching comments for video Uqx4DL1pd9w
Fetching comments for video V4D49YYihQI
Fetching comments for video fODv2jXRY-M
Fetching comments for video M2IfmcypfB8
Fetching comments for video 8Og7C6RVMOs
Fetching comments for video 2U7iiQc8eIg
Fetching comments for video pp7rDdo2__0
Fetching comments for video sxS9rUaKZFE
Fetching comments for video _Bwv9Q8oKck
Fetching comments for video 79pOjZQ2GvQ
Fetching comments for video CuxZK8r79og
Fetching

Unnamed: 0,Search,video_id,comment_id,text,author,date,like_count,reply_count
0,iPhone,pV0ud2B8WfQ,UgwuZ0IvcR2eBzmPwax4AaABAg,iPhone 4s was the perfect size.,Carlos Vega,2023-11-07T04:49:36Z,0,0
1,iPhone,pV0ud2B8WfQ,UgzguuMuaAx24szlaAp4AaABAg,I went from the iPhone 3GS to 4 to 4S to 5 to ...,user,2023-11-06T22:33:16Z,0,0
2,iPhone,pV0ud2B8WfQ,UgyGaqW4zXg9OvvN9014AaABAg,Beginning 0:01\niPhone 0:02\niPhone 3G 0:13\ni...,iPhone 5s,2023-11-06T00:48:43Z,1,0
3,iPhone,pV0ud2B8WfQ,UgzbV3ptoU69pXWsJ-V4AaABAg,My evolution of owning an iPhone \n3Gs\n4s\n5s...,Sunshine19,2023-10-29T22:29:27Z,0,0
4,iPhone,pV0ud2B8WfQ,Ugx7_-E4N4MVG3oN2q14AaABAg,Every iPhone: Our subtitles are revolations!\n...,J-D Flash Studios { Discontinued },2023-10-29T14:14:16Z,0,0
...,...,...,...,...,...,...,...,...
16133,iPhone,PjjkemXM32Y,Ugz9yHrDeqMQyrlElqF4AaABAg,Никакой IPhone не нужен!,Александр Стецурин,2022-11-13T19:55:17Z,0,1
16134,iPhone,PjjkemXM32Y,UgzOKrFvtcIbOchBnYp4AaABAg,Я: покупаю iPhone 14 \nДрузья: о у тебя iPhone...,Cagoniro,2022-11-13T17:08:36Z,4012,43
16135,iPhone,PjjkemXM32Y,UgxVJMpzW3VMsR5qx1h4AaABAg,А где новая чёлка в 14 iPhone ?,Sideremy,2022-11-13T10:36:25Z,0,4
16136,iPhone,oNfboeqRtTA,UgwGjSMle3ylO8KBjc14AaABAg,iPhone15Pro MAXって20万?ぐらいするんですよ!?\nマジ凄すぎワロタｧｧｧｧ...,keikachinshu1006,2023-10-15T01:10:29Z,3,0


In [15]:
# Import necessary modules from the Google API client library
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Initialize the YouTube API client
api_key = 'AIzaSyCay1rVJ0CbxR3Bk1uVcVKScvTSYlsKcE4'  # Replace with your API key
youtube = build('youtube', 'v3', developerKey=api_key)

# Function to search for videos using the YouTube API
def search_videos(youtube, query, max_results):
    # Make a search request to the API with the given parameters
    search_response = youtube.search().list(
        q=query,
        part='id',
        maxResults=max_results,
        type='video'
    ).execute()
    
    # Extract video IDs from the search response
    video_ids = [item['id']['videoId'] for item in search_response['items']]
    return video_ids

# Function to retrieve comments for a given video ID using the YouTube API
def get_comments(youtube, video_id):
    comments_data = []
    try:
        # Loop through pages of comments using pagination
        page_token = None
        while True:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                textFormat='plainText',
                maxResults=100,
                pageToken=page_token
            ).execute()

            # Extract relevant information from each comment and its replies
            for item in response['items']:
                top_comment = item['snippet']['topLevelComment']['snippet']
                comments_data.append({
                    'video_id': video_id,
                    'comment_id': item['id'],
                    'text': top_comment['textDisplay'],
                    'author': top_comment['authorDisplayName'],
                    'date': top_comment['publishedAt'],
                    'like_count': top_comment['likeCount'],
                    'reply_count': item['snippet']['totalReplyCount']
                })

            # Check if there are more pages of comments
            page_token = response.get('nextPageToken')
            if not page_token:
                break

    except HttpError as error:
        print(f"An error occurred: {error}")
    return comments_data

# Search for videos with the keyword "AirPods"
video_ids = search_videos(youtube, "AirPods", 100)

# Collect comments from each video
all_comments = []

# Iterate through the video IDs starting from index 33
for video_id in range(33, len(video_ids)):
    print(f'{video_id + 1} / {len(video_ids)}')
    print(f'Fetching comments for video {video_ids[video_id]}')
    
    # Retrieve comments for the current video ID
    comments = get_comments(youtube, video_ids[video_id])
    
    # Extend the list of all comments with the comments from the current video
    all_comments.extend(comments)
    
    # Create a temporary Pandas DataFrame from the collected comments
    temp_df = pd.DataFrame(all_comments)
    
    # Filter comments containing the word "AirPods" (case-insensitive)
    temp_df = temp_df[temp_df['text'].str.contains("AirPods", case=False)]
    
    # Drop rows with missing text values
    temp_df = temp_df.dropna(subset=['text'])
    
    # Add a 'Search' column with the value 'AirPods'
    temp_df['Search'] = 'AirPods'
    
    # Concatenate the temporary DataFrame with the existing DataFrame
    df = pd.concat([df, temp_df], ignore_index=True)
    
    # Drop rows with missing text values and remove duplicate comments
    df = df.dropna(subset=['text']).drop_duplicates(subset=['text'])
    
    # Save the DataFrame to a CSV file named 'youtube_comments.csv'
    df.to_csv('youtube_comments.csv', index=False)

# Display the resulting DataFrame
df


34 / 50
Fetching comments for video 6yJDWshMZuo
35 / 50
Fetching comments for video WAXqB0CIXrU
36 / 50
Fetching comments for video KqD2JKDLkhU
37 / 50
Fetching comments for video 934V2eiwsNA
38 / 50
Fetching comments for video a500SZyQFxQ
39 / 50
Fetching comments for video jtdDpryUte8
40 / 50
Fetching comments for video 8RIgnWd50dY
41 / 50
Fetching comments for video kjSAYk9tuHw
42 / 50
Fetching comments for video JloOtpv-Wv8
43 / 50
Fetching comments for video qlqP1q9doWo
44 / 50
Fetching comments for video 50fqpXh0FO8
45 / 50
Fetching comments for video wiDoluB69c8
46 / 50
Fetching comments for video d-GPnbzx87Y
47 / 50
Fetching comments for video c3W3iKb5o2I
48 / 50
Fetching comments for video 7MQnkq1zIZ0
49 / 50
Fetching comments for video MZLlPvDYxME
50 / 50
Fetching comments for video Zet21G4RVeU


Unnamed: 0,Search,video_id,comment_id,text,author,date,like_count,reply_count
0,iPhone,pV0ud2B8WfQ,UgwuZ0IvcR2eBzmPwax4AaABAg,iPhone 4s was the perfect size.,Carlos Vega,2023-11-07T04:49:36Z,0,0
1,iPhone,pV0ud2B8WfQ,UgzguuMuaAx24szlaAp4AaABAg,I went from the iPhone 3GS to 4 to 4S to 5 to ...,user,2023-11-06T22:33:16Z,0,0
2,iPhone,pV0ud2B8WfQ,UgyGaqW4zXg9OvvN9014AaABAg,Beginning 0:01\niPhone 0:02\niPhone 3G 0:13\ni...,iPhone 5s,2023-11-06T00:48:43Z,1,0
3,iPhone,pV0ud2B8WfQ,UgzbV3ptoU69pXWsJ-V4AaABAg,My evolution of owning an iPhone \n3Gs\n4s\n5s...,Sunshine19,2023-10-29T22:29:27Z,0,0
4,iPhone,pV0ud2B8WfQ,Ugx7_-E4N4MVG3oN2q14AaABAg,Every iPhone: Our subtitles are revolations!\n...,J-D Flash Studios { Discontinued },2023-10-29T14:14:16Z,0,0
...,...,...,...,...,...,...,...,...
21492,AirPods,Zet21G4RVeU,Ugy25Wb6-WzNtoPBNNF4AaABAg,Nice ill hope i have it. For now i using bavin...,Felo,2023-07-26T11:58:40Z,1,0
21493,AirPods,Zet21G4RVeU,UgwNsSBMjR2-D15LA8p4AaABAg,2079: unboxing AirPods Pro max plus from 2029,Crosante Cotino,2023-07-22T00:23:55Z,39,2
21494,AirPods,Zet21G4RVeU,Ugz5ErqYp6IA9FtVyhV4AaABAg,Those were the first ones I ever got. Then the...,JaymanPro07,2023-07-20T22:24:25Z,24,6
21495,AirPods,Zet21G4RVeU,UgwkQ81-kal3XNAHH0x4AaABAg,I have the airpod pros gen 2 and they sound go...,Donutlover,2023-07-12T00:47:35Z,2,1


In [14]:
# Count the occurrences of each unique Apple product
df.Search.value_counts()


iPhone     15984
AirPods      253
Name: Search, dtype: int64

In [16]:
# Import necessary modules from the Google API client library
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Replace 'api_key' with your actual YouTube Data API key
api_key = 'AIzaSyCay1rVJ0CbxR3Bk1uVcVKScvTSYlsKcE4'
youtube = build('youtube', 'v3', developerKey=api_key)

# Function to search for videos based on a query and retrieve video IDs
def search_videos(youtube, query, max_results):
    # Execute the search query and extract video IDs
    search_response = youtube.search().list(
        q=query,
        part='id',
        maxResults=max_results,
        type='video'
    ).execute()
    video_ids = [item['id']['videoId'] for item in search_response['items']]
    return video_ids

# Function to retrieve comments for a given video ID
def get_comments(youtube, video_id):
    comments_data = []
    try:
        # Iterate through pages of comments using pagination
        page_token = None
        while True:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                textFormat='plainText',
                maxResults=100,
                pageToken=page_token
            ).execute()

            # Extract relevant information from each comment
            for item in response['items']:
                top_comment = item['snippet']['topLevelComment']['snippet']
                comments_data.append({
                    'video_id': video_id,
                    'comment_id': item['id'],
                    'text': top_comment['textDisplay'],
                    'author': top_comment['authorDisplayName'],
                    'date': top_comment['publishedAt'],
                    'like_count': top_comment['likeCount'],
                    'reply_count': item['snippet']['totalReplyCount']
                })

            # Check for the presence of additional pages of comments
            page_token = response.get('nextPageToken')
            if not page_token:
                break

    except HttpError as error:
        print(f"An error occurred: {error}")
    return comments_data

# Search for videos with the keyword "iPad" and retrieve video IDs
video_ids = search_videos(youtube, "iPad", 100)

# Collect comments from each video and filter for those containing "iPad"
all_comments = []
for video_id in range(len(video_ids)):
    print(str(video_id+1) + " / " + str(len(video_ids)))
    print(f'Fetching comments for video {video_ids[video_id]}')
    comments = get_comments(youtube, video_ids[video_id])

    # Filter comments containing the keyword "iPad"
    temp_df = pd.DataFrame(all_comments)
    temp_df = temp_df[temp_df['text'].str.contains("iPad", case=False)]
    temp_df = temp_df.dropna(subset=['text'])
    temp_df['Search'] = 'iPad'
    
    # Append the filtered comments to the overall DataFrame
    df = pd.concat([df, temp_df], ignore_index=True)
    df = df.dropna(subset=['text']).drop_duplicates(subset=['text'])

    # Save the DataFrame to a CSV file after each iteration
    df.to_csv('youtube_comments.csv', index=False)

# Display the final DataFrame
df


1 / 50
Fetching comments for video FYxUJFD9Ye4
2 / 50
Fetching comments for video A8e5jbiwMXI
3 / 50
Fetching comments for video pwHNannxolo
4 / 50
Fetching comments for video vWoe-jTLExE
5 / 50
Fetching comments for video A5mMaChwCog
6 / 50
Fetching comments for video YQReKkSC5fE
7 / 50
Fetching comments for video fY6y8wnixW8
8 / 50
Fetching comments for video KVI0xftOBvA
9 / 50
Fetching comments for video ujJEEJTrI1Y
10 / 50
Fetching comments for video CwtUJ30A8nY
11 / 50
Fetching comments for video fS06w93bmMg
12 / 50
Fetching comments for video nZtyJVRTUNE
13 / 50
Fetching comments for video mQD1JiefSxw
14 / 50
Fetching comments for video yzMnJ353uBw
15 / 50
Fetching comments for video 74VHzCaaIZo
16 / 50
Fetching comments for video gjnb6u7pSiw
17 / 50
Fetching comments for video V8hp_iDhdZU
18 / 50
Fetching comments for video phGlO0Lt-Tk
19 / 50
Fetching comments for video V2S-aHWeQcc
20 / 50
Fetching comments for video PSRMwwlO8Es
21 / 50
Fetching comments for video qh14a1oCi8c
2

Unnamed: 0,Search,video_id,comment_id,text,author,date,like_count,reply_count
0,iPhone,pV0ud2B8WfQ,UgwuZ0IvcR2eBzmPwax4AaABAg,iPhone 4s was the perfect size.,Carlos Vega,2023-11-07T04:49:36Z,0,0
1,iPhone,pV0ud2B8WfQ,UgzguuMuaAx24szlaAp4AaABAg,I went from the iPhone 3GS to 4 to 4S to 5 to ...,user,2023-11-06T22:33:16Z,0,0
2,iPhone,pV0ud2B8WfQ,UgyGaqW4zXg9OvvN9014AaABAg,Beginning 0:01\niPhone 0:02\niPhone 3G 0:13\ni...,iPhone 5s,2023-11-06T00:48:43Z,1,0
3,iPhone,pV0ud2B8WfQ,UgzbV3ptoU69pXWsJ-V4AaABAg,My evolution of owning an iPhone \n3Gs\n4s\n5s...,Sunshine19,2023-10-29T22:29:27Z,0,0
4,iPhone,pV0ud2B8WfQ,Ugx7_-E4N4MVG3oN2q14AaABAg,Every iPhone: Our subtitles are revolations!\n...,J-D Flash Studios { Discontinued },2023-10-29T14:14:16Z,0,0
...,...,...,...,...,...,...,...,...
52905,iPad,sKBBWeRGuZs,Ugw4umUqqpod0MAYXEJ4AaABAg,Apple loopt achter op concurrentie en zichzelf...,Jesse Place,2022-10-25T17:45:00Z,6,0
52906,iPad,sKBBWeRGuZs,UgzXaLLRnLyci5ddlKJ4AaABAg,Mijn iPad gaat wel naar het beginscherm als ik...,Mikke baauw,2022-10-25T17:37:40Z,6,4
52907,iPad,sKBBWeRGuZs,UgyLDWS6omTwhWlZCFJ4AaABAg,Het nieuwe hoesje van de iPad vind ik wel heel...,Thomas,2022-10-25T17:33:41Z,1,1
52908,iPad,sKBBWeRGuZs,UgwEdDNPu0nXY1qijoB4AaABAg,Als Apple een 11inch iPad met OLED scherm uitb...,Thomas,2022-10-25T17:33:10Z,8,3


In [17]:
# Count the occurrences of each unique Apple product
df.Search.value_counts()


iPad       16960
iPhone     15984
AirPods     2845
Name: Search, dtype: int64

In [18]:
# Import necessary modules from the Google API client library
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Replace 'api_key' with your actual YouTube Data API key
api_key = 'AIzaSyCay1rVJ0CbxR3Bk1uVcVKScvTSYlsKcE4'
youtube = build('youtube', 'v3', developerKey=api_key)

# Function to search for videos based on a query and retrieve video IDs
def search_videos(youtube, query, max_results):
    # Execute the search query and extract video IDs
    search_response = youtube.search().list(
        q=query,
        part='id',
        maxResults=max_results,
        type='video'
    ).execute()
    video_ids = [item['id']['videoId'] for item in search_response['items']]
    return video_ids

# Function to retrieve comments for a given video ID
def get_comments(youtube, video_id):
    comments_data = []
    try:
        # Iterate through pages of comments using pagination
        page_token = None
        while True:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                textFormat='plainText',
                maxResults=100,
                pageToken=page_token
            ).execute()

            # Extract relevant information from each comment
            for item in response['items']:
                top_comment = item['snippet']['topLevelComment']['snippet']
                comments_data.append({
                    'video_id': video_id,
                    'comment_id': item['id'],
                    'text': top_comment['textDisplay'],
                    'author': top_comment['authorDisplayName'],
                    'date': top_comment['publishedAt'],
                    'like_count': top_comment['likeCount'],
                    'reply_count': item['snippet']['totalReplyCount']
                })

            # Check for the presence of additional pages of comments
            page_token = response.get('nextPageToken')
            if not page_token:
                break

    except HttpError as error:
        print(f"An error occurred: {error}")
    return comments_data

# Search for videos with the keyword "Macbook" and retrieve video IDs
video_ids = search_videos(youtube, "Macbook", 100)

# Collect comments from each video and filter for those containing "Macbook"
all_comments = []
for video_id in range(len(video_ids)):
    print(str(video_id+1) + " / " + str(len(video_ids)))
    print(f'Fetching comments for video {video_ids[video_id]}')
    comments = get_comments(youtube, video_ids[video_id])

    # Filter comments containing the keyword "Macbook"
    temp_df = pd.DataFrame(all_comments)
    temp_df = temp_df[temp_df['text'].str.contains("Macbook", case=False)]
    temp_df = temp_df.dropna(subset=['text'])
    temp_df['Search'] = 'Macbook'
    
    # Append the filtered comments to the overall DataFrame
    df = pd.concat([df, temp_df], ignore_index=True)
    df = df.dropna(subset=['text']).drop_duplicates(subset=['text'])

    # Save the DataFrame to a CSV file after each iteration
    df.to_csv('youtube_comments.csv', index=False)

# Display the final DataFrame
df


1 / 50
Fetching comments for video qie7cfjnLAY
2 / 50
Fetching comments for video 0pg_Y41waaE
An error occurred: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet%2Creplies&videoId=0pg_Y41waaE&textFormat=plainText&maxResults=100&key=AIzaSyCay1rVJ0CbxR3Bk1uVcVKScvTSYlsKcE4&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.">
3 / 50
Fetching comments for video t4ejFV6n4b8
4 / 50
Fetching comments for video nWtvbaMeImA
5 / 50
Fetching comments for video P0sVCUWJ6VU
6 / 50
Fetching comments for video VbaxlLZnjD4
7 / 50
Fetching comments for video uCNKmp4REHk
8 / 50
Fetching comments for video u3M1Q4TjaZQ
An error occurred: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet%2Creplies&videoId=u3M1Q4TjaZQ&textFormat=plainText&maxResults=100&key=AIzaSyCay1rVJ0CbxR3Bk1uVcVKScvTSYlsKcE4&al

Unnamed: 0,Search,video_id,comment_id,text,author,date,like_count,reply_count
0,iPhone,pV0ud2B8WfQ,UgwuZ0IvcR2eBzmPwax4AaABAg,iPhone 4s was the perfect size.,Carlos Vega,2023-11-07T04:49:36Z,0,0
1,iPhone,pV0ud2B8WfQ,UgzguuMuaAx24szlaAp4AaABAg,I went from the iPhone 3GS to 4 to 4S to 5 to ...,user,2023-11-06T22:33:16Z,0,0
2,iPhone,pV0ud2B8WfQ,UgyGaqW4zXg9OvvN9014AaABAg,Beginning 0:01\niPhone 0:02\niPhone 3G 0:13\ni...,iPhone 5s,2023-11-06T00:48:43Z,1,0
3,iPhone,pV0ud2B8WfQ,UgzbV3ptoU69pXWsJ-V4AaABAg,My evolution of owning an iPhone \n3Gs\n4s\n5s...,Sunshine19,2023-10-29T22:29:27Z,0,0
4,iPhone,pV0ud2B8WfQ,Ugx7_-E4N4MVG3oN2q14AaABAg,Every iPhone: Our subtitles are revolations!\n...,J-D Flash Studios { Discontinued },2023-10-29T14:14:16Z,0,0
...,...,...,...,...,...,...,...,...
45294,Macbook,TfmE_7tTKSI,UgwvI9fg7rhsJ1_eKBt4AaABAg,I would be here even if you were only unboxing...,Erik Marez,2023-06-14T20:20:26Z,1,0
45295,Macbook,TfmE_7tTKSI,UgyVqyeZfNEOdamXUEJ4AaABAg,So lovely to be watching this on my brand new ...,Alfred,2023-06-14T20:19:00Z,15,0
45296,Macbook,TfmE_7tTKSI,UgxXjTW9jDaZq5IIMRZ4AaABAg,How many of you guys are watching this video b...,visan florin mihai,2023-06-14T20:13:43Z,0,0
45297,Macbook,TfmE_7tTKSI,Ugx6UA2ndDWQUP6JFfJ4AaABAg,You wouldn’t think 1 inch would make much of a...,MegaBeanHead,2023-06-14T20:13:29Z,1,0


In [None]:
# Count the occurrences of each unique Apple product
df.Search.value_counts()


In [20]:
# Import necessary modules from the Google API client library
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Replace 'api_key' with your actual YouTube Data API key
api_key = 'AIzaSyCay1rVJ0CbxR3Bk1uVcVKScvTSYlsKcE4'
youtube = build('youtube', 'v3', developerKey=api_key)

# Function to search for videos based on a query and retrieve video IDs
def search_videos(youtube, query, max_results):
    # Execute the search query and extract video IDs
    search_response = youtube.search().list(
        q=query,
        part='id',
        maxResults=max_results,
        type='video'
    ).execute()
    video_ids = [item['id']['videoId'] for item in search_response['items']]
    return video_ids

# Function to retrieve comments for a given video ID
def get_comments(youtube, video_id):
    comments_data = []
    try:
        # Iterate through pages of comments using pagination
        page_token = None
        while True:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                textFormat='plainText',
                maxResults=100,
                pageToken=page_token
            ).execute()

            # Extract relevant information from each comment
            for item in response['items']:
                top_comment = item['snippet']['topLevelComment']['snippet']
                comments_data.append({
                    'video_id': video_id,
                    'comment_id': item['id'],
                    'text': top_comment['textDisplay'],
                    'author': top_comment['authorDisplayName'],
                    'date': top_comment['publishedAt'],
                    'like_count': top_comment['likeCount'],
                    'reply_count': item['snippet']['totalReplyCount']
                })

            # Check for the presence of additional pages of comments
            page_token = response.get('nextPageToken')
            if not page_token:
                break

    except HttpError as error:
        print(f"An error occurred: {error}")
    return comments_data

# Search for videos with the keyword "Apple Watch" and retrieve video IDs
video_ids = search_videos(youtube, "Apple Watch", 100)

# Collect comments from each video and filter for those containing "Apple Watch"
all_comments = []
for video_id in range(len(video_ids)):
    print(str(video_id+1) + " / " + str(len(video_ids)))
    print(f'Fetching comments for video {video_ids[video_id]}')
    comments = get_comments(youtube, video_ids[video_id])

    # Filter comments containing the keyword "Apple Watch"
    temp_df = pd.DataFrame(all_comments)
    try:
        temp_df = temp_df[temp_df['text'].str.contains("Apple Watch", case=False)]
    except:
        continue
    temp_df = temp_df.dropna(subset=['text'])
    temp_df['Search'] = 'Apple Watch'
    
    # Append the filtered comments to the overall DataFrame
    df = pd.concat([df, temp_df], ignore_index=True)
    df = df.dropna(subset=['text']).drop_duplicates(subset=['text'])

    # Save the DataFrame to a CSV file after each iteration
    df.to_csv('youtube_comments.csv', index=False)

# Display the final DataFrame
df


1 / 50
Fetching comments for video Nvb_Kta7v6U
An error occurred: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet%2Creplies&videoId=Nvb_Kta7v6U&textFormat=plainText&maxResults=100&key=AIzaSyCay1rVJ0CbxR3Bk1uVcVKScvTSYlsKcE4&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.">
2 / 50
Fetching comments for video oNCs4C2SMjo
3 / 50
Fetching comments for video CLOOLOdg5Jw
4 / 50
Fetching comments for video MF5odeotkKA
5 / 50
Fetching comments for video aPN13ULL0k4
6 / 50
Fetching comments for video _mOqJyNHkhI
7 / 50
Fetching comments for video 7LaR7meroVI
8 / 50
Fetching comments for video 6J5o4J-Ttp8
9 / 50
Fetching comments for video J6hCaO7n35o
10 / 50
Fetching comments for video JCLsnO2WOEQ
11 / 50
Fetching comments for video FcF0F4lYi_U
12 / 50
Fetching comments for video 6kZWFmuSDys
13 / 50
Fetching comments for video Ceqab

Unnamed: 0,Search,video_id,comment_id,text,author,date,like_count,reply_count
0,iPhone,pV0ud2B8WfQ,UgwuZ0IvcR2eBzmPwax4AaABAg,iPhone 4s was the perfect size.,Carlos Vega,2023-11-07T04:49:36Z,0,0
1,iPhone,pV0ud2B8WfQ,UgzguuMuaAx24szlaAp4AaABAg,I went from the iPhone 3GS to 4 to 4S to 5 to ...,user,2023-11-06T22:33:16Z,0,0
2,iPhone,pV0ud2B8WfQ,UgyGaqW4zXg9OvvN9014AaABAg,Beginning 0:01\niPhone 0:02\niPhone 3G 0:13\ni...,iPhone 5s,2023-11-06T00:48:43Z,1,0
3,iPhone,pV0ud2B8WfQ,UgzbV3ptoU69pXWsJ-V4AaABAg,My evolution of owning an iPhone \n3Gs\n4s\n5s...,Sunshine19,2023-10-29T22:29:27Z,0,0
4,iPhone,pV0ud2B8WfQ,Ugx7_-E4N4MVG3oN2q14AaABAg,Every iPhone: Our subtitles are revolations!\n...,J-D Flash Studios { Discontinued },2023-10-29T14:14:16Z,0,0
...,...,...,...,...,...,...,...,...
46120,Apple Watch,bFaukZe0uSk,UgzxwmoinS3v_MgBO_N4AaABAg,Many of us are comfortable with Android phones...,Anish Thomas,2022-09-14T14:57:52Z,10,0
46121,Apple Watch,bFaukZe0uSk,UgyjJEN7VwJ-6IB19Qd4AaABAg,I went with the Apple Watch Ultra mainly for t...,BP,2022-09-14T14:20:49Z,84,7
46122,Apple Watch,bFaukZe0uSk,Ugzc_FkNY7JXKz6nmPF4AaABAg,Just buy last year Apple Watch Series 7. Skip ...,Csaba Teplan,2022-09-14T14:19:26Z,0,3
46123,Apple Watch,bFaukZe0uSk,Ugx6ruHzlpjb6UEBeGB4AaABAg,Does de Apple Watch Ultra will be compatible w...,antonio caballero,2022-09-14T14:15:03Z,0,1


In [21]:
# Count the occurrences of each unique Apple product
df.Search.value_counts()


iPad           16960
iPhone         15984
Macbook         4779
AirPods         2845
Apple Watch     2788
Name: Search, dtype: int64