In [1]:
# Installing necessary libraries
#pip install google-api-python-client
#pip install youtube_transcript_api

In [2]:
# Importing libraries
import googleapiclient.discovery
from googleapiclient.discovery import build
import os

import pandas as pd
import numpy as np

from youtube_transcript_api import YouTubeTranscriptApi

In [3]:
# Data for service object
api_service_name = 'youtube'
api_version = 'v3'
api_key = 'API_KEY'

# Build the service object
youtube = build(api_service_name, api_version, developerKey= api_key)

# Creating Functions to collect youtube information

In [4]:
'''
Creating a function that receives as an argument the link of a youtube video and return 100 comments' text from this video
'''

def comments(link):

    # Extracting video ID from the link
    video_id = link.split('v=')[1]
    if '&t=' in video_id:
        video_id = video_id.split('&t=')[0]

    # Creating a while loop in order to get the comments for all the pages
    next_page = True
    iteration = 0

    while next_page:
        # Creating the request for 100 comments
        if iteration==0:
            request = youtube.commentThreads().list(
                part="snippet",
                videoId= video_id,
                maxResults = 100
                )
            # Making the request
            response = request.execute()
        else:
            # Creating the request for the next 100 comments
            request = youtube.commentThreads().list(
                part="snippet",
                videoId= video_id,
                maxResults = 100,
                pageToken= token
                )
            # Making the request
            response = request.execute()      

        # Extracting the comment's text
        comment_text, commenter_id, commenter_name, commenter_channel = [],[],[],[]

        for i in range(len(response['items'])):
            comment_text.append(response['items'][i]['snippet']['topLevelComment']['snippet']['textDisplay'])
            commenter_id.append(response['items'][i]['snippet']['topLevelComment']['id'])
            commenter_name.append(response['items'][i]['snippet']['topLevelComment']['snippet']['authorDisplayName'])
            commenter_channel.append(response['items'][i]['snippet']['topLevelComment']['snippet']['authorChannelUrl'])
        if iteration==0:
            comment = pd.DataFrame({'Commenter_id':commenter_id, 'Commenter_name':commenter_name,
            'Comment_text':comment_text, 'Commenter_channel':commenter_channel })
        else:
            temp = pd.DataFrame({'Commenter_id':commenter_id, 'Commenter_name':commenter_name,
            'Comment_text':comment_text, 'Commenter_channel':commenter_channel })
            comment = pd.concat([comment,temp], axis=0)

        # See if there is a next page of comments
        try:
            token = response['nextPageToken']
            iteration+=1
        except:
            next_page = False

    return comment

In [5]:
'''
Creating a function that receives as an argument the link of a youtube video and return the  comments' info from this video
along with their replies
Tip: To see if one comment is a reply to a main comment, the id will be in this formant "id_main_comment.id_of_reply "
issue: We can have the replies of the replies
'''

def comments_replies(link):

    # Extracting video ID from the link
    video_id = link.split('v=')[1]
    if '&t=' in video_id:
        video_id = video_id.split('&t=')[0]

    # Creating a while loop in order to get the comments for all the pages
    next_page = True
    iteration = 0

    while next_page:
    # Creating the request for 100 comments
        if iteration==0:
            request = youtube.commentThreads().list(
                part="snippet, replies",
                videoId= video_id,
                maxResults = 100
                )
            # Making the request
            response = request.execute()
        else:
            # Creating the request for the next 100 comments
            request = youtube.commentThreads().list(
                part="snippet",
                videoId= video_id,
                maxResults = 100,
                pageToken= token
                )
            # Making the request
            response = request.execute() 
    
        comment_text, commenter_id, commenter_name, commenter_channel = [],[],[],[]
        time_published, time_updated = [], []
        video_link = []

        for i in range(len(response['items'])):

            ######## Info for the video #############
            video_link.append(link)

            ######## For the main comment info ######
            comment_text.append(response['items'][i]['snippet']['topLevelComment']['snippet']['textDisplay'])
            commenter_id.append(response['items'][i]['snippet']['topLevelComment']['id'])
            commenter_name.append(response['items'][i]['snippet']['topLevelComment']['snippet']['authorDisplayName'])
            commenter_channel.append(response['items'][i]['snippet']['topLevelComment']['snippet']['authorChannelUrl'])
            time_published.append(response['items'][i]['snippet']['topLevelComment']['snippet']['publishedAt'])
            time_updated.append(response['items'][i]['snippet']['topLevelComment']['snippet']['updatedAt'])

            ######## For replies info ##########

            if 'replies' in response['items'][i].keys():
                # Find the replies for each comment
                replies = response['items'][i]['replies']['comments']
                # For each reply extract the recuired information
                for j in range(len(replies)):
                    video_link.append(link)
                    comment_text.append(replies[j]['snippet']['textDisplay'])
                    commenter_id.append(replies[j]['id'])
                    commenter_name.append(replies[j]['snippet']['authorDisplayName'])
                    commenter_channel.append(replies[j]['snippet']['authorChannelUrl'])
                    time_published.append(replies[j]['snippet']['publishedAt'])
                    time_updated.append(replies[j]['snippet']['updatedAt'])

        if iteration==0:
            comment = pd.DataFrame({'Commenter_id':commenter_id, 'Commenter_name':commenter_name,
                                    'Comment_text':comment_text, 'Commenter_channel':commenter_channel, 
                                    'time_published': time_published, 'time_updated': time_updated,
                                     'video_link': video_link })
        else:
            temp = pd.DataFrame({'Commenter_id':commenter_id, 'Commenter_name':commenter_name,
                                 'Comment_text':comment_text, 'Commenter_channel':commenter_channel, 
                                 'time_published': time_published, 'time_updated': time_updated,
                                 'video_link': video_link })
            comment = pd.concat([comment,temp], axis=0)

        # See if there is a next page of comments
        try:
            token = response['nextPageToken']
            iteration+=1
        except:
            next_page = False

    return comment

In [6]:
'''
Creating a function that receives as an argument the link of a youtube video and return the transcript of the video
You can find the github with the documentation of the package here: https://github.com/jdepoix/youtube-transcript-api 
'''

def get_transcript(link):
    
    # Extracting video ID from the link
    video_id = link.split('v=')[1]
    if '&t=' in video_id:
        video_id = video_id.split('&t=')[0]

    # Collecting the transcript
    trans = YouTubeTranscriptApi.get_transcript(video_id)

    # Converting the transcript from a list of dictionaries to continuous text
    temp = [trans[i]['text'] for i in range(len(trans))]
    transcript = ' '.join(temp)

    # Creating output dataframe
    df = pd.DataFrame({'video_link': link, 'video_transcrpit': transcript}, index=[1])
    
    return df   

In [7]:
def videos_info(videos):
    # Collecting the comments and transcript along with their replies for each video
    for i, video in enumerate(videos):
        # Comments and replies
        df_comments = comments_replies(str(video))
        # Transcript
        try:
            df_trans = get_transcript(str(video))
        except:
            df_trans = pd.DataFrame({'video_link': str(video), 'video_transcrpit': 'Not Available'}, index=[1])
            
        if i==0:
            youtube_comments = df_comments
            youtube_transcript = df_trans
        else:
            youtube_comments = pd.concat([youtube_comments,df_comments], axis=0)
            youtube_transcript = pd.concat([youtube_transcript, df_trans], axis=0, ignore_index=True)
    
    return (youtube_comments, youtube_transcript)
        

# Collecting comments from youtube videos

## Collecting comments from UK video reviews for Fiat 500 BEV

In [8]:
# List of youtube videos for Fiat 500 BEV created in UK
fiat500bev = ['https://www.youtube.com/watch?v=0kDbvxpjLZs',
            'https://www.youtube.com/watch?v=muMkptAGUpg',
            'https://www.youtube.com/watch?v=h4Uq5M_n4Pg',
            'https://www.youtube.com/watch?v=3r4-IyxH7Bg',
            'https://www.youtube.com/watch?v=brvvaWzs63Y',
            'https://www.youtube.com/watch?v=u-0B4jopGmg',
            'https://www.youtube.com/watch?v=T4DpXJAyN08',
            'https://www.youtube.com/watch?v=3ZHWycctDUk']

(fiat500bev_comments, fiat500bev_transcript) = videos_info(fiat500bev)

In [9]:
fiat500bev_comments

Unnamed: 0,Commenter_id,Commenter_name,Comment_text,Commenter_channel,time_published,time_updated,video_link
0,UgzLjWe2KLgpPEKLmQ14AaABAg,Takuan,"Nice car, but not with an acre of cheap piano ...",http://www.youtube.com/channel/UCFBsoG3DqTyNNL...,2022-07-04T07:41:46Z,2022-07-04T07:42:29Z,https://www.youtube.com/watch?v=0kDbvxpjLZs
1,UgyvY6p-Mujgalk4yaJ4AaABAg,TruthBeTold,"no thanks, keep the electric version, only tak...",http://www.youtube.com/channel/UCATyIZRMjqyPFW...,2022-06-23T17:16:34Z,2022-06-23T17:16:34Z,https://www.youtube.com/watch?v=0kDbvxpjLZs
2,Ugx-BWyM38cmSiNkKDJ4AaABAg,Happy Looksr,"Very best car, I love",http://www.youtube.com/channel/UCwEI_6m8s9vbXQ...,2022-06-21T02:49:47Z,2022-06-21T02:49:47Z,https://www.youtube.com/watch?v=0kDbvxpjLZs
3,UgwXOb_SUBYt3vTuFa94AaABAg,Aniket Motghare,Plz fiat launch this car in india,http://www.youtube.com/channel/UCZNDCIMktmyLgW...,2022-06-20T10:29:57Z,2022-06-20T10:29:57Z,https://www.youtube.com/watch?v=0kDbvxpjLZs
4,UgwXa_1nMI6jMx7tfSF4AaABAg,Anders Fagerström,Just passing by locking for a review. What is ...,http://www.youtube.com/channel/UCrB0cF7u8IBoCU...,2022-06-18T10:18:10Z,2022-06-18T10:18:10Z,https://www.youtube.com/watch?v=0kDbvxpjLZs
...,...,...,...,...,...,...,...
88,UgzGYOiml4uW2qi11jd4AaABAg,RADDERS,"The only Fiat 500 I’d consider is the Abarth, ...",http://www.youtube.com/channel/UCSxuomQVbfXgKk...,2020-04-05T20:09:44Z,2020-04-05T20:09:44Z,https://www.youtube.com/watch?v=3ZHWycctDUk
89,Ugz3EDCAZjCbpv8wb_F4AaABAg,Jack,They are probably fine but for most have been ...,http://www.youtube.com/channel/UCwjQjHW8aOzO0w...,2020-04-05T20:08:42Z,2020-04-05T20:08:42Z,https://www.youtube.com/watch?v=3ZHWycctDUk
90,UgyjdzHZsZIcdxFOf3F4AaABAg,Paul Baumer,It&#39;s OK as a girl&#39;s city car. Prefer t...,http://www.youtube.com/channel/UC_sCggwzasOyHn...,2020-04-05T20:06:25Z,2020-04-05T20:06:25Z,https://www.youtube.com/watch?v=3ZHWycctDUk
91,UgzSnZKHgcYrXbtU7kh4AaABAg,Andy Jamieson,You actually did the toaster review!!! Oh wait...,http://www.youtube.com/channel/UCN5q08JQfdIgsK...,2020-04-05T20:04:47Z,2020-04-05T20:04:47Z,https://www.youtube.com/watch?v=3ZHWycctDUk


In [10]:
fiat500bev_transcript

Unnamed: 0,video_link,video_transcrpit
0,https://www.youtube.com/watch?v=0kDbvxpjLZs,[Music] the fiat 500 is one of the most popula...
1,https://www.youtube.com/watch?v=muMkptAGUpg,hello i'm evm welcome back to the channel and ...
2,https://www.youtube.com/watch?v=h4Uq5M_n4Pg,if you were at the google revival in 2017 you ...
3,https://www.youtube.com/watch?v=3r4-IyxH7Bg,i'm driving this the fiat 500e this is the ico...
4,https://www.youtube.com/watch?v=brvvaWzs63Y,as time goes on i'm finding myself getting mor...
5,https://www.youtube.com/watch?v=u-0B4jopGmg,this is the new Fiat 500 and it replaces the o...
6,https://www.youtube.com/watch?v=T4DpXJAyN08,[Music] this is the third generation fiat 500 ...
7,https://www.youtube.com/watch?v=3ZHWycctDUk,alright guys how's it going welcome back today...


In [11]:
fiat500bev_comments.to_csv('fiat500bev_comments.csv', index=False)
fiat500bev_transcript.to_csv('fiat500bev_transcript.csv', index=False)

## Collecting comments from UK video reviews for Fiat 500 Hybrid

In [12]:
fiatHybrid = ['https://www.youtube.com/watch?v=373CuWDiR94', 
            'https://www.youtube.com/watch?v=PIhj77GPcWQ',
            'https://www.youtube.com/watch?v=rY_0zRK9sII',
            'https://www.youtube.com/watch?v=sV6azfsp_mM', 
            'https://www.youtube.com/watch?v=6QBpB3DYl_s',
            'https://www.youtube.com/watch?v=N_TQQeGBP1g',
            'https://www.youtube.com/watch?v=hK8nx1d1wgk']

(fiatHybrid_comments, fiatHybrid_transcript) = videos_info(fiatHybrid)

In [13]:
fiatHybrid_comments

Unnamed: 0,Commenter_id,Commenter_name,Comment_text,Commenter_channel,time_published,time_updated,video_link
0,UgyHP1qmXGjEPmxhn8d4AaABAg,George L,I’m confused about what you’ve said about coas...,http://www.youtube.com/channel/UCFuyD8OgiuoJWh...,2022-02-23T17:32:43Z,2022-02-23T17:32:43Z,https://www.youtube.com/watch?v=373CuWDiR94
1,Ugx6su7XRGD7NenQdCp4AaABAg,BublakVTX,"Its Honda Monkey among cars, by the way its we...",http://www.youtube.com/channel/UCIPyEwgbRWHkZx...,2022-01-15T17:06:37Z,2022-01-15T17:06:37Z,https://www.youtube.com/watch?v=373CuWDiR94
2,UgwLxxEMTeCtMxgY8fN4AaABAg,Mains Fortes,I love that car. Ir&#39;s the perfect daily dr...,http://www.youtube.com/channel/UChXK6htlhpJQTO...,2022-01-14T05:51:03Z,2022-01-14T05:53:44Z,https://www.youtube.com/watch?v=373CuWDiR94
3,UgwLxxEMTeCtMxgY8fN4AaABAg.9XAGZhs79Cc9XAm6DPi0ps,Auto Social UK,I couldn&#39;t agree more,http://www.youtube.com/channel/UCZ5uI1UXgo_smu...,2022-01-14T10:35:23Z,2022-01-14T10:35:23Z,https://www.youtube.com/watch?v=373CuWDiR94
4,UgxIC7rlbfK4vIk4K5B4AaABAg,Nanou197373,Thanks for your great review! I bought this mo...,http://www.youtube.com/channel/UC9z7Kz2cvtN8eu...,2021-11-18T20:53:29Z,2021-11-18T20:53:29Z,https://www.youtube.com/watch?v=373CuWDiR94
...,...,...,...,...,...,...,...
69,Ugy-dZpjnZ872fimxBZ4AaABAg,PS PLAYER 1,"nice video!!!<br><a href=""https://youtu.be/7nz...",http://www.youtube.com/channel/UCPdEJ5EWdsBouK...,2020-12-19T20:27:04Z,2020-12-19T20:27:04Z,https://www.youtube.com/watch?v=hK8nx1d1wgk
70,UgxTs6YJEor9Lm2Ftrh4AaABAg,Uncle Cooper,Do you have gps?? and i love your nails.,http://www.youtube.com/channel/UCkdsfc0af0t3ze...,2020-12-19T03:39:52Z,2020-12-19T03:39:52Z,https://www.youtube.com/watch?v=hK8nx1d1wgk
71,UgzWRAn3hvtXvJtSAsR4AaABAg,Megan Ondari,"From one Megan to another, your car is so cute...",http://www.youtube.com/channel/UCvlCjCV4X4n8QH...,2020-12-17T05:03:25Z,2020-12-17T05:03:25Z,https://www.youtube.com/watch?v=hK8nx1d1wgk
72,UgzmHdctbxhWFE0LWqB4AaABAg,Mollie Green,Loveeee it 🤩,http://www.youtube.com/channel/UCp0WHeRfARXwJJ...,2020-12-13T19:35:03Z,2020-12-13T19:35:03Z,https://www.youtube.com/watch?v=hK8nx1d1wgk


In [14]:
fiatHybrid_transcript

Unnamed: 0,video_link,video_transcrpit
0,https://www.youtube.com/watch?v=373CuWDiR94,[Music] hello and welcome back to auto social ...
1,https://www.youtube.com/watch?v=PIhj77GPcWQ,[Music] the second generation version of fiat'...
2,https://www.youtube.com/watch?v=rY_0zRK9sII,why have i got no power this engine really is ...
3,https://www.youtube.com/watch?v=sV6azfsp_mM,[Music] hi guys welcome back to the player you...
4,https://www.youtube.com/watch?v=6QBpB3DYl_s,i went and dropped off my golf r this morning ...
5,https://www.youtube.com/watch?v=N_TQQeGBP1g,Not Available
6,https://www.youtube.com/watch?v=hK8nx1d1wgk,hello everybody welcome back to my channel or ...


In [15]:
fiatHybrid_comments.to_csv('fiatHybrid_comments.csv', index=False)
fiatHybrid_transcript.to_csv('fiatHybrid_transcript.csv', index=False)

## Collecting data for fiat 500 BEV and its competitors

In [16]:
competitors = ['https://www.youtube.com/watch?v=GJaBDoh2k5w&t=579s', 
                'https://www.youtube.com/watch?v=MSR_ADI1rEs',
                'https://www.youtube.com/watch?v=tpOSwBjgj74', 
                'https://www.youtube.com/watch?v=LU8v9GX6w5g', 
                'https://www.youtube.com/watch?v=rGBKHgNOpLI', 
                'https://www.youtube.com/watch?v=1O8OvDUH6NM', 
                'https://www.youtube.com/watch?v=laOUvTpKCWM', 
                'https://www.youtube.com/watch?v=NXUuYdC76XE', 
                'https://www.youtube.com/watch?v=h4Uq5M_n4Pg']

(competitors_comments, competitors_transcript) = videos_info(competitors)

In [17]:
competitors_comments

Unnamed: 0,Commenter_id,Commenter_name,Comment_text,Commenter_channel,time_published,time_updated,video_link
0,UgzwfTI113JyS3pjkyN4AaABAg,Lovecars,Which one would you choose?,http://www.youtube.com/channel/UC9LfZkFRd3hJzP...,2021-01-03T18:15:49Z,2021-01-03T18:15:49Z,https://www.youtube.com/watch?v=GJaBDoh2k5w&t=...
1,UgzwfTI113JyS3pjkyN4AaABAg.9I3QuZJJAKp9ayRWWDQC3J,Dhivagar Srinivasan,Honda,http://www.youtube.com/channel/UC1xr6evwV-Wu9n...,2022-05-13T12:53:34Z,2022-05-13T12:53:34Z,https://www.youtube.com/watch?v=GJaBDoh2k5w&t=...
2,UgzwfTI113JyS3pjkyN4AaABAg.9I3QuZJJAKp9_XaWdN_YKk,Dr. Sebastian M D,The500 of course,http://www.youtube.com/channel/UCDkNDJuAPw6n0_...,2022-04-07T20:50:51Z,2022-04-07T20:50:51Z,https://www.youtube.com/watch?v=GJaBDoh2k5w&t=...
3,UgzwfTI113JyS3pjkyN4AaABAg.9I3QuZJJAKp9_FPvda6Dtf,sagmahi,"Definitely the Fiat, if only it were sold in C...",http://www.youtube.com/channel/UCO0Fi9zWOys2Hn...,2022-03-31T19:23:12Z,2022-03-31T19:23:12Z,https://www.youtube.com/watch?v=GJaBDoh2k5w&t=...
4,UgzwfTI113JyS3pjkyN4AaABAg.9I3QuZJJAKp9TSWiF7GTsZ,Jerry Flint,Jus chosen a MINI electric. Like the Honda too.,http://www.youtube.com/channel/UCWH3PhCBIu9u7M...,2021-10-13T21:52:55Z,2021-10-13T21:52:55Z,https://www.youtube.com/watch?v=GJaBDoh2k5w&t=...
...,...,...,...,...,...,...,...
35,UgxALPQM2nvjKVGjEid4AaABAg.9UjfUCFBKMc9UkXEZVWdxY,S D,"As someone who owns an Abarth 500, the &quot;n...",http://www.youtube.com/channel/UC-eSptmk4LQ7C8...,2021-11-15T03:34:26Z,2021-11-15T03:34:26Z,https://www.youtube.com/watch?v=h4Uq5M_n4Pg
36,UgxALPQM2nvjKVGjEid4AaABAg.9UjfUCFBKMc9UjgNzkE4WO,X7rocks,The irony in this comment.<br>Nobody is intere...,http://www.youtube.com/channel/UC2cEpmEeIqKdYZ...,2021-11-14T19:43:51Z,2021-11-14T19:44:04Z,https://www.youtube.com/watch?v=h4Uq5M_n4Pg
37,UgxALPQM2nvjKVGjEid4AaABAg.9UjfUCFBKMc9Ujg8yU7SZS,Dash Cam,"They , think anyone ready to pay a 10 grand p...",http://www.youtube.com/channel/UCE95jXK9Fiogt5...,2021-11-14T19:41:48Z,2021-11-14T19:41:48Z,https://www.youtube.com/watch?v=h4Uq5M_n4Pg
38,UgxALPQM2nvjKVGjEid4AaABAg.9UjfUCFBKMc9Ujg1JR26cd,Obd1andDown,"Calm down mate, it&#39;s a big pink phone on w...",http://www.youtube.com/channel/UCa12QfCV0OpHP2...,2021-11-14T19:40:45Z,2021-11-14T19:40:45Z,https://www.youtube.com/watch?v=h4Uq5M_n4Pg


In [18]:
competitors_transcript

Unnamed: 0,video_link,video_transcrpit
0,https://www.youtube.com/watch?v=GJaBDoh2k5w&t=...,[Applause] run over these are three of the hot...
1,https://www.youtube.com/watch?v=MSR_ADI1rEs,if you're a regular viewer to auto eva and if ...
2,https://www.youtube.com/watch?v=tpOSwBjgj74,this is the new fiat 500 and it's a little bit...
3,https://www.youtube.com/watch?v=LU8v9GX6w5g,[Music] so [Music] [Applause] [Music] now befo...
4,https://www.youtube.com/watch?v=rGBKHgNOpLI,for a long time it seemed like electric cars w...
5,https://www.youtube.com/watch?v=1O8OvDUH6NM,[Music] last year europe's biggest selling ev ...
6,https://www.youtube.com/watch?v=laOUvTpKCWM,there are plenty of electric cars being launch...
7,https://www.youtube.com/watch?v=NXUuYdC76XE,[Music] so let's have a chat about range now l...
8,https://www.youtube.com/watch?v=h4Uq5M_n4Pg,if you were at the google revival in 2017 you ...


In [19]:
competitors_comments.to_csv('competitors_comments.csv', index=False)
competitors_transcript.to_csv('competitors_transcript.csv', index=False)