[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/farheenfab/AppliedText_CW/blob/main/CW1-generate_dataset.ipynb)


# F20AA Coursework 1

## 1) Data Collection:

In [180]:
import googleapiclient.discovery
import googleapiclient.errors
import numpy as np
import pandas as pd
import glob

api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = "AIzaSyAWj_uzrhZL18X32S_P79pT1wnSYGpuA4k"

Reference

https://developers.google.com/youtube/v3/docs/search/list#parameters

https://developers.google.com/youtube/v3/docs/comments/list

In [181]:
class api_handler:
    def __init__(self, api_service_name, api_version, developer_key):
        self.client = googleapiclient.discovery.build(api_service_name,
                                                    api_version,
                                                    developerKey=developer_key)
        
    # Search for videos details given id
    def get_video_details(self, videoId, part="snippet"):
        request = self.client.videos().list(
            part=part,
            id=videoId
        )
        response = request.execute()

        if 'items' in response:
            video_details = response['items'][0]
            snippet=video_details['snippet']
            snippet['videoId']=videoId
            snippet['id']=videoId
            snippet['publishTime']=video_details.get('snippet', {}).get('publishedAt', {})
            snippet['thumbnails']=video_details.get('snippet', {}).get('thumbnails', {}).get('default', {}).get('url', '')
            return snippet

        return None

    # Search for videos given query
    def get_videos(self,query,maxResults=5,part="snippet"):
        request = self.client.search().list(
            part=part,
            maxResults=maxResults,
            # higher view count is likely to be more relevent 
            order="viewCount",
            q=query,  
            # american region videos 
            regionCode="US",
            # english videos
            relevanceLanguage="en",
            type="video"
        )
        response = request.execute()
        return response
    
    # Format Response from get_videos to dataframe
    def get_video_df(response):
        items=[]
        for item in response['items']:
            snippet=item.get('snippet', {})
            items+=[{
                'title':snippet.get('title', ''),
                'videoId':item.get('id', {}).get('videoId', ''),
                'channelTitle':snippet.get('channelTitle', ''),
                'publishTime':snippet.get('publishTime', ''),
                'description':snippet.get('description', ''),
                'thumbnails':snippet.get('thumbnails', {}).get('default', {}).get('url', '')
                }]
        df=pd.DataFrame(items)
        return df
    
    # Get comments from video
    def get_comments(self,videoId,part="snippet",maxResults=100,maxResultsDepth=100):
        all_comments = []
        nextPageToken = None
        while maxResults > 0:
            request = self.client.commentThreads().list(
                part=part,
                videoId=videoId,
                maxResults=min(maxResults, 100),
                order='relevance',
                moderationStatus='published',
                textFormat='plainText',
                pageToken=nextPageToken
            )
            response = request.execute()
            nextPageToken = response.get('nextPageToken')
            if 'items' in response:
                all_comments+=[response]
                for item in response['items']:
                    # extract the comment ID to get replies
                    comment_id = item.get('snippet',{}).get('topLevelComment',{}).get('id','')
                    if item.get('snippet',{}).get('totalReplyCount',0)>2:
                        print('getting replies:',item.get('snippet',{}).get('totalReplyCount',0))
                        replies = self.get_comment_replies(comment_id, maxResults=maxResultsDepth)
                        all_comments += replies

            maxResults -= min(maxResults, 100)
            if nextPageToken is None:
                break;    
        return all_comments
    
    # Get replies from comment 
    def get_comment_replies(self, commentId, part="snippet", maxResults=100):
        all_comments = []
        nextPageToken = None
        while maxResults > 0 and (nextPageToken != None or len(all_comments)==0):

            request = self.client.comments().list(
                part=part,
                parentId=commentId,
                maxResults=min(maxResults, 100),
                textFormat='plainText',
                pageToken=nextPageToken
            )

            response = request.execute()
            nextPageToken = response.get('nextPageToken')

            if 'items' in response and len(response['items'])>0:
                for item in response['items']:
                    modified_response = {
                        'items': [
                            {
                                'id':item.get('id'),
                                'snippet': {
                                    'topLevelComment': {
                                        'snippet': item.get('snippet','')
                                    }
                                }
                            }
                        ]
                    }
                    all_comments += [modified_response]
            maxResults -= min(maxResults, 100)
            if nextPageToken is None:
                break;    
        return all_comments

    # Format response from get_comments to dataframe
    def get_comments_df(response, video,product):
        comments = []
        for pages in response:
            for item in pages['items']:
                comment = item.get('snippet', {}).get('topLevelComment', {}).get('snippet', {})
                comments.append([
                        product,
                        video.get('title', ''),
                        video.get('videoId', ''),
                        video.get('channelTitle', ''),
                        video.get('publishTime', ''),
                        video.get('description', ''),
                        video.get('thumbnails', ''),
                        item.get('id', ''),  
                        comment.get('parentId', ''),  
                        comment.get('authorDisplayName', '')[1:],  
                        comment.get('publishedAt', ''),
                        comment.get('updatedAt', ''),
                        comment.get('likeCount', ''),
                        comment.get('textDisplay', '')
                    ])

        df = pd.DataFrame(comments,
            columns=['product', 'v_title', 'v_videoId',
                    'v_channelTitle', 'v_publishTime',
                    'v_description', 'v_thumbnail',
                    'c_id','c_parentId',
                    'c_author', 'c_published_at',
                    'c_updated_at', 'c_like_count',
                    'c_text'])
        
        return df
    
    # Search for videos related to products iteratively
    # Collect comments from each video and place it into an array
    def create_video_df_from_search(self, products,
                                    number_of_videos_per_product=5,
                                    number_of_comments_per_video=100
                                    ,number_of_replies_per_comment=100):
        multiple_video_comments = pd.DataFrame()
        for product in products:
            # get 25 first videos with the highest viewer counts 
            response = self.get_videos(query=product, maxResults=number_of_videos_per_product)
            # Convert results to df
            videos_df = api_handler.get_video_df(response)
            # For each video get a maximum of 100 comments
            # and place comments into an array
            for _, video in videos_df.iterrows():
                try:
                    response = self.get_comments(video['videoId'], maxResults=number_of_comments_per_video,maxResultsDepth=number_of_replies_per_comment)
                    comments_df = api_handler.get_comments_df(response, video, product)
                except:
                    # Function fails as the API returns 403 if the channel has comments disabled
                    # place an empty entry instead it can be deleted later
                    comments_df = pd.DataFrame(np.zeros((1, 14)),
                                                columns=['product', 'v_title', 'v_videoId',
                                                        'v_channelTitle', 'v_publishTime',
                                                        'v_description', 'v_thumbnail',
                                                        'c_id','c_parentId',
                                                        'c_author', 'c_published_at',
                                                        'c_updated_at', 'c_like_count',
                                                        'c_text'])
                    print('Unable to retrieve comments:', video.get('title', ''))
                multiple_video_comments = pd.concat([multiple_video_comments, comments_df], ignore_index=True)
        return multiple_video_comments
        
    # alternative method by explicitely specifying videos
    def create_video_df(self,products,videos,number_of_comments_per_video=100,number_of_replies_per_comment=100):
        count=0
        multiple_video_comments = pd.DataFrame()
        for product in products:
            for video in videos[count]:
                response = self.get_comments(video,maxResults=number_of_comments_per_video,maxResultsDepth=number_of_replies_per_comment) 
                video=self.get_video_details(video)
                comments_df = api_handler.get_comments_df(response, video, product)
                multiple_video_comments = pd.concat([multiple_video_comments, comments_df], ignore_index=True)
            count+=1
        return multiple_video_comments

In [182]:
products=["Squid Game Korean Drama (2021)"]

# careful when adding videos index number should match between products and videos
# index 0 should contain the videos used to get comments for video 0 and so on 
# videos=[['LhCQ7lHEjU8','Yh7PNUGxihU','8sXTfzaLmiQ'],
#         ['c2xta7hcvXI','mkrrKGo1VEs','CL0wU3ss2uw','jPKm6kc9j5A','g0Oj4A2rslY']]

youtube=api_handler(api_service_name, api_version, DEVELOPER_KEY)


In [183]:
multiple_video_comments=youtube.create_video_df_from_search(products,number_of_videos_per_product=20,number_of_comments_per_video=500,number_of_replies_per_comment=0)
# multiple_video_comments=youtube.create_video_df(products,videos,number_of_comments_per_video=20000,number_of_replies_per_comment=20000)
multiple_video_comments

getting replies: 750
getting replies: 531
getting replies: 5
getting replies: 14
getting replies: 188
getting replies: 3
getting replies: 6
getting replies: 3
getting replies: 7
getting replies: 10
getting replies: 5
getting replies: 17
getting replies: 9
getting replies: 5
getting replies: 4
getting replies: 7
getting replies: 14
getting replies: 3
getting replies: 12
getting replies: 9
getting replies: 70
getting replies: 3
getting replies: 3
getting replies: 3
getting replies: 24
getting replies: 5
getting replies: 6
getting replies: 10
getting replies: 4
getting replies: 4
getting replies: 3
getting replies: 11
getting replies: 3
getting replies: 5
getting replies: 3
getting replies: 3
getting replies: 15
getting replies: 9
getting replies: 6
getting replies: 8
getting replies: 5
getting replies: 70
getting replies: 7
getting replies: 9
getting replies: 4
getting replies: 22
getting replies: 12
getting replies: 3
getting replies: 21
getting replies: 12
getting replies: 6
getting re

Unnamed: 0,product,v_title,v_videoId,v_channelTitle,v_publishTime,v_description,v_thumbnail,c_id,c_parentId,c_author,c_published_at,c_updated_at,c_like_count,c_text
0,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,UgzH8vliQSJKHQMGZjx4AaABAg,,MrBeast,2021-11-24T21:02:45Z,2021-11-24T21:02:45Z,1007017,"Like I said in the video, subscribe if you hav..."
1,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,UgwDhFNTCbfck5apuUJ4AaABAg,,DoodleChaos,2021-11-24T22:07:54Z,2021-11-24T22:07:54Z,513598,"Huge props to the set designers, everything wa..."
2,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,UgzVlS_nKI4aXISU_ep4AaABAg,,mukul_editz,2023-12-30T01:55:59Z,2023-12-30T01:55:59Z,25,Your videos are so interesting ❤
3,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,UgxRqmVfQw-fRVcyBiR4AaABAg,,mubashsherafirdous1744,2024-02-10T14:27:53Z,2024-02-10T14:27:53Z,122,The way jimmy smiles melts my heart 🥹🫶🏼 he's t...
4,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,Ugxu5B8dQ9-mZpfW-UV4AaABAg,,user-cs9zv3gh1k,2024-01-30T20:17:02Z,2024-01-30T20:17:02Z,101,This version of the game is pretty much what t...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9777,Squid Game Korean Drama (2021),Squid Game stars take on the Dalgona Challenge...,TYd_pT9hZrM,Netflix K-Content,2021-10-09T09:00:10Z,They may have survived the dalgona challenge i...,https://i.ytimg.com/vi/TYd_pT9hZrM/default.jpg,UgxpdIqCmTsHWVghDp14AaABAg,,jshtd2015,2022-05-19T13:54:56Z,2022-05-19T13:54:56Z,0,I suited so much when watching this
9778,Squid Game Korean Drama (2021),Squid Game stars take on the Dalgona Challenge...,TYd_pT9hZrM,Netflix K-Content,2021-10-09T09:00:10Z,They may have survived the dalgona challenge i...,https://i.ytimg.com/vi/TYd_pT9hZrM/default.jpg,UgwgjAbztLIXMaQoqnd4AaABAg,,coreywigent7693,2021-10-23T15:30:56Z,2021-10-23T15:30:56Z,0,I'm gathering the dragonballs so I can wish al...
9779,Squid Game Korean Drama (2021),Squid Game stars take on the Dalgona Challenge...,TYd_pT9hZrM,Netflix K-Content,2021-10-09T09:00:10Z,They may have survived the dalgona challenge i...,https://i.ytimg.com/vi/TYd_pT9hZrM/default.jpg,UgyG5SbWQi2jZCfJ-fl4AaABAg,,cryptostonker6137,2021-11-12T16:14:46Z,2021-11-12T16:14:46Z,0,i love them all so much :3
9780,Squid Game Korean Drama (2021),Squid Game stars take on the Dalgona Challenge...,TYd_pT9hZrM,Netflix K-Content,2021-10-09T09:00:10Z,They may have survived the dalgona challenge i...,https://i.ytimg.com/vi/TYd_pT9hZrM/default.jpg,UgwU_lziwYY_xSfm1St4AaABAg,,tinadaturtle2077,2021-10-14T09:04:35Z,2021-10-14T09:04:35Z,1748,I just wanna directly meet the guy who played ...


Taken from:

https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python

In [184]:
import re
from bs4 import BeautifulSoup

def remove_emojis(data):
    if isinstance(data, str):
        # Remove html tags
        data = BeautifulSoup(data, "html.parser").get_text()
        # Remove emote, etc
        emoj = re.compile("["
            u"\U0001F600-\U0001F64F"  # emoticons
            u"\U0001F300-\U0001F5FF"  # symbols & pictographs
            u"\U0001F680-\U0001F6FF"  # transport & map symbols
            u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
            u"\U00002500-\U00002BEF"  # chinese char
            u"\U00002702-\U000027B0"
            u"\U000024C2-\U0001F251"
            u"\U0001f926-\U0001f937"
            u"\U00010000-\U0010ffff"
            u"\u2640-\u2642" 
            u"\u2600-\u2B55"
            u"\u200d"
            u"\u23cf"
            u"\u23e9"
            u"\u231a"
            u"\ufe0f"  # dingbats
            u"\u3030"
                        "]+", re.UNICODE)
        # english_words = re.compile(r'\b[a-zA-Z]+\b')

        return re.sub(emoj, '', data)
    return 

In [185]:
multiple_video_comments.dropna(subset=['c_text'],inplace=True)

In [186]:
# remove emotes from the text to be analyzed c_text = comment text
multiple_video_comments['c_text']=multiple_video_comments['c_text'].apply(remove_emojis)

df_length_before = len(multiple_video_comments)
print("DataFrame Length Before:", df_length_before)

# drop duplicates
multiple_video_comments.drop_duplicates(inplace=True)

# drop rows with empty or text length <= 2 comments
multiple_video_comments = multiple_video_comments[multiple_video_comments['c_text'].apply(lambda x: len(x) > 2)]

df_length_after = len(multiple_video_comments)
print("DataFrame Length After:", df_length_after)

multiple_video_comments

  data = BeautifulSoup(data, "html.parser").get_text()


DataFrame Length Before: 9782
DataFrame Length After: 8795


Unnamed: 0,product,v_title,v_videoId,v_channelTitle,v_publishTime,v_description,v_thumbnail,c_id,c_parentId,c_author,c_published_at,c_updated_at,c_like_count,c_text
0,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,UgzH8vliQSJKHQMGZjx4AaABAg,,MrBeast,2021-11-24T21:02:45Z,2021-11-24T21:02:45Z,1007017,"Like I said in the video, subscribe if you hav..."
1,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,UgwDhFNTCbfck5apuUJ4AaABAg,,DoodleChaos,2021-11-24T22:07:54Z,2021-11-24T22:07:54Z,513598,"Huge props to the set designers, everything wa..."
2,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,UgzVlS_nKI4aXISU_ep4AaABAg,,mukul_editz,2023-12-30T01:55:59Z,2023-12-30T01:55:59Z,25,Your videos are so interesting
3,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,UgxRqmVfQw-fRVcyBiR4AaABAg,,mubashsherafirdous1744,2024-02-10T14:27:53Z,2024-02-10T14:27:53Z,122,The way jimmy smiles melts my heart he's the ...
4,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,Ugxu5B8dQ9-mZpfW-UV4AaABAg,,user-cs9zv3gh1k,2024-01-30T20:17:02Z,2024-01-30T20:17:02Z,101,This version of the game is pretty much what t...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9777,Squid Game Korean Drama (2021),Squid Game stars take on the Dalgona Challenge...,TYd_pT9hZrM,Netflix K-Content,2021-10-09T09:00:10Z,They may have survived the dalgona challenge i...,https://i.ytimg.com/vi/TYd_pT9hZrM/default.jpg,UgxpdIqCmTsHWVghDp14AaABAg,,jshtd2015,2022-05-19T13:54:56Z,2022-05-19T13:54:56Z,0,I suited so much when watching this
9778,Squid Game Korean Drama (2021),Squid Game stars take on the Dalgona Challenge...,TYd_pT9hZrM,Netflix K-Content,2021-10-09T09:00:10Z,They may have survived the dalgona challenge i...,https://i.ytimg.com/vi/TYd_pT9hZrM/default.jpg,UgwgjAbztLIXMaQoqnd4AaABAg,,coreywigent7693,2021-10-23T15:30:56Z,2021-10-23T15:30:56Z,0,I'm gathering the dragonballs so I can wish al...
9779,Squid Game Korean Drama (2021),Squid Game stars take on the Dalgona Challenge...,TYd_pT9hZrM,Netflix K-Content,2021-10-09T09:00:10Z,They may have survived the dalgona challenge i...,https://i.ytimg.com/vi/TYd_pT9hZrM/default.jpg,UgyG5SbWQi2jZCfJ-fl4AaABAg,,cryptostonker6137,2021-11-12T16:14:46Z,2021-11-12T16:14:46Z,0,i love them all so much :3
9780,Squid Game Korean Drama (2021),Squid Game stars take on the Dalgona Challenge...,TYd_pT9hZrM,Netflix K-Content,2021-10-09T09:00:10Z,They may have survived the dalgona challenge i...,https://i.ytimg.com/vi/TYd_pT9hZrM/default.jpg,UgwU_lziwYY_xSfm1St4AaABAg,,tinadaturtle2077,2021-10-14T09:04:35Z,2021-10-14T09:04:35Z,1748,I just wanna directly meet the guy who played ...


Reference

https://stackoverflow.com/questions/40375366/pandas-to-csv-checking-for-overwrite

In [187]:
from langdetect import detect

def filter_comments(df):
    c = 0
    comments = []
    irrelevant_keywords = ['HYVE', 'crypto', 'promotion', 'ad', 'spam', 'advertisement', 'spoiler', 'leak', 'promo', 'off-topic', 'clickbait',
                          'self-promotion', '0:', '1:', '2:', '3:', '4:', '5:', '6:', '7:',
                          '8:', '9:', '10:', '11:', '12:', '13:', '14:', '15:']
    for index, row in df.iterrows():
        try:
            if detect(row['c_text']) == 'en' and not any(keyword in row['c_text'] for keyword in irrelevant_keywords):
                comments.append(row)
                c += 1
        except Exception as e:  # Catch any exception
            pass
    print("Number of English Comments: ", c)
    new_df = pd.DataFrame(comments, 
                columns=['product', 'v_title', 'v_videoId',
                    'v_channelTitle', 'v_publishTime',
                    'v_description', 'v_thumbnail',
                    'c_id','c_parentId',
                    'c_author', 'c_published_at',
                    'c_updated_at', 'c_like_count',
                    'c_text'])  # Create a new DataFrame from the list of rows
    new_df = new_df.sort_values(by = ['c_like_count'], ascending = False)
    new_df.drop_duplicates(inplace=True)
    new_df = new_df[:500]
    return new_df

In [188]:
new_df = filter_comments(multiple_video_comments)

Number of English Comments:  4549


In [189]:
new_df

Unnamed: 0,product,v_title,v_videoId,v_channelTitle,v_publishTime,v_description,v_thumbnail,c_id,c_parentId,c_author,c_published_at,c_updated_at,c_like_count,c_text
1,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,UgwDhFNTCbfck5apuUJ4AaABAg,,DoodleChaos,2021-11-24T22:07:54Z,2021-11-24T22:07:54Z,513598,"Huge props to the set designers, everything wa..."
7,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,Ugz2W0D6UB1nWoB6wG14AaABAg,,WhitneyAvalon,2021-11-24T23:45:19Z,2021-11-24T23:45:19Z,63377,The sets in this are INCREDIBLE. Would love to...
9600,Squid Game Korean Drama (2021),Squid Game stars take on the Dalgona Challenge...,TYd_pT9hZrM,Netflix K-Content,2021-10-09T09:00:10Z,They may have survived the dalgona challenge i...,https://i.ytimg.com/vi/TYd_pT9hZrM/default.jpg,Ugws5y7jVSnjKeSbbsB4AaABAg,,KPOPLIFEbts2013,2021-10-09T09:41:49Z,2021-10-09T09:41:49Z,61021,"I can't believe, people actually DM'ed the man..."
4589,Squid Game Korean Drama (2021),Squid Game | Official Trailer | Netflix,oqxAJKy0ii4,Netflix,2021-09-02T00:00:02Z,A Netflix Series | Squid Game Survive or die W...,https://i.ytimg.com/vi/oqxAJKy0ii4/default.jpg,UgyVSc5yR-lejpakJ2B4AaABAg,,impostor8850,2021-09-28T22:31:47Z,2021-09-28T22:31:47Z,41708,this is one of the most bingeable shows on net...
999,Squid Game Korean Drama (2021),오징어 게임 3분 요약 [Squid Game 3 minutes summary],iRBQGFN-PEY,소맥거핀,2021-09-26T07:31:20Z,소맥거핀 #오징어게임 #애니메이션 #SquidGame 영상 내 스포일러를 주의해주세...,https://i.ytimg.com/vi/iRBQGFN-PEY/default.jpg,UgxQyRXosVG2Ld11usB4AaABAg,,somacguffin,2021-09-26T07:33:55Z,2021-10-04T04:29:54Z,37055,\n !\n\n(Thank you for watching!)\r\n(Ha...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
371,Squid Game Korean Drama (2021),"$456,000 Squid Game In Real Life!",0e3GPea1Tyg,MrBeast,2021-11-24T21:00:01Z,MAKE SURE YOU WATCH UNTIL GLASS BRIDGE IT'S IN...,https://i.ytimg.com/vi/0e3GPea1Tyg/default.jpg,Ugy5838JZO0EjRUyMg94AaABAg,,R.NComedy,2023-11-11T12:52:34Z,2023-11-11T12:52:34Z,130,"Huge props to the set designer , everything wa..."
5975,Squid Game Korean Drama (2021),i need this clock - squid game,43zTkfMngLo,凍檸斌 lemontea,2021-10-08T18:25:07Z,,https://i.ytimg.com/vi/43zTkfMngLo/default.jpg,Ugwwq3djPMatyDevsHN4AaABAg,,justjenna6062,2021-10-12T10:33:36Z,2021-10-12T10:33:36Z,129,He was just peacefully sleeping and the doll j...
3018,Squid Game Korean Drama (2021),SQUID GAMES: Honeycomb Challenge with Shinchan...,ZeOScUh9zz0,Live Engineer,2021-12-30T07:30:03Z,"Hello friends, I do some funny commentary on s...",https://i.ytimg.com/vi/ZeOScUh9zz0/default.jpg,UgyhG2qg1NxhZKPUgeh4AaABAg,,sonaliparadise3591,2022-03-30T11:21:57Z,2022-03-30T11:21:57Z,129,Keep continue make a big video of this pls
8819,Squid Game Korean Drama (2021),Red Light Green Light No Blood - Squid Game 1,Ww9HCin8ORs,PopMov,2021-10-06T20:56:52Z,I did this for a special person who wanted to ...,https://i.ytimg.com/vi/Ww9HCin8ORs/default.jpg,Ugz1SVpAyFAP-UjGe2x4AaABAg,,yipeeeeeeeeeeeeeeeeeeeee9047,2021-10-22T04:33:16Z,2021-10-22T04:33:16Z,129,"Oh finally, i can watch squid game without bei..."


In [190]:
# import os

# # Creating a folder for the comments
# directory = 'comments'
# if not os.path.exists(directory):
#     os.makedirs(directory)

# for index, row in new_df.iterrows():
#     # Different file path for each of the comments
#     file_path = os.path.join(directory, f'comment_{index}.txt')
    
#     # Write the comment content to the text file
#     with open(file_path, 'w', encoding='utf-8') as file:
#         file.write(row['c_text'])

Create CSV

In [191]:
import glob

filename = 'final_comments_df.csv'
files_present = glob.glob(filename)
# will only write to disk if file doesnt exist
if not files_present:
    new_df.to_csv(filename, index=False)
    new_df
else:
    print (f'File Already Exists. Delete {filename}' )