[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/farheenfab/AppliedText_CW/blob/main/CW1-generate_dataset.ipynb)


# F20AA Coursework 1

## 1) Data Collection:

In [1]:
import googleapiclient.discovery
import googleapiclient.errors
import numpy as np
import pandas as pd
import glob

import os

# Service name and version passed to googleapiclient.discovery.build.
api_service_name = "youtube"
api_version = "v3"
# SECURITY: never commit API credentials to a notebook or repository.
# The key is read from the YOUTUBE_API_KEY environment variable instead; any
# key that was previously committed should be revoked and regenerated.
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY", "")

Reference

https://developers.google.com/youtube/v3/docs/search/list#parameters

https://developers.google.com/youtube/v3/docs/comments/list

In [2]:
class api_handler:
    """Helper around the YouTube Data API v3 client for collecting comments.

    Wraps video search, comment-thread and reply retrieval, and converts the
    raw API responses into pandas DataFrames.
    """

    # Column order shared by every comments DataFrame built by this class.
    COMMENT_COLUMNS = ['product', 'v_title', 'v_videoId',
                       'v_channelTitle', 'v_publishTime',
                       'v_description', 'v_thumbnail',
                       'c_id', 'c_parentId',
                       'c_author', 'c_published_at',
                       'c_updated_at', 'c_like_count',
                       'c_text']

    # Largest page size requested from the API in a single call.
    PAGE_SIZE = 100

    def __init__(self, api_service_name, api_version, developer_key):
        """Build the API client for the given service name, version and key."""
        self.client = googleapiclient.discovery.build(api_service_name,
                                                      api_version,
                                                      developerKey=developer_key)

    def get_video_details(self, videoId, part="snippet"):
        """Look up a single video by id.

        Returns the video's ``snippet`` dict augmented with ``videoId``,
        ``id``, ``publishTime`` and a flattened default ``thumbnails`` URL, or
        ``None`` when the response contains no items for that id.
        """
        request = self.client.videos().list(
            part=part,
            id=videoId
        )
        response = request.execute()

        items = response.get('items')
        if not items:
            # An 'items' key holding an empty list must not be indexed.
            return None

        snippet = items[0]['snippet']
        snippet['videoId'] = videoId
        snippet['id'] = videoId
        snippet['publishTime'] = snippet.get('publishedAt', '')
        snippet['thumbnails'] = (snippet.get('thumbnails', {})
                                 .get('default', {})
                                 .get('url', ''))
        return snippet

    def get_videos(self, query, maxResults=5, part="snippet"):
        """Search for videos matching ``query`` and return the raw response."""
        request = self.client.search().list(
            part=part,
            maxResults=maxResults,
            # higher view count is likely to be more relevant
            order="viewCount",
            q=query,
            # American region videos
            regionCode="US",
            # English videos
            relevanceLanguage="en",
            type="video"
        )
        return request.execute()

    @staticmethod
    def get_video_df(response):
        """Convert a ``get_videos`` response into a DataFrame, one row per video.

        Missing fields become empty strings; a response without ``items``
        yields an empty DataFrame.
        """
        rows = []
        for item in response.get('items', []):
            snippet = item.get('snippet', {})
            rows.append({
                'title': snippet.get('title', ''),
                'videoId': item.get('id', {}).get('videoId', ''),
                'channelTitle': snippet.get('channelTitle', ''),
                'publishTime': snippet.get('publishTime', ''),
                'description': snippet.get('description', ''),
                'thumbnails': snippet.get('thumbnails', {}).get('default', {}).get('url', ''),
            })
        return pd.DataFrame(rows)

    def get_comments(self, videoId, part="snippet", maxResults=100, maxResultsDepth=100):
        """Collect comment threads (and replies) for one video.

        Pages through ``commentThreads().list`` until ``maxResults`` threads
        have been requested or there is no next page. For every thread with
        more than two replies, up to ``maxResultsDepth`` replies are fetched
        through ``get_comment_replies``.

        Returns a list of page-shaped dicts (each with an ``items`` list) that
        ``get_comments_df`` can consume.
        """
        all_comments = []
        nextPageToken = None
        while maxResults > 0:
            page_size = min(maxResults, self.PAGE_SIZE)
            request = self.client.commentThreads().list(
                part=part,
                videoId=videoId,
                maxResults=page_size,
                order='relevance',
                moderationStatus='published',
                textFormat='plainText',
                pageToken=nextPageToken
            )
            response = request.execute()
            nextPageToken = response.get('nextPageToken')
            if 'items' in response:
                all_comments.append(response)
                for item in response['items']:
                    thread = item.get('snippet', {})
                    reply_count = thread.get('totalReplyCount', 0)
                    if reply_count > 2:
                        # the top-level comment id is the parent id of its replies
                        comment_id = thread.get('topLevelComment', {}).get('id', '')
                        print('getting replies:', reply_count)
                        all_comments += self.get_comment_replies(
                            comment_id, maxResults=maxResultsDepth)

            maxResults -= page_size
            if nextPageToken is None:
                break
        return all_comments

    def get_comment_replies(self, commentId, part="snippet", maxResults=100):
        """Collect up to ``maxResults`` replies to the comment ``commentId``.

        Each reply is wrapped in the same page/thread shape that
        ``commentThreads().list`` produces so that ``get_comments_df`` can
        treat replies and top-level comments uniformly.
        """
        all_comments = []
        nextPageToken = None
        # The loop ends when the budget is used up or the last page is reached.
        while maxResults > 0:
            page_size = min(maxResults, self.PAGE_SIZE)
            request = self.client.comments().list(
                part=part,
                parentId=commentId,
                maxResults=page_size,
                textFormat='plainText',
                pageToken=nextPageToken
            )

            response = request.execute()
            nextPageToken = response.get('nextPageToken')

            for item in response.get('items', []):
                all_comments.append({
                    'items': [
                        {
                            'id': item.get('id'),
                            'snippet': {
                                'topLevelComment': {
                                    # default to a dict so later .get() calls work
                                    'snippet': item.get('snippet', {})
                                }
                            }
                        }
                    ]
                })
            maxResults -= page_size
            if nextPageToken is None:
                break
        return all_comments

    @staticmethod
    def get_comments_df(response, video, product):
        """Flatten ``get_comments`` output into a DataFrame.

        ``response`` is a list of page-shaped dicts, ``video`` is any mapping
        with a ``.get`` method (dict or pandas Series) describing the video,
        and ``product`` is the label stored in the ``product`` column.
        Columns follow ``COMMENT_COLUMNS``.
        """
        rows = []
        for page in response:
            for item in page['items']:
                comment = item.get('snippet', {}).get('topLevelComment', {}).get('snippet', {})
                author = comment.get('authorDisplayName') or ''
                # Strip only a leading '@' (handle prefix); unconditionally
                # dropping the first character would corrupt plain names.
                if author.startswith('@'):
                    author = author[1:]
                rows.append([
                    product,
                    video.get('title', ''),
                    video.get('videoId', ''),
                    video.get('channelTitle', ''),
                    video.get('publishTime', ''),
                    video.get('description', ''),
                    video.get('thumbnails', ''),
                    item.get('id', ''),
                    comment.get('parentId', ''),
                    author,
                    comment.get('publishedAt', ''),
                    comment.get('updatedAt', ''),
                    comment.get('likeCount', ''),
                    comment.get('textDisplay', ''),
                ])

        return pd.DataFrame(rows, columns=api_handler.COMMENT_COLUMNS)

    def create_video_df_from_search(self, products,
                                    number_of_videos_per_product=5,
                                    number_of_comments_per_video=100,
                                    number_of_replies_per_comment=100):
        """Search videos for every product and gather their comments.

        For each product the top ``number_of_videos_per_product`` search hits
        are taken, and for each video up to ``number_of_comments_per_video``
        comment threads are fetched. Returns one DataFrame with all comments.
        """
        frames = []
        for product in products:
            # search results are ordered by view count (see get_videos)
            response = self.get_videos(query=product, maxResults=number_of_videos_per_product)
            videos_df = self.get_video_df(response)
            for _, video in videos_df.iterrows():
                try:
                    response = self.get_comments(
                        video['videoId'],
                        maxResults=number_of_comments_per_video,
                        maxResultsDepth=number_of_replies_per_comment)
                    comments_df = self.get_comments_df(response, video, product)
                except Exception:
                    # The API returns 403 if the channel has comments disabled.
                    # Place a zero-filled entry instead; it can be deleted later.
                    # `Exception` (not a bare except) keeps Ctrl-C working.
                    comments_df = pd.DataFrame(
                        np.zeros((1, len(self.COMMENT_COLUMNS))),
                        columns=self.COMMENT_COLUMNS)
                    print('Unable to retrieve comments:', video.get('title', ''))
                frames.append(comments_df)
        # Concatenate once instead of re-copying the accumulated frame per video.
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True)

    def create_video_df(self, products, videos, number_of_comments_per_video=100,
                        number_of_replies_per_comment=100):
        """Alternative to the search: gather comments for explicitly listed videos.

        ``videos[i]`` is the list of video ids belonging to ``products[i]``.
        Returns one DataFrame with all comments.
        """
        frames = []
        for index, product in enumerate(products):
            for video_id in videos[index]:
                response = self.get_comments(
                    video_id,
                    maxResults=number_of_comments_per_video,
                    maxResultsDepth=number_of_replies_per_comment)
                details = self.get_video_details(video_id)
                if details is None:
                    # Keep the comments even when the video lookup found nothing.
                    details = {'videoId': video_id}
                frames.append(self.get_comments_df(response, details, product))
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True)

In [3]:
# Search queries / labels; comments are collected once per product.
products=["Strong Girl Nam-soon"]

# Careful when adding videos: the index must match between products and videos.
# videos[0] holds the video ids used to collect comments for products[0], and so on.
# NOTE(review): there are two id lists here but only one product, so
# create_video_df would only ever read videos[0] — confirm this is intended.
videos=[['LhCQ7lHEjU8','Yh7PNUGxihU','8sXTfzaLmiQ'],
        ['c2xta7hcvXI','mkrrKGo1VEs','CL0wU3ss2uw','jPKm6kc9j5A','g0Oj4A2rslY']]

# Build the API wrapper used by the following cells.
youtube=api_handler(api_service_name, api_version, DEVELOPER_KEY)


In [4]:
# Search-driven collection: 10 videos per product, up to 50 comment threads
# per video, and a reply budget of zero.
multiple_video_comments = youtube.create_video_df_from_search(
    products,
    number_of_videos_per_product=10,
    number_of_comments_per_video=50,
    number_of_replies_per_comment=0,
)
# multiple_video_comments=youtube.create_video_df(products,videos,number_of_comments_per_video=20000,number_of_replies_per_comment=20000)
multiple_video_comments

getting replies: 42
getting replies: 13
getting replies: 45
getting replies: 3
getting replies: 14
getting replies: 30
getting replies: 3
getting replies: 42
getting replies: 9
getting replies: 62
getting replies: 15
getting replies: 23
getting replies: 292
getting replies: 14
getting replies: 3
getting replies: 10
getting replies: 4
getting replies: 77
getting replies: 13
getting replies: 35
getting replies: 6
getting replies: 15
getting replies: 19
getting replies: 7
getting replies: 6
getting replies: 4
getting replies: 81
getting replies: 22
getting replies: 9
getting replies: 3
getting replies: 10
getting replies: 4
getting replies: 6
getting replies: 16
getting replies: 14
getting replies: 100
getting replies: 20
getting replies: 3
getting replies: 19
getting replies: 13
getting replies: 22
getting replies: 4
getting replies: 3
getting replies: 9
getting replies: 11
getting replies: 3
getting replies: 4
getting replies: 10
getting replies: 35
getting replies: 8
getting replies: 1

Unnamed: 0,product,v_title,v_videoId,v_channelTitle,v_publishTime,v_description,v_thumbnail,c_id,c_parentId,c_author,c_published_at,c_updated_at,c_like_count,c_text
0,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,Ugz_bUJfbcTKJvJtzTd4AaABAg,,l_hate_snakeu,2023-10-15T12:49:31Z,2023-10-15T12:49:31Z,2855,Bless that girl who helped her she has a pure ...
1,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,UgyjXTyosk6QNriMfL94AaABAg,,ergoproxy5773,2023-10-20T21:12:46Z,2023-10-20T21:12:46Z,884,Moral lesson: Don’t drink too much and stay ae...
2,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,UgzH84Y5rQTurk09B8d4AaABAg,,gimmiedashasha,2023-10-16T08:46:54Z,2023-10-16T08:46:54Z,1616,"""Great performance guys don't forget to take d..."
3,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,Ugy9uQL1F0XvPHBoqQ54AaABAg,,3AHoles,2023-11-20T18:14:11Z,2023-11-20T18:14:34Z,105,Obviously reenactment but this happens all the...
4,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,Ugxsqn7lMbyrJAUHyxp4AaABAg,,kevinshelley2803,2023-10-20T16:08:40Z,2023-10-20T16:08:40Z,474,Awesome. So fortunate that camera was there to...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,Strong Girl Nam-soon,"He can’t sleep, needs cold shower🤣❤️🦋#onlyforl...",1WMRUKjOle0,VeeDaa,2023-11-23T21:44:22Z,,https://i.ytimg.com/vi/1WMRUKjOle0/default.jpg,Ugx-S5IV213pnOSz21h4AaABAg,,irinislam4155,2024-02-10T06:45:28Z,2024-02-10T06:45:28Z,0,Drama name is only for love
495,Strong Girl Nam-soon,"He can’t sleep, needs cold shower🤣❤️🦋#onlyforl...",1WMRUKjOle0,VeeDaa,2023-11-23T21:44:22Z,,https://i.ytimg.com/vi/1WMRUKjOle0/default.jpg,UgzFoWDJHiMg9ZpFj6Z4AaABAg,,vlexus,2023-12-07T15:57:17Z,2023-12-07T15:57:17Z,2,that is just awesome
496,Strong Girl Nam-soon,"He can’t sleep, needs cold shower🤣❤️🦋#onlyforl...",1WMRUKjOle0,VeeDaa,2023-11-23T21:44:22Z,,https://i.ytimg.com/vi/1WMRUKjOle0/default.jpg,Ugx8YNn-UXGQBwjv-QR4AaABAg,,tjsantiago0578,2023-12-06T21:36:08Z,2023-12-06T21:36:08Z,2,Best part..... ducha fria 😂😂😂😂
497,Strong Girl Nam-soon,"He can’t sleep, needs cold shower🤣❤️🦋#onlyforl...",1WMRUKjOle0,VeeDaa,2023-11-23T21:44:22Z,,https://i.ytimg.com/vi/1WMRUKjOle0/default.jpg,Ugz67MpKN98yO95KJkJ4AaABAg,,user-pu1mi6hc3l,2023-12-30T18:21:01Z,2023-12-30T18:21:01Z,0,Я люблю эту дораму❤


Emoji-removal pattern used in the next cell is taken from:

https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python

In [5]:
import re
from bs4 import BeautifulSoup

# Compiled once; the character class is kept exactly as in the Stack Overflow
# answer it was taken from. NOTE: several ranges overlap, and two of them
# (U+24C2-U+1F251 and U+10000-U+10FFFF) are far wider than emoji alone: they
# also match e.g. CJK, kana and Hangul characters.
_EMOJI_PATTERN = re.compile(
    "["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
    u"\U00002500-\U00002BEF"  # box drawing through misc symbols and arrows
    u"\U00002702-\U000027B0"  # dingbats
    u"\U000024C2-\U0001F251"  # enclosed alphanumerics .. enclosed ideographs
    u"\U0001f926-\U0001f937"  # gesture emoji
    u"\U00010000-\U0010ffff"  # every supplementary-plane code point
    u"\u2640-\u2642"          # gender signs
    u"\u2600-\u2B55"          # misc symbols .. heavy large circle
    u"\u200d"                 # zero-width joiner
    u"\u23cf"                 # eject symbol
    u"\u23e9"                 # fast-forward symbol
    u"\u231a"                 # watch
    u"\ufe0f"                 # variation selector-16
    u"\u3030"                 # wavy dash
    "]+",
    re.UNICODE,
)


def remove_emojis(data):
    """Return ``data`` with HTML markup and emoji-like characters removed.

    Strings are first reduced to their text content with BeautifulSoup, then
    every run of characters matched by the pattern above is deleted.
    Anything that is not a ``str`` yields ``None``.
    """
    if not isinstance(data, str):
        return None
    # Remove html tags
    plain_text = BeautifulSoup(data, "html.parser").get_text()
    # Remove emotes, etc.
    return _EMOJI_PATTERN.sub('', plain_text)

In [6]:
# Drop, in place, every row whose comment text (c_text) is missing.
multiple_video_comments.dropna(subset=['c_text'],inplace=True)

In [7]:
# remove emotes from the text to be analyzed (c_text = comment text)
multiple_video_comments['c_text'] = multiple_video_comments['c_text'].apply(remove_emojis)

df_length_before = len(multiple_video_comments)
print("DataFrame Length Before:", df_length_before)

# drop duplicates
multiple_video_comments.drop_duplicates(inplace=True)

# Drop rows with empty or very short (<= 2 characters) comments.
# remove_emojis returns None for non-string values (e.g. the zero-filled
# placeholder rows added when a video's comments could not be fetched), so
# guard against non-strings instead of letting len() raise a TypeError.
has_enough_text = multiple_video_comments['c_text'].apply(
    lambda text: isinstance(text, str) and len(text) > 2
)
multiple_video_comments = multiple_video_comments[has_enough_text]

df_length_after = len(multiple_video_comments)
print("DataFrame Length After:", df_length_after)

multiple_video_comments

DataFrame Length Before: 499
DataFrame Length After: 498


  data = BeautifulSoup(data, "html.parser").get_text()


Unnamed: 0,product,v_title,v_videoId,v_channelTitle,v_publishTime,v_description,v_thumbnail,c_id,c_parentId,c_author,c_published_at,c_updated_at,c_like_count,c_text
0,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,Ugz_bUJfbcTKJvJtzTd4AaABAg,,l_hate_snakeu,2023-10-15T12:49:31Z,2023-10-15T12:49:31Z,2855,Bless that girl who helped her she has a pure ...
1,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,UgyjXTyosk6QNriMfL94AaABAg,,ergoproxy5773,2023-10-20T21:12:46Z,2023-10-20T21:12:46Z,884,Moral lesson: Don’t drink too much and stay ae...
2,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,UgzH84Y5rQTurk09B8d4AaABAg,,gimmiedashasha,2023-10-16T08:46:54Z,2023-10-16T08:46:54Z,1616,"""Great performance guys don't forget to take d..."
3,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,Ugy9uQL1F0XvPHBoqQ54AaABAg,,3AHoles,2023-11-20T18:14:11Z,2023-11-20T18:14:34Z,105,Obviously reenactment but this happens all the...
4,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,Ugxsqn7lMbyrJAUHyxp4AaABAg,,kevinshelley2803,2023-10-20T16:08:40Z,2023-10-20T16:08:40Z,474,Awesome. So fortunate that camera was there to...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,Strong Girl Nam-soon,"He can’t sleep, needs cold shower🤣❤️🦋#onlyforl...",1WMRUKjOle0,VeeDaa,2023-11-23T21:44:22Z,,https://i.ytimg.com/vi/1WMRUKjOle0/default.jpg,Ugx-S5IV213pnOSz21h4AaABAg,,irinislam4155,2024-02-10T06:45:28Z,2024-02-10T06:45:28Z,0,Drama name is only for love
495,Strong Girl Nam-soon,"He can’t sleep, needs cold shower🤣❤️🦋#onlyforl...",1WMRUKjOle0,VeeDaa,2023-11-23T21:44:22Z,,https://i.ytimg.com/vi/1WMRUKjOle0/default.jpg,UgzFoWDJHiMg9ZpFj6Z4AaABAg,,vlexus,2023-12-07T15:57:17Z,2023-12-07T15:57:17Z,2,that is just awesome
496,Strong Girl Nam-soon,"He can’t sleep, needs cold shower🤣❤️🦋#onlyforl...",1WMRUKjOle0,VeeDaa,2023-11-23T21:44:22Z,,https://i.ytimg.com/vi/1WMRUKjOle0/default.jpg,Ugx8YNn-UXGQBwjv-QR4AaABAg,,tjsantiago0578,2023-12-06T21:36:08Z,2023-12-06T21:36:08Z,2,Best part..... ducha fria
497,Strong Girl Nam-soon,"He can’t sleep, needs cold shower🤣❤️🦋#onlyforl...",1WMRUKjOle0,VeeDaa,2023-11-23T21:44:22Z,,https://i.ytimg.com/vi/1WMRUKjOle0/default.jpg,Ugz67MpKN98yO95KJkJ4AaABAg,,user-pu1mi6hc3l,2023-12-30T18:21:01Z,2023-12-30T18:21:01Z,0,Я люблю эту дораму


Reference (checking whether the CSV already exists before writing it, used in the final cell):

https://stackoverflow.com/questions/40375366/pandas-to-csv-checking-for-overwrite

In [8]:
from langdetect import detect

def filter_comments(df, max_comments=200):
    """Keep the English comments of ``df``, most-liked first.

    Each row's ``c_text`` is passed to langdetect's ``detect``; rows detected
    as ``'en'`` are kept. The result is sorted by ``c_like_count`` in
    descending order and truncated to ``max_comments`` rows (pass ``None``
    to keep all of them). The original row index is preserved.

    Prints the number of English comments found before truncation.
    """
    english_rows = []
    for _, row in df.iterrows():
        try:
            is_english = detect(row['c_text']) == 'en'
        except Exception:
            # Deliberate best-effort: a row whose language cannot be detected
            # (presumably text without usable features, e.g. only digits or
            # punctuation — confirm against langdetect) is skipped, not fatal.
            continue
        if is_english:
            english_rows.append(row)
    print("Number of English Comments: ", len(english_rows))
    # Create a new DataFrame from the list of rows
    new_df = pd.DataFrame(english_rows,
                columns=['product', 'v_title', 'v_videoId',
                    'v_channelTitle', 'v_publishTime',
                    'v_description', 'v_thumbnail',
                    'c_id','c_parentId',
                    'c_author', 'c_published_at',
                    'c_updated_at', 'c_like_count',
                    'c_text'])
    new_df = new_df.sort_values(by=['c_like_count'], ascending=False)
    if max_comments is not None:
        new_df = new_df.head(max_comments)
    return new_df

In [9]:
# Keep only the English comments, sorted by like count and capped by filter_comments.
new_df = filter_comments(multiple_video_comments)

Number of English Comments:  405


In [10]:
# Display the filtered comments as the cell output.
new_df

Unnamed: 0,product,v_title,v_videoId,v_channelTitle,v_publishTime,v_description,v_thumbnail,c_id,c_parentId,c_author,c_published_at,c_updated_at,c_like_count,c_text
250,Strong Girl Nam-soon,Park hyung sik kissed but it was not scripted,qzNIrbZkXCM,KdramaPink,2021-11-14T12:49:29Z,,https://i.ytimg.com/vi/qzNIrbZkXCM/default.jpg,UgxWJ86QFbnxB-Oz0EF4AaABAg,,baPBapBaM,2021-12-24T10:09:35Z,2021-12-24T10:09:35Z,67514,he actually admitted that he fell for her but ...
268,Strong Girl Nam-soon,Park hyung sik kissed but it was not scripted,qzNIrbZkXCM,KdramaPink,2021-11-14T12:49:29Z,,https://i.ytimg.com/vi/qzNIrbZkXCM/default.jpg,Ugyk65Vr1FOCmveq6oB4AaABAg,,sAkUrA6309,2021-12-20T03:03:14Z,2021-12-20T03:03:59Z,47747,"usually in any drama especially romcom, actors..."
259,Strong Girl Nam-soon,Park hyung sik kissed but it was not scripted,qzNIrbZkXCM,KdramaPink,2021-11-14T12:49:29Z,,https://i.ytimg.com/vi/qzNIrbZkXCM/default.jpg,UgyswTPxdPEkWuYepK94AaABAg,,paulina9134,2021-12-21T22:40:15Z,2021-12-21T22:40:15Z,35975,She: living a real kdrama moment while acting ...
265,Strong Girl Nam-soon,Park hyung sik kissed but it was not scripted,qzNIrbZkXCM,KdramaPink,2021-11-14T12:49:29Z,,https://i.ytimg.com/vi/qzNIrbZkXCM/default.jpg,UgxCZ2y-BBBoISQWEvl4AaABAg,,jungnikahoseok,2021-12-22T17:31:08Z,2021-12-22T17:32:45Z,20616,This hits different when you know the fact tha...
256,Strong Girl Nam-soon,Park hyung sik kissed but it was not scripted,qzNIrbZkXCM,KdramaPink,2021-11-14T12:49:29Z,,https://i.ytimg.com/vi/qzNIrbZkXCM/default.jpg,UgzOIIA-v4FwrlcFBnt4AaABAg,,raraminaj1251,2022-04-30T13:05:56Z,2022-04-30T13:05:56Z,19679,He fell for her she rejected him and when he w...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
312,Strong Girl Nam-soon,behind the scenes of strong woman do bong soon...,3CW87x2bji0,thatkdramaholic,2022-02-17T16:05:16Z,,https://i.ytimg.com/vi/3CW87x2bji0/default.jpg,Ugz9g_hacswZHJeRpyl4AaABAg,,ThatsMeRajvi...,2023-01-06T14:04:44Z,2023-01-06T14:04:44Z,20,Ok now i got the vibe why he is V's friend
156,Strong Girl Nam-soon,Girl Attitude Status 🔥 Ft.Little Mix - Power ✨...,qohGpbw5t2E,aspro editz,2023-10-14T00:30:33Z,shorts.,https://i.ytimg.com/vi/qohGpbw5t2E/default.jpg,Ugz-mMm9y6q6ZzCJ7ll4AaABAg,,Shyshy_9411,2024-01-18T17:39:51Z,2024-01-18T17:39:51Z,19,This girl l show in all of us are dead
230,Strong Girl Nam-soon,Eun-ji transformation 😨 into zombie 🧟‍♀ All of...,WoXt1vxnE1k,its Blue Drama 💙,2023-06-28T11:23:30Z,Eun-ji transformation into zombie ‍♀ All of us...,https://i.ytimg.com/vi/WoXt1vxnE1k/default.jpg,UgwhsqAUaU2PbLZI8X94AaABAg,,user-lo1bp7pe1i,2023-07-03T05:37:28Z,2023-07-03T05:37:28Z,19,Name the film:All of us are dead is very cool
22,Strong Girl Nam-soon,SHE SAVED HER LIFE ❤️ #Shorts,aNV59aHbM0o,Goubtube,2021-07-19T19:00:00Z,SUBSCRIBE FOR MORE! --------------------------...,https://i.ytimg.com/vi/aNV59aHbM0o/default.jpg,UgwJysTTGDp5bLzMrSF4AaABAg,,jeremylanier4743,2023-10-23T10:44:57Z,2023-10-23T10:44:57Z,19,What I see is this is what men look like when ...


In [11]:
# import os

# # Creating a folder for the comments
# directory = 'comments'
# if not os.path.exists(directory):
#     os.makedirs(directory)

# for index, row in new_df.iterrows():
#     # Different file path for each of the comments
#     file_path = os.path.join(directory, f'comment_{index}.txt')
    
#     # Write the comment content to the text file
#     with open(file_path, 'w', encoding='utf-8') as file:
#         file.write(row['c_text'])

Create CSV

In [13]:
import glob  # retained from the original cell; no longer needed by this cell
from pathlib import Path

filename = 'final_comments_df.csv'
# Only write to disk if the file doesn't exist yet, so an earlier export is
# never overwritten. A plain existence check is used rather than glob.glob,
# which would treat characters such as '[' or '*' in the name as a pattern.
if not Path(filename).exists():
    new_df.to_csv(filename, index=False)
else:
    print(f'File Already Exists. Delete {filename}')