[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/farheenfab/AppliedText_CW/blob/main/CW1-generate_dataset.ipynb)


# F20AA Coursework 1

## 1) Data Collection:

In [28]:
import googleapiclient.discovery
import googleapiclient.errors
import numpy as np
import pandas as pd
import glob

api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = "AIzaSyAWj_uzrhZL18X32S_P79pT1wnSYGpuA4k"

Reference

https://developers.google.com/youtube/v3/docs/search/list#parameters

https://developers.google.com/youtube/v3/docs/comments/list

In [29]:
class api_handler:
    def __init__(self, api_service_name, api_version, developer_key):
        self.client = googleapiclient.discovery.build(api_service_name,
                                                    api_version,
                                                    developerKey=developer_key)
        
    # Search for videos details given id
    def get_video_details(self, videoId, part="snippet"):
        request = self.client.videos().list(
            part=part,
            id=videoId
        )
        response = request.execute()

        if 'items' in response:
            video_details = response['items'][0]
            snippet=video_details['snippet']
            snippet['videoId']=videoId
            snippet['id']=videoId
            snippet['publishTime']=video_details.get('snippet', {}).get('publishedAt', {})
            snippet['thumbnails']=video_details.get('snippet', {}).get('thumbnails', {}).get('default', {}).get('url', '')
            return snippet

        return None

    # Search for videos given query
    def get_videos(self,query,maxResults=5,part="snippet"):
        request = self.client.search().list(
            part=part,
            maxResults=maxResults,
            # higher view count is likely to be more relevent 
            order="viewCount",
            q=query,  
            # american region videos 
            regionCode="US",
            # english videos
            relevanceLanguage="en",
            type="video"
        )
        response = request.execute()
        return response
    
    # Format Response from get_videos to dataframe
    def get_video_df(response):
        items=[]
        for item in response['items']:
            snippet=item.get('snippet', {})
            items+=[{
                'title':snippet.get('title', ''),
                'videoId':item.get('id', {}).get('videoId', ''),
                'channelTitle':snippet.get('channelTitle', ''),
                'publishTime':snippet.get('publishTime', ''),
                'description':snippet.get('description', ''),
                'thumbnails':snippet.get('thumbnails', {}).get('default', {}).get('url', '')
                }]
        df=pd.DataFrame(items)
        return df
    
    # Get comments from video
    def get_comments(self,videoId,part="snippet",maxResults=100,maxResultsDepth=100):
        all_comments = []
        nextPageToken = None
        while maxResults > 0:
            request = self.client.commentThreads().list(
                part=part,
                videoId=videoId,
                maxResults=min(maxResults, 100),
                order='relevance',
                moderationStatus='published',
                textFormat='plainText',
                pageToken=nextPageToken
            )
            response = request.execute()
            nextPageToken = response.get('nextPageToken')
            if 'items' in response:
                all_comments+=[response]
                for item in response['items']:
                    # extract the comment ID to get replies
                    comment_id = item.get('snippet',{}).get('topLevelComment',{}).get('id','')
                    if item.get('snippet',{}).get('totalReplyCount',0)>2:
                        print('getting replies:',item.get('snippet',{}).get('totalReplyCount',0))
                        replies = self.get_comment_replies(comment_id, maxResults=maxResultsDepth)
                        all_comments += replies

            maxResults -= min(maxResults, 100)
            if nextPageToken is None:
                break;    
        return all_comments
    
    # Get replies from comment 
    def get_comment_replies(self, commentId, part="snippet", maxResults=100):
        all_comments = []
        nextPageToken = None
        while maxResults > 0 and (nextPageToken != None or len(all_comments)==0):

            request = self.client.comments().list(
                part=part,
                parentId=commentId,
                maxResults=min(maxResults, 100),
                textFormat='plainText',
                pageToken=nextPageToken
            )

            response = request.execute()
            nextPageToken = response.get('nextPageToken')

            if 'items' in response and len(response['items'])>0:
                for item in response['items']:
                    modified_response = {
                        'items': [
                            {
                                'id':item.get('id'),
                                'snippet': {
                                    'topLevelComment': {
                                        'snippet': item.get('snippet','')
                                    }
                                }
                            }
                        ]
                    }
                    all_comments += [modified_response]
            maxResults -= min(maxResults, 100)
            if nextPageToken is None:
                break;    
        return all_comments

    # Format response from get_comments to dataframe
    def get_comments_df(response, video,product):
        comments = []
        for pages in response:
            for item in pages['items']:
                comment = item.get('snippet', {}).get('topLevelComment', {}).get('snippet', {})
                comments.append([
                        product,
                        video.get('title', ''),
                        video.get('videoId', ''),
                        video.get('channelTitle', ''),
                        video.get('publishTime', ''),
                        video.get('description', ''),
                        video.get('thumbnails', ''),
                        item.get('id', ''),  
                        comment.get('parentId', ''),  
                        comment.get('authorDisplayName', '')[1:],  
                        comment.get('publishedAt', ''),
                        comment.get('updatedAt', ''),
                        comment.get('likeCount', ''),
                        comment.get('textDisplay', '')
                    ])

        df = pd.DataFrame(comments,
            columns=['product', 'v_title', 'v_videoId',
                    'v_channelTitle', 'v_publishTime',
                    'v_description', 'v_thumbnail',
                    'c_id','c_parentId',
                    'c_author', 'c_published_at',
                    'c_updated_at', 'c_like_count',
                    'c_text'])
        
        return df
    
    # Search for videos related to products iteratively
    # Collect comments from each video and place it into an array
    def create_video_df_from_search(self, products,
                                    number_of_videos_per_product=5,
                                    number_of_comments_per_video=100
                                    ,number_of_replies_per_comment=100):
        multiple_video_comments = pd.DataFrame()
        for product in products:
            # get 25 first videos with the highest viewer counts 
            response = self.get_videos(query=product, maxResults=number_of_videos_per_product)
            # Convert results to df
            videos_df = api_handler.get_video_df(response)
            # For each video get a maximum of 100 comments
            # and place comments into an array
            for _, video in videos_df.iterrows():
                try:
                    response = self.get_comments(video['videoId'], maxResults=number_of_comments_per_video,maxResultsDepth=number_of_replies_per_comment)
                    comments_df = api_handler.get_comments_df(response, video, product)
                except:
                    # Function fails as the API returns 403 if the channel has comments disabled
                    # place an empty entry instead it can be deleted later
                    comments_df = pd.DataFrame(np.zeros((1, 14)),
                                                columns=['product', 'v_title', 'v_videoId',
                                                        'v_channelTitle', 'v_publishTime',
                                                        'v_description', 'v_thumbnail',
                                                        'c_id','c_parentId',
                                                        'c_author', 'c_published_at',
                                                        'c_updated_at', 'c_like_count',
                                                        'c_text'])
                    print('Unable to retrieve comments:', video.get('title', ''))
                multiple_video_comments = pd.concat([multiple_video_comments, comments_df], ignore_index=True)
        return multiple_video_comments
        
    # alternative method by explicitely specifying videos
    def create_video_df(self,products,videos,number_of_comments_per_video=100,number_of_replies_per_comment=100):
        count=0
        multiple_video_comments = pd.DataFrame()
        for product in products:
            for video in videos[count]:
                response = self.get_comments(video,maxResults=number_of_comments_per_video,maxResultsDepth=number_of_replies_per_comment) 
                video=self.get_video_details(video)
                comments_df = api_handler.get_comments_df(response, video, product)
                multiple_video_comments = pd.concat([multiple_video_comments, comments_df], ignore_index=True)
            count+=1
        return multiple_video_comments

In [30]:
products=["Midsommar (2019) Movie"]

# careful when adding videos index number should match between products and videos
# index 0 should contain the videos used to get comments for video 0 and so on 
videos=[['LhCQ7lHEjU8','Yh7PNUGxihU','8sXTfzaLmiQ'],
        ['c2xta7hcvXI','mkrrKGo1VEs','CL0wU3ss2uw','jPKm6kc9j5A','g0Oj4A2rslY']]

youtube=api_handler(api_service_name, api_version, DEVELOPER_KEY)


In [31]:
multiple_video_comments=youtube.create_video_df_from_search(products,number_of_videos_per_product=20,number_of_comments_per_video=20,number_of_replies_per_comment=0)
# multiple_video_comments=youtube.create_video_df(products,videos,number_of_comments_per_video=20000,number_of_replies_per_comment=20000)
multiple_video_comments

getting replies: 28
getting replies: 54
getting replies: 195
getting replies: 30
getting replies: 60
getting replies: 158
getting replies: 35
getting replies: 37
getting replies: 8
getting replies: 85
getting replies: 18
getting replies: 10
getting replies: 19
getting replies: 3
getting replies: 17
getting replies: 88
getting replies: 23
getting replies: 33
getting replies: 19
getting replies: 43
getting replies: 12
getting replies: 18
getting replies: 25
getting replies: 4
getting replies: 12
getting replies: 8
getting replies: 3
getting replies: 98
getting replies: 4
getting replies: 14
getting replies: 12
getting replies: 7
getting replies: 56
getting replies: 49
getting replies: 64
getting replies: 68
getting replies: 44
getting replies: 32
getting replies: 195
getting replies: 84
getting replies: 231
getting replies: 56
getting replies: 3
getting replies: 59
getting replies: 16
getting replies: 40
getting replies: 14
getting replies: 66
getting replies: 5
getting replies: 78
getti

Unnamed: 0,product,v_title,v_videoId,v_channelTitle,v_publishTime,v_description,v_thumbnail,c_id,c_parentId,c_author,c_published_at,c_updated_at,c_like_count,c_text
0,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,Ugz1VEBUjXHRrSx8JBR4AaABAg,,otwism8184,2021-02-03T00:04:59Z,2021-02-03T00:04:59Z,15627,"*at the start of the movie*\n- Dude, she needs..."
1,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,UgzJExDXzLQSE_b1mgp4AaABAg,,TechnogrrlLondon,2020-07-12T19:53:03Z,2020-07-12T19:53:03Z,8885,"After watching this movie, I will never accept..."
2,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,UgyFooHotcQmOvuq3cp4AaABAg,,secondhandembarrassment4564,2021-03-01T13:53:44Z,2021-03-01T13:53:44Z,23294,I’m Swedish and i can confirm we do this every...
3,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,UgxyycccD7ie3XYDh_14AaABAg,,denverdean2663,2021-07-18T14:23:43Z,2021-07-18T14:23:43Z,11838,Other horror movies: ☠️🧛🧟🎃💀👽\n\nMidsommar: 🌞🌻🌸🌈✨🌺
4,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,Ugw547MFZe0O_7eqDz94AaABAg,,alienillusi0n,2020-07-12T11:53:10Z,2020-07-12T11:53:10Z,6042,The scary thing is when you realise how vulner...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
384,Midsommar (2019) Movie,Midsommar | Official Trailer | HD | 2019 | Hor...,xb-lGS5RV5A,GARDEN OF GORE,2020-12-23T10:10:23Z,Midsommar - 2019. Directed by: Ari Aster. Stor...,https://i.ytimg.com/vi/xb-lGS5RV5A/default.jpg,UgyT0Zdu001V2avA_Ah4AaABAg,,sophiaruggeri3050,2022-09-26T09:50:38Z,2022-09-26T09:50:38Z,10,Rewatching the trailer jusr for the fun of it ...
385,Midsommar (2019) Movie,Midsommar | Official Trailer | HD | 2019 | Hor...,xb-lGS5RV5A,GARDEN OF GORE,2020-12-23T10:10:23Z,Midsommar - 2019. Directed by: Ari Aster. Stor...,https://i.ytimg.com/vi/xb-lGS5RV5A/default.jpg,UgzbVVQCBYS7bsLqxsp4AaABAg,,emmmmmmmmmmi,2024-02-08T08:52:39Z,2024-02-08T08:52:39Z,0,"Just watched this movie, incredible."
386,Midsommar (2019) Movie,Midsommar | Official Trailer | HD | 2019 | Hor...,xb-lGS5RV5A,GARDEN OF GORE,2020-12-23T10:10:23Z,Midsommar - 2019. Directed by: Ari Aster. Stor...,https://i.ytimg.com/vi/xb-lGS5RV5A/default.jpg,UgziC7gqcRBMCMI_Jjl4AaABAg,,dalauu,2023-04-19T20:55:34Z,2023-04-19T20:55:34Z,8,Art at its fucking BEST!
387,Midsommar (2019) Movie,Midsommar | Official Trailer | HD | 2019 | Hor...,xb-lGS5RV5A,GARDEN OF GORE,2020-12-23T10:10:23Z,Midsommar - 2019. Directed by: Ari Aster. Stor...,https://i.ytimg.com/vi/xb-lGS5RV5A/default.jpg,UgzJBKOnnH8cAy-479Z4AaABAg,,GardenofGore,2022-10-10T20:00:30Z,2022-10-10T20:00:30Z,2,Here is the new teaser for our bloody slasher ...


Taken from:

https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python

In [32]:
import re
from bs4 import BeautifulSoup

def remove_emojis(data):
    if isinstance(data, str):
        # Remove html tags
        data = BeautifulSoup(data, "html.parser").get_text()
        # Remove emote, etc
        emoj = re.compile("["
            u"\U0001F600-\U0001F64F"  # emoticons
            u"\U0001F300-\U0001F5FF"  # symbols & pictographs
            u"\U0001F680-\U0001F6FF"  # transport & map symbols
            u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
            u"\U00002500-\U00002BEF"  # chinese char
            u"\U00002702-\U000027B0"
            u"\U000024C2-\U0001F251"
            u"\U0001f926-\U0001f937"
            u"\U00010000-\U0010ffff"
            u"\u2640-\u2642" 
            u"\u2600-\u2B55"
            u"\u200d"
            u"\u23cf"
            u"\u23e9"
            u"\u231a"
            u"\ufe0f"  # dingbats
            u"\u3030"
                        "]+", re.UNICODE)
        # english_words = re.compile(r'\b[a-zA-Z]+\b')

        return re.sub(emoj, '', data)
    return 

In [33]:
multiple_video_comments.dropna(subset=['c_text'],inplace=True)

In [34]:
# remove emotes from the text to be analyzed c_text = comment text
multiple_video_comments['c_text']=multiple_video_comments['c_text'].apply(remove_emojis)

df_length_before = len(multiple_video_comments)
print("DataFrame Length Before:", df_length_before)

# drop duplicates
multiple_video_comments.drop_duplicates(inplace=True)

# drop rows with empty or text length <= 2 comments
multiple_video_comments = multiple_video_comments[multiple_video_comments['c_text'].apply(lambda x: len(x) > 2)]

df_length_after = len(multiple_video_comments)
print("DataFrame Length After:", df_length_after)

multiple_video_comments

DataFrame Length Before: 389
DataFrame Length After: 389


  data = BeautifulSoup(data, "html.parser").get_text()


Unnamed: 0,product,v_title,v_videoId,v_channelTitle,v_publishTime,v_description,v_thumbnail,c_id,c_parentId,c_author,c_published_at,c_updated_at,c_like_count,c_text
0,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,Ugz1VEBUjXHRrSx8JBR4AaABAg,,otwism8184,2021-02-03T00:04:59Z,2021-02-03T00:04:59Z,15627,"*at the start of the movie*\n- Dude, she needs..."
1,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,UgzJExDXzLQSE_b1mgp4AaABAg,,TechnogrrlLondon,2020-07-12T19:53:03Z,2020-07-12T19:53:03Z,8885,"After watching this movie, I will never accept..."
2,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,UgyFooHotcQmOvuq3cp4AaABAg,,secondhandembarrassment4564,2021-03-01T13:53:44Z,2021-03-01T13:53:44Z,23294,I’m Swedish and i can confirm we do this every...
3,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,UgxyycccD7ie3XYDh_14AaABAg,,denverdean2663,2021-07-18T14:23:43Z,2021-07-18T14:23:43Z,11838,Other horror movies: \n\nMidsommar:
4,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,Ugw547MFZe0O_7eqDz94AaABAg,,alienillusi0n,2020-07-12T11:53:10Z,2020-07-12T11:53:10Z,6042,The scary thing is when you realise how vulner...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
384,Midsommar (2019) Movie,Midsommar | Official Trailer | HD | 2019 | Hor...,xb-lGS5RV5A,GARDEN OF GORE,2020-12-23T10:10:23Z,Midsommar - 2019. Directed by: Ari Aster. Stor...,https://i.ytimg.com/vi/xb-lGS5RV5A/default.jpg,UgyT0Zdu001V2avA_Ah4AaABAg,,sophiaruggeri3050,2022-09-26T09:50:38Z,2022-09-26T09:50:38Z,10,Rewatching the trailer jusr for the fun of it ...
385,Midsommar (2019) Movie,Midsommar | Official Trailer | HD | 2019 | Hor...,xb-lGS5RV5A,GARDEN OF GORE,2020-12-23T10:10:23Z,Midsommar - 2019. Directed by: Ari Aster. Stor...,https://i.ytimg.com/vi/xb-lGS5RV5A/default.jpg,UgzbVVQCBYS7bsLqxsp4AaABAg,,emmmmmmmmmmi,2024-02-08T08:52:39Z,2024-02-08T08:52:39Z,0,"Just watched this movie, incredible."
386,Midsommar (2019) Movie,Midsommar | Official Trailer | HD | 2019 | Hor...,xb-lGS5RV5A,GARDEN OF GORE,2020-12-23T10:10:23Z,Midsommar - 2019. Directed by: Ari Aster. Stor...,https://i.ytimg.com/vi/xb-lGS5RV5A/default.jpg,UgziC7gqcRBMCMI_Jjl4AaABAg,,dalauu,2023-04-19T20:55:34Z,2023-04-19T20:55:34Z,8,Art at its fucking BEST!
387,Midsommar (2019) Movie,Midsommar | Official Trailer | HD | 2019 | Hor...,xb-lGS5RV5A,GARDEN OF GORE,2020-12-23T10:10:23Z,Midsommar - 2019. Directed by: Ari Aster. Stor...,https://i.ytimg.com/vi/xb-lGS5RV5A/default.jpg,UgzJBKOnnH8cAy-479Z4AaABAg,,GardenofGore,2022-10-10T20:00:30Z,2022-10-10T20:00:30Z,2,Here is the new teaser for our bloody slasher ...


Reference

https://stackoverflow.com/questions/40375366/pandas-to-csv-checking-for-overwrite

In [35]:
from langdetect import detect

def filter_comments(df):
    c = 0
    comments = []
    for index, row in df.iterrows():
        try:
            if detect(row['c_text']) == 'en':
                comments.append(row)
                c += 1
        except Exception as e:  # Catch any exception
            pass
    print("Number of English Comments: ", c)
    new_df = pd.DataFrame(comments, 
                columns=['product', 'v_title', 'v_videoId',
                    'v_channelTitle', 'v_publishTime',
                    'v_description', 'v_thumbnail',
                    'c_id','c_parentId',
                    'c_author', 'c_published_at',
                    'c_updated_at', 'c_like_count',
                    'c_text'])  # Create a new DataFrame from the list of rows
    # new_df = new_df.sort_values(by = ['c_like_count'], ascending = False)
    new_df = new_df[:200]
    return new_df

In [36]:
new_df = filter_comments(multiple_video_comments)

Number of English Comments:  327


In [37]:
new_df

Unnamed: 0,product,v_title,v_videoId,v_channelTitle,v_publishTime,v_description,v_thumbnail,c_id,c_parentId,c_author,c_published_at,c_updated_at,c_like_count,c_text
0,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,Ugz1VEBUjXHRrSx8JBR4AaABAg,,otwism8184,2021-02-03T00:04:59Z,2021-02-03T00:04:59Z,15627,"*at the start of the movie*\n- Dude, she needs..."
1,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,UgzJExDXzLQSE_b1mgp4AaABAg,,TechnogrrlLondon,2020-07-12T19:53:03Z,2020-07-12T19:53:03Z,8885,"After watching this movie, I will never accept..."
2,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,UgyFooHotcQmOvuq3cp4AaABAg,,secondhandembarrassment4564,2021-03-01T13:53:44Z,2021-03-01T13:53:44Z,23294,I’m Swedish and i can confirm we do this every...
3,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,UgxyycccD7ie3XYDh_14AaABAg,,denverdean2663,2021-07-18T14:23:43Z,2021-07-18T14:23:43Z,11838,Other horror movies: \n\nMidsommar:
4,Midsommar (2019) Movie,MIDSOMMAR | Official Trailer HD | A24,1Vnghdsjmd0,A24,2019-05-14T13:00:03Z,SUBSCRIBE: http://bit.ly/A24subscribe THIS SUM...,https://i.ytimg.com/vi/1Vnghdsjmd0/default.jpg,Ugw547MFZe0O_7eqDz94AaABAg,,alienillusi0n,2020-07-12T11:53:10Z,2020-07-12T11:53:10Z,6042,The scary thing is when you realise how vulner...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,Midsommar (2019) Movie,MIDSOMMAR Trailer # 3 (NEW 2019) by HEREDITARY...,l6XWuruEKVM,ONE Media,2019-06-21T14:02:34Z,MIDSOMMAR Trailer # 3 (NEW 2019) by HEREDITARY...,https://i.ytimg.com/vi/l6XWuruEKVM/default.jpg,Ugx7EMWUVWPYtzM1L5t4AaABAg,,beaudiu,2019-07-03T15:19:23Z,2019-07-03T15:19:45Z,251,If I saw an ad for this festival I'd be the fi...
198,Midsommar (2019) Movie,MIDSOMMAR Trailer # 3 (NEW 2019) by HEREDITARY...,l6XWuruEKVM,ONE Media,2019-06-21T14:02:34Z,MIDSOMMAR Trailer # 3 (NEW 2019) by HEREDITARY...,https://i.ytimg.com/vi/l6XWuruEKVM/default.jpg,UgxVWRj3Q2ILMrmp8lp4AaABAg,,samadams656,2019-06-23T09:17:30Z,2019-06-23T09:17:30Z,362,That violin though
199,Midsommar (2019) Movie,MIDSOMMAR Trailer # 3 (NEW 2019) by HEREDITARY...,l6XWuruEKVM,ONE Media,2019-06-21T14:02:34Z,MIDSOMMAR Trailer # 3 (NEW 2019) by HEREDITARY...,https://i.ytimg.com/vi/l6XWuruEKVM/default.jpg,Ugxaki603iv_ZWQEwuJ4AaABAg,,jasonjulian6436,2021-07-05T07:03:16Z,2021-07-05T07:03:16Z,57,"If you are a die hard horror fan, don't go int..."
200,Midsommar (2019) Movie,MIDSOMMAR Trailer # 3 (NEW 2019) by HEREDITARY...,l6XWuruEKVM,ONE Media,2019-06-21T14:02:34Z,MIDSOMMAR Trailer # 3 (NEW 2019) by HEREDITARY...,https://i.ytimg.com/vi/l6XWuruEKVM/default.jpg,Ugzn692r2p-KkrOstsF4AaABAg,,namjignerak,2019-06-21T15:38:58Z,2019-06-21T15:38:58Z,2032,I hope the black guy is going to survive.


In [38]:
import os

# Creating a folder for the comments
directory = 'comments'
if not os.path.exists(directory):
    os.makedirs(directory)

for index, row in new_df.iterrows():
    # Different file path for each of the comments
    file_path = os.path.join(directory, f'comment_{index}.txt')
    
    # Write the comment content to the text file
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(row['c_text'])

Create CSV

In [39]:
import glob

filename = 'final_comments_df.csv'
files_present = glob.glob(filename)
# will only write to disk if file doesnt exist
if not files_present:
    new_df.to_csv(filename, index=False)
    new_df
else:
    print (f'File Already Exists. Delete {filename}' )