In [49]:
!pip install pandas
!pip install youtube_transcript_api
!pip install instaloader
!pip install google-api-python-client
!pip install youtube_transcript_api




In [56]:
import os
import re
from itertools import islice
from urllib.parse import urlparse

import instaloader
import pandas as pd
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi

def extract_comments_from_url(url, limit=100):
    """Dispatch a post/video URL to the matching platform-specific extractor.

    Args:
        url: Full URL of an Instagram post or a YouTube video.
        limit: Maximum number of comments to request from the extractor.

    Returns:
        Whatever the platform extractor returns for a supported URL, or
        ``None`` (after printing a message) when the platform is unsupported
        or no YouTube video ID can be found in the URL.
    """
    # `hostname` is lower-cased and excludes any port or credentials, so the
    # comparison below is not thrown off by "YouTube.com" or "host:443".
    # It is None for strings without a network location.
    host = urlparse(url).hostname or ""

    # Treat the "www." and mobile "m." mirrors as the same site as the bare domain.
    for prefix in ("www.", "m."):
        if host.startswith(prefix):
            host = host[len(prefix):]
            break

    if host == "instagram.com":
        return extract_instagram_comments(url, limit)

    if host == "youtube.com":
        video_id = get_video_id(url)
        if video_id:
            return extract_youtube_comments(video_id, limit)
        print("Invalid YouTube URL.")
        return None

    print("Unsupported platform.")
    return None

def extract_instagram_comments(post_url, limit=100):
    """Collect up to ``limit`` comments of an Instagram post into a DataFrame.

    Login credentials are read from the ``INSTAGRAM_USERNAME`` and
    ``INSTAGRAM_PASSWORD`` environment variables so that they never live in
    the notebook. When either is missing, no login is attempted.

    Args:
        post_url: URL of the post, e.g. ``https://www.instagram.com/p/<shortcode>/``.
            A trailing slash and query string are optional.
        limit: Maximum number of comments to collect; values <= 0 yield an
            empty DataFrame.

    Returns:
        A DataFrame with the columns ``Username`` and ``Comment``, or ``None``
        (after printing the error) when the URL contains no shortcode or the
        post/comments cannot be loaded.
    """
    # Take the shortcode from the URL path instead of a fixed split position,
    # which picked the wrong segment when the trailing slash was missing.
    match = re.search(r"/(?:p|reel|reels|tv)/([A-Za-z0-9_-]+)", urlparse(post_url).path)
    if match is None:
        print("Error:", f"could not find a post shortcode in {post_url!r}")
        return None
    shortcode = match.group(1)

    loader = instaloader.Instaloader()
    username = os.environ.get("INSTAGRAM_USERNAME")
    password = os.environ.get("INSTAGRAM_PASSWORD")
    if username and password:
        loader.login(user=username, passwd=password)
    # NOTE(review): without a login Instagram may refuse to serve comments;
    # in that case the error is reported by the handler below.

    try:
        post = instaloader.Post.from_shortcode(loader.context, shortcode)

        # islice stops pulling from the lazy comment iterator as soon as the
        # limit is reached, and takes nothing at all for limit <= 0.
        rows = [
            (comment.owner.username, comment.text)
            for comment in islice(post.get_comments(), max(limit, 0))
        ]
        return pd.DataFrame(rows, columns=['Username', 'Comment'])

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print("Error:", e)
        return None

def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Supported forms:
      * ``...?v=<id>`` anywhere in the query string (``watch`` URLs),
      * ``https://youtu.be/<id>``,
      * ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.

    Args:
        youtube_url: The URL (or query fragment such as ``"v=<id>"``) to inspect.

    Returns:
        The video ID as a string, or ``None`` when none can be found.
    """
    if not youtube_url:
        return None

    # Require `v` to be a whole parameter name (start of string or preceded by
    # ? or &) so that e.g. "rev=..." is not mistaken for it, and accept a
    # "#fragment" after the ID in addition to "&", "?" or end of string.
    query_match = re.search(r"(?:^|[?&])v=([A-Za-z0-9_-]+)(?=[&#?]|$)", youtube_url)
    if query_match:
        return query_match.group(1)

    # No `v` parameter: fall back to the URL forms that carry the ID in the path.
    parsed = urlparse(youtube_url)
    host = parsed.hostname or ""
    segments = [segment for segment in parsed.path.split("/") if segment]

    candidate = None
    if host in ("youtu.be", "www.youtu.be") and segments:
        candidate = segments[0]
    elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
        candidate = segments[1]

    if candidate and re.fullmatch(r"[A-Za-z0-9_-]+", candidate):
        return candidate
    return None

def extract_youtube_comments(video_id, limit=100, max_comment_length=128):
    """Fetch top-level comments of a YouTube video and print its transcript.

    The API key is read from the ``YOUTUBE_API_KEY`` environment variable so
    that it never lives in the notebook. Without a key no request is made and
    an empty DataFrame is returned.

    Args:
        video_id: ID of the video whose comment threads are requested.
        limit: Maximum number of comments in the returned DataFrame.
        max_comment_length: Comments whose text is longer than this many
            characters are skipped; pass ``None`` to keep every comment.

    Returns:
        A DataFrame with the columns ``comment``, ``user_name`` and ``date``
        holding at most ``limit`` rows. Request errors are printed and end
        the collection early; the rows gathered so far are still returned.

    Side effects:
        Prints the transcript text segment by segment, or a failure message
        when the transcript cannot be fetched.
    """
    rows = []  # (comment text, author display name, published-at) tuples

    api_key = os.environ.get("YOUTUBE_API_KEY")
    if not api_key:
        print("An error occurred: the YOUTUBE_API_KEY environment variable is not set")
        request = None
    else:
        threads = build('youtube', 'v3', developerKey=api_key).commentThreads()
        # Only the "snippet" part is needed: reply bodies are not part of the result.
        request = threads.list(part="snippet", videoId=video_id, textFormat="plainText", maxResults=100)

    # Page through the comment threads until enough comments are collected
    # or list_next() signals (with None) that there are no further pages.
    while request is not None and len(rows) < limit:
        try:
            response = request.execute()

            for item in response.get('items', []):
                snippet = item['snippet']['topLevelComment']['snippet']
                text = snippet['textDisplay']
                if max_comment_length is not None and len(text) > max_comment_length:
                    continue
                rows.append((text, snippet['authorDisplayName'], snippet['publishedAt']))

            request = threads.list_next(request, response)

        except Exception as e:
            # Best-effort: report the problem and keep what was gathered so far.
            print(f"An error occurred: {str(e)}")
            break

    # A page can overshoot the limit, so trim before building the frame.
    df = pd.DataFrame(rows[:limit], columns=["comment", "user_name", "date"])

    # NOTE(review): newer youtube_transcript_api releases reportedly replace the
    # static get_transcript() with an instance-level fetch() — confirm against
    # the installed version.
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        print("\nTranscript:")
        for segment in transcript:
            print(segment['text'])
    except Exception as e:
        print(f"Failed to fetch transcript: {str(e)}")

    return df


In [57]:
# Demo: pull up to 100 comments for a sample YouTube video and show the
# resulting DataFrame as the cell output.
url = "https://www.youtube.com/watch?v=77hrMO0ZarU"
comments_df = extract_comments_from_url(url, limit=100)
comments_df


Transcript:
[{'text': "i'm trapping my friend's bed so that", 'start': 0.08, 'duration': 3.68}, {'text': 'whenever he respawns he falls through', 'start': 1.599, 'duration': 4.081}, {'text': 'the bedrock and into the void you see', 'start': 3.76, 'duration': 4.159}, {'text': 'last week he pretended to be on my team', 'start': 5.68, 'duration': 4.72}, {'text': 'in the secret face war only to betray me', 'start': 7.919, 'duration': 4.481}, {'text': 'and steal my enchanted golden apple', 'start': 10.4, 'duration': 3.84}, {'text': 'steve for this to work i need his bed', 'start': 12.4, 'duration': 3.6}, {'text': 'location but since he used an enderpearl', 'start': 14.24, 'duration': 4.56}, {'text': 'stasis chamber to rob me i had no idea', 'start': 16.0, 'duration': 4.24}, {'text': 'where his bed could be i searched all', 'start': 18.8, 'duration': 3.84}, {'text': 'around the server with no luck until i', 'start': 20.24, 'duration': 4.48}, {'text': 'ran into his teammate branzie in the', 

Unnamed: 0,comment,user_name,date
0,Consider Subscribing if ya like the vid!,@rekrap2,2021-11-28T16:14:10Z
1,Team rekrap is more smart then Einstine,@III_Gaming3112,2024-02-05T12:26:42Z
2,Spoilers: the voidtrap didn't actually work an...,@erikpillosko3829,2024-02-04T06:03:44Z
3,Isn’t deepslate coal rare😅,@BagMan_Games,2024-01-17T22:38:41Z
4,I'm the biggest rekrapper ever,@45TR4L1TY,2024-01-07T06:59:22Z
...,...,...,...
95,Ryan Reynolds and Matt Damon team up against a...,@hacksarch,2023-04-06T08:31:34Z
96,Ü,@sam74775,2023-04-04T13:19:15Z
97,Hi,@justinham5000,2023-04-02T21:43:55Z
98,Me watching all the channels’ videos of this,@meepruns4636,2023-04-02T03:56:54Z
