● Data Extraction

1. Data Collection:

In [None]:
import googleapiclient.discovery
from googleapiclient.errors import HttpError
import io
import json
import os
import csv
import pandas as pd

In [None]:
# Set your API key here. Prefer supplying it through the YOUTUBE_API_KEY
# environment variable so the credential does not have to live in the file;
# the literal is kept only as a backward-compatible fallback.
# NOTE(review): a key that has been committed to a shared file should be rotated.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'AIzaSyAc-3AyUnHZnf-edqsUTNgpjmtDOG5_r4Q')

# Initialize the YouTube Data API v3 client
youtube = googleapiclient.discovery.build('youtube', 'v3', developerKey=API_KEY)

In [None]:
def search_videos(query, max_results=50):
    """Return the IDs of videos matching ``query``.

    Issues one ``search.list`` request through the module-level ``youtube``
    client, restricted to results of type ``video``.

    Args:
        query: Search terms, forwarded as the ``q`` parameter.
        max_results: Forwarded as ``maxResults``.

    Returns:
        A list holding the ``videoId`` of every item in the response.
    """
    search_params = {
        'q': query,
        'type': 'video',
        'part': 'id',
        'maxResults': max_results,
    }
    response = youtube.search().list(**search_params).execute()

    return [item['id']['videoId'] for item in response['items']]

def get_video_comments_with_details(video_id):
    """Fetch top-level comments, with author details, for one video.

    Requests a single page of up to 100 comment threads through the
    module-level ``youtube`` client; later pages are not fetched.

    Args:
        video_id: ID of the video whose comment threads are requested.

    Returns:
        A list of dicts with the keys ``VideoID``, ``AuthorName``,
        ``AuthorID`` and ``Comment``. The list is empty when the request
        fails with ``HttpError`` (a message is printed instead of raising).
    """
    comments_data = []

    # Keep the try body limited to the network call, so only request
    # failures are reported as fetch errors.
    try:
        request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            textFormat="plainText",
            maxResults=100
        )
        response = request.execute()
    except HttpError as e:
        if "commentsDisabled" in str(e):
            print(f"Comments are disabled for video: {video_id}")
        else:
            # Include the cause so failures can be told apart.
            print(f"Error fetching comments for video: {video_id}: {e}")
        return comments_data

    for comment in response.get('items', []):
        top_level_comment = comment['snippet']['topLevelComment']['snippet']
        # Not every comment carries 'authorChannelId'; record None rather
        # than aborting the whole video with a KeyError.
        author_id = top_level_comment.get('authorChannelId', {}).get('value')

        comments_data.append({
            'VideoID': video_id,
            'AuthorName': top_level_comment['authorDisplayName'],
            'AuthorID': author_id,
            'Comment': top_level_comment['textDisplay']
        })

    return comments_data

if __name__ == '__main__':
    # Replace 'renewable energy sustainability' with your desired query
    query = 'renewable energy sustainability'
    output_path = 'video_comments_with_details.csv'

    # Search for videos
    videos = search_videos(query)

    # Fetch the comments of every video into one flat list
    comments_details = []
    for video_id in videos:
        comments_details.extend(get_video_comments_with_details(video_id))

    # Convert the list of comments with details into a DataFrame. Naming the
    # columns keeps the CSV header present even when no comments were found.
    comments_df = pd.DataFrame(
        comments_details,
        columns=['VideoID', 'AuthorName', 'AuthorID', 'Comment'],
    )

    # Save the DataFrame to a CSV file
    comments_df.to_csv(output_path, index=False)

    # Report the file that was actually written (the previous message named
    # a different file than the one saved).
    print(f"Comments with details have been stored in '{output_path}'")

2. Data Preprocessing:
    Clean and preprocess your data, including removing duplicates, handling missing data, and performing text normalization. 

In [None]:
import pandas as pd

# Load your data into a DataFrame (this example assumes you have a CSV file)
df = pd.read_csv('video_comments_with_details.csv')  # Replace with your data file

# Remove duplicate comments
df = df.drop_duplicates(subset='Comment')

# Remove blank comments (comments with only whitespace)
df = df[df['Comment'].str.strip() != '']

# Handle missing data (if applicable)
df = df.dropna(subset=['Comment'])

# Remove comments that don't contain meaningful text (e.g., comments with very few words)
min_comment_length = 3  # Adjust this threshold as needed
df = df[df['Comment'].str.split().str.len() >= min_comment_length]

# Perform text normalization (e.g., lowercasing and punctuation removal)
df['Comment'] = df['Comment'].str.lower()
# regex=True must be explicit: without it, recent pandas versions treat the
# pattern as a literal string and no punctuation is removed.
df['Comment'] = df['Comment'].str.replace(r'[^\w\s]', '', regex=True)

# Save the preprocessed data to a new file
df.to_csv('preprocessed_comments_data.csv', index=False)


3. Data Filtering

In [None]:
import pandas as pd

# Step 1: Read the CSV file into a DataFrame
# NOTE(review): 'comments_with_sentiment.csv' is not written by any code in the
# visible part of this file; presumably a separate sentiment-scoring step
# produces it from the preprocessed comments — confirm.
df = pd.read_csv('comments_with_sentiment.csv')

# Step 2: Define common keywords (replace with your list)
# Every keyword appears once and in lowercase, because comments are lowercased
# before they are matched.
common_keywords = [
    "renewable", "sustainable", "climate", "clean energy", "conservation",
    "solar", "wind", "hydroelectric", "geothermal", "biomass", "tidal energy",
    "sustainable practices", "environmental sustainability", "social sustainability",
    "economic sustainability", "sustainable development",
    "climate action", "carbon footprint", "greenhouse gases", "climate mitigation",
    "climate adaptation",
    "clean power", "green energy", "clean technology", "low-carbon energy", "energy efficiency",
    "energy-saving", "energy conservation", "power reduction",
    "eco-conscious", "environmentally friendly", "eco-friendly products", "green living",
    "electric vehicles", "public transportation", "cycling", "sustainable mobility",
    "biodiversity", "habitat preservation", "conservation efforts",
    "recycling", "reuse", "reduce", "circular economy principles",
    "sustainable architecture", "leed certification", "green building",
    "renewable sources", "renewable technology", "renewable power", "sustainable solutions",
    "sustainable living", "renewable practices", "sustainable initiatives",
    "clean environment", "green initiatives", "environmental conservation",
    "renewable economy", "sustainability goals", "renewable infrastructure", "sustainable transportation",
    "clean fuel", "green infrastructure", "sustainable consumption", "renewable innovations",
    "sustainable policies", "renewable investments", "sustainability measures",
    "clean power generation", "sustainable urban planning", "renewable solutions", "sustainability standards",
    "renewable technologies", "sustainability projects", "clean energy sources", "sustainable resources",
    "renewable initiatives", "sustainability practices", "clean energy solutions", "sustainable business",
    "renewable management", "sustainability assessment", "clean energy systems",
    "sustainable development goals", "renewable conservation", "renewable strategies", "sustainability efforts",
    "clean energy policies", "green energy solutions",
    "energy", "nuclear", "green", "waste", "gas", "electricity",
    "fuel", "fossil", "oil", "plants", "coal", "water", "storage", "renewables", "technology",
    "earth", "greenhouse", "batteries", "fuels", "expensive", "hydrogen", "plant", "global",
    "turbines", "land", "heat", "carbon", "co2", "sun", "environment", "human",
    "emissions", "uranium", "government", "battery", "pollution", "tax", "price",
    "tons", "natural", "high", "production", "time", "use",
]


# Step 3: Create a function to check if a comment contains common keywords
def contains_common_keywords(comment):
    """Return True when ``comment`` contains any entry of ``common_keywords``.

    Matching is a case-insensitive substring test, so a short keyword such as
    "use" also matches inside longer words such as "because".

    Args:
        comment: The comment text. Non-string values (for example the NaN
            pandas uses for an empty cell) are treated as "no match".

    Returns:
        True if at least one keyword occurs in the comment, otherwise False.
    """
    if not isinstance(comment, str):
        return False
    comment = comment.lower()
    # Lowercase the keyword as well, so a mixed-case entry can still match.
    return any(keyword.lower() in comment for keyword in common_keywords)

# Step 4: Keep only the rows whose comment mentions at least one keyword
keyword_mask = df['Comment'].apply(contains_common_keywords)
filtered_comments = df[keyword_mask]

# Step 5: Write the surviving rows to a new CSV file
filtered_comments.to_csv('filtered_comments2.csv', index=False)


Identify misconceptions or gaps in knowledge related to
renewable sources by analyzing the top keywords in comments

In [None]:
import pandas as pd
import nltk
from nltk import FreqDist
from nltk.corpus import stopwords
import string

# Load the comments with sentiment data
df = pd.read_csv('filtered_comments2.csv')

# Tokenize and preprocess the comments
df['Comment'] = df['Comment'].str.lower()
# regex=True must be explicit: without it, recent pandas versions treat the
# pattern as a literal string and no punctuation is removed.
df['Comment'] = df['Comment'].str.replace(r'[^\w\s]', '', regex=True)

# Separate comments by sentiment
positive_comments = df[df['Sentiment Category'] == 'Positive']['Comment']
negative_comments = df[df['Sentiment Category'] == 'Negative']['Comment']
neutral_comments = df[df['Sentiment Category'] == 'Neutral']['Comment']

# Define a list of stopwords
stop_words = set(stopwords.words("english"))

# Common keywords for identifying frequency
# Every keyword appears once and in lowercase (comments are lowercased before
# they are tokenized).
common_keywords = [
    "renewable", "sustainable", "climate", "clean energy", "conservation",
    "solar", "wind", "hydroelectric", "geothermal", "biomass", "tidal energy",
    "sustainable practices", "environmental sustainability", "social sustainability",
    "economic sustainability", "sustainable development",
    "climate action", "carbon footprint", "greenhouse gases", "climate mitigation",
    "climate adaptation",
    "clean power", "green energy", "clean technology", "low-carbon energy", "energy efficiency",
    "energy-saving", "energy conservation", "power reduction",
    "eco-conscious", "environmentally friendly", "eco-friendly products", "green living",
    "electric vehicles", "public transportation", "cycling", "sustainable mobility",
    "biodiversity", "habitat preservation", "conservation efforts",
    "recycling", "reuse", "reduce", "circular economy principles",
    "sustainable architecture", "leed certification", "green building",
    "renewable sources", "renewable technology", "renewable power", "sustainable solutions",
    "sustainable living", "renewable practices", "sustainable initiatives",
    "clean environment", "green initiatives", "environmental conservation",
    "renewable economy", "sustainability goals", "renewable infrastructure", "sustainable transportation",
    "clean fuel", "green infrastructure", "sustainable consumption", "renewable innovations",
    "sustainable policies", "renewable investments", "sustainability measures",
    "clean power generation", "sustainable urban planning", "renewable solutions", "sustainability standards",
    "renewable technologies", "sustainability projects", "clean energy sources", "sustainable resources",
    "renewable initiatives", "sustainability practices", "clean energy solutions", "sustainable business",
    "renewable management", "sustainability assessment", "clean energy systems",
    "sustainable development goals", "renewable conservation", "renewable strategies", "sustainability efforts",
    "clean energy policies", "green energy solutions",
    "energy", "nuclear", "green", "waste", "gas", "electricity",
    "fuel", "fossil", "oil", "plants", "coal", "water", "storage", "renewables", "technology",
    "earth", "greenhouse", "batteries", "fuels", "expensive", "hydrogen", "plant", "global",
    "turbines", "land", "heat", "carbon", "co2", "sun", "environment", "human",
    "emissions", "uranium", "government", "battery", "pollution", "tax", "price",
    "tons", "natural", "high", "production", "time", "use",
]

# Function to extract keywords and calculate frequency for a given sentiment
def extract_keywords(sentiment_comments, common_keywords):
    """Count how often each common keyword occurs in a group of comments.

    The comments are joined into one text and tokenized with
    ``nltk.word_tokenize``; punctuation tokens, stopwords (module-level
    ``stop_words``) and tokens that are not in ``common_keywords`` are
    dropped before counting. Because matching is done per token, only
    single-word keywords can be counted.

    Args:
        sentiment_comments: Iterable of comment strings. Non-string entries
            (such as NaN for an empty cell) are skipped.
        common_keywords: Collection of keywords to count.

    Returns:
        A list of up to 100 ``(keyword, frequency)`` tuples, most frequent
        first.
    """
    # A set gives O(1) membership tests instead of scanning the list per token.
    keyword_set = set(common_keywords)

    # Skip non-string entries; str.join would raise TypeError on them.
    all_sentiment_comments = ' '.join(
        comment for comment in sentiment_comments if isinstance(comment, str)
    )

    # Tokenize the text
    tokens = nltk.word_tokenize(all_sentiment_comments)

    # Drop punctuation and stopwords, keep only the known keywords
    tokens = [
        word for word in tokens
        if word not in string.punctuation
        and word not in stop_words
        and word in keyword_set
    ]

    fdist = FreqDist(tokens)
    return fdist.most_common(100)  # Change the number as needed

# Positive comments: rank the keywords, tabulate them, export to CSV
positive_keywords = extract_keywords(positive_comments, common_keywords)
df_positive = pd.DataFrame(positive_keywords, columns=['Keyword', 'Frequency'])
df_positive.to_csv('positive_keywords.csv', index=False)

# Negative comments: same three steps
negative_keywords = extract_keywords(negative_comments, common_keywords)
df_negative = pd.DataFrame(negative_keywords, columns=['Keyword', 'Frequency'])
df_negative.to_csv('negative_keywords.csv', index=False)

# Neutral comments: same three steps
neutral_keywords = extract_keywords(neutral_comments, common_keywords)
df_neutral = pd.DataFrame(neutral_keywords, columns=['Keyword', 'Frequency'])
df_neutral.to_csv('neutral_keywords.csv', index=False)


Top watched videos on YouTube

In [None]:
# Setting   API key
import googleapiclient.discovery
import datetime

# Setting our API key
# NOTE(review): the key is hard-coded; consider loading it from configuration.
API_KEY = 'AIzaSyBAWKHwXeOxokQfCx6RvNxJkX8BFc2scTY'

# Create a YouTube Data API service (reuse API_KEY instead of repeating the literal)
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=API_KEY)

# Function to search for renewable and clean energy videos
def search_renewable_energy_videos(api_key, query="renewable energy", max_results=50):
    """Search YouTube for videos on a topic, ordered by view count.

    Args:
        api_key: YouTube Data API key used to build the client.
        query: Search terms; defaults to the previously hard-coded
            "renewable energy".
        max_results: Forwarded as ``maxResults``; defaults to the previously
            hard-coded 50.

    Returns:
        The raw ``search.list`` response (only the ``id`` part is requested).
    """
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)

    return youtube.search().list(
        q=query,
        type="video",
        order="viewCount",
        part="id",
        maxResults=max_results,
    ).execute()

# Function to get the video statistics (including view counts) by video ID
def get_video_statistics(api_key, video_ids):
    """Fetch the ``snippet`` and ``statistics`` parts for the given videos.

    Args:
        api_key: YouTube Data API key used to build the client.
        video_ids: List of video ID strings; they are sent as one
            comma-separated ``id`` parameter.

    Returns:
        The raw ``videos.list`` response. When ``video_ids`` is empty no
        request is made and ``{"items": []}`` is returned.
    """
    # An empty id list would send id="" to the API; answer locally instead.
    if not video_ids:
        return {"items": []}

    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)

    video_response = youtube.videos().list(
        id=",".join(video_ids),
        part="snippet,statistics"
    ).execute()

    return video_response

# Function to extract the time of day from video publishedAt timestamps
def get_time_of_day_from_published_at(video_response):
    """Return the ``HH:MM`` publish time of every video in a response.

    Args:
        video_response: Mapping with an optional ``"items"`` list; each item
            provides ``item["snippet"]["publishedAt"]`` as an ISO 8601 string
            (a trailing ``Z`` is read as ``+00:00``).

    Returns:
        A list of ``"%H:%M"`` strings, one per item, in response order.
    """
    def _hour_minute(item):
        iso_text = item["snippet"]["publishedAt"].replace("Z", "+00:00")
        return datetime.datetime.fromisoformat(iso_text).strftime("%H:%M")

    return [_hour_minute(item) for item in video_response.get("items", [])]

# Main code
search_results = search_renewable_energy_videos(API_KEY)
video_ids = [result["id"]["videoId"] for result in search_results.get("items", [])]

video_response = get_video_statistics(API_KEY, video_ids)
# NOTE: these are the videos' publishedAt times (HH:MM), i.e. when each video
# was uploaded, not when viewers watched it.
times_of_day = get_time_of_day_from_published_at(video_response)

# Count and find the most common time of day
from collections import Counter
time_counts = Counter(times_of_day)

# most_common(1) is an empty list when no timestamps came back; indexing it
# with [0] would raise IndexError.
if time_counts:
    most_common_time = time_counts.most_common(1)[0]
    print(f"Most Common Time of Day to Watch Renewable Energy Videos: {most_common_time[0]}")
else:
    most_common_time = None
    print("No videos were returned, so no most common time could be determined.")




Importing view count, duration, likes, etc. of a video

In [None]:
from googleapiclient.discovery import build
import pandas as pd
import seaborn as sns

api_key = 'AIzaSyAdw62weNnmR82iadUwd6yIgevOjpBJ5dI'

# Channels to report on (single-channel variant, kept for reference):
#channel_id = 'UCnz-ZXXER4jOvuED5trXfEA'
channel_ids = [
    'UCnz-ZXXER4jOvuED5trXfEA',  # techTFQ
    'UCLLw7jmFsvfIVaUFsLs8mlQ',  # Luke Barousse
    'UCiT9RITQ9PW6BhXK0y2jaeg',  # Ken Jee
    'UC7cs8q-gJRlGwj4A8OmCmXg',  # Alex the analyst
    'UC2UXDak6o7rBm23k3Vv5dww',  # Tina Huang
    'UCCz5pvnMksWgbWh5MKMH-vQ',  # tatapower
]

# Client for the YouTube Data API v3
youtube = build('youtube', 'v3', developerKey=api_key)


def get_channel_stats(youtube, channel_ids):
    """Collect headline statistics for a list of channels.

    Args:
        youtube: YouTube Data API client; only
            ``youtube.channels().list(...).execute()`` is used.
        channel_ids: List of channel ID strings, sent as one comma-separated
            ``id`` parameter.

    Returns:
        A list with one dict per returned channel, holding ``Channel_name``,
        ``Subscribers``, ``Views``, ``Total_videos`` and ``playlist_id`` (the
        channel's uploads playlist). ``Subscribers`` is None when the
        response carries no ``subscriberCount`` for a channel. The list is
        empty when the response has no ``items``.
    """
    request = youtube.channels().list(
        part='snippet,contentDetails,statistics',
        id=','.join(channel_ids))
    response = request.execute()

    all_data = []
    # Tolerate a response without 'items' (no matching channel).
    for item in response.get('items', []):
        statistics = item['statistics']
        all_data.append(dict(
            Channel_name=item['snippet']['title'],
            # Presumably absent when a channel hides its subscriber count;
            # fall back to None instead of raising KeyError.
            Subscribers=statistics.get('subscriberCount'),
            Views=statistics['viewCount'],
            Total_videos=statistics['videoCount'],
            playlist_id=item['contentDetails']['relatedPlaylists']['uploads'],
        ))

    return all_data

channel_statistics = get_channel_stats(youtube, channel_ids)

print(channel_statistics)

# Tabulate the per-channel records and convert the three counter columns
# to a numeric dtype
channel_data = pd.DataFrame(channel_statistics)
for count_column in ('Subscribers', 'Views', 'Total_videos'):
    channel_data[count_column] = pd.to_numeric(channel_data[count_column])
print(channel_data)


import os
import googleapiclient.discovery

# Set your API key and the YouTube video ID
# NOTE(review): the key is hard-coded; consider loading it from configuration.
API_KEY = 'AIzaSyAdw62weNnmR82iadUwd6yIgevOjpBJ5dI'
VIDEO_ID = "RD4tOTaqSTI"
# Creating a YouTube Data API service (reuse API_KEY instead of repeating the literal)
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=API_KEY)

# Getting video details including content details
video_response = youtube.videos().list(
    part="snippet,contentDetails,statistics",
    id=VIDEO_ID
).execute()

# Extracting the view count and the video duration (the duration is an
# ISO 8601 string; it is converted to seconds further below)
view_count = int(video_response["items"][0]["statistics"]["viewCount"])
video_duration_iso = video_response["items"][0]["contentDetails"]["duration"]
print("view count is",view_count,"and vedio duration is=",video_duration_iso)

# Converting video duration from ISO 8601 format (e.g. "PT1H2M3S") to seconds
import re


def _iso8601_duration_to_seconds(duration):
    """Convert an ISO 8601 duration such as ``PT4M13S`` to whole seconds.

    Handles the week, day, hour, minute and second designators. The previous
    string-splitting code failed on seconds-only values ("PT45S") and on any
    value containing hours.

    Args:
        duration: The ISO 8601 duration string.

    Returns:
        The duration in seconds, or 0 when the string does not match the
        expected format (0 was also the previous default).
    """
    match = re.fullmatch(
        r'P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?',
        duration,
    )
    if match is None:
        return 0
    weeks, days, hours, minutes, seconds = (
        int(group) if group else 0 for group in match.groups()
    )
    return (((weeks * 7 + days) * 24 + hours) * 60 + minutes) * 60 + seconds


video_duration = _iso8601_duration_to_seconds(video_duration_iso)

# Calculating the first 10% of the video duration
first_10_percent = video_duration / 10

# Calculating the estimated number of viewers for the first 10%.
# Under the linear-viewing assumption this always works out to 10% of the
# view count. A zero duration would divide by zero, so report 0 viewers
# in that case.
if video_duration:
    estimated_viewers_first_10_percent = int((view_count * first_10_percent) / video_duration)
else:
    estimated_viewers_first_10_percent = 0

# Printing the results
print(f"Total Views: {view_count}")
print(f"Video Duration: {video_duration} seconds")
print(f"Estimated Viewers for First 10%: {estimated_viewers_first_10_percent}")

# In the code above we simply assumed that viewers watch in a linear fashion




If we are a channel and have the view durations, we can plot a normal distribution of them to assess how long users are watching

In [None]:
# Simulated viewer watch times
viewer_watch_times = [602, 720, 830, 410, 302, 752, 930, 810, 730, 60, 712, 230, 1500, 800]

# Threshold: 30% of the watch time summed over all simulated viewers
total_watch_time = sum(viewer_watch_times)
threshold_time = total_watch_time * 0.3

# Count the entries at or below the threshold.
# NOTE(review): the threshold comes from the total of all viewers rather than
# from the video length, so with the sample data above every entry qualifies —
# confirm this is the intended definition.
num_viewers_watched_30_percent = len(
    [watch_time for watch_time in viewer_watch_times if watch_time <= threshold_time]
)

# Report the count
print(f"Number of Viewers who Watched the First 30%: {num_viewers_watched_30_percent}")

import numpy as np
# StatisticsError must be imported too; the except clause below would
# otherwise fail with NameError the moment it is evaluated.
from statistics import StatisticsError, mean, median, mode


# Calculate mean, median, and mode
mean_watch_time = mean(viewer_watch_times)
median_watch_time = median(viewer_watch_times)
try:
    mode_watch_time = mode(viewer_watch_times)
except StatisticsError:
    # Python versions before 3.8 raise here when there is no single most
    # common value.
    mode_watch_time = "No unique mode found"

# Print the results
print(f"Mean Watch Time: {mean_watch_time:.2f} seconds")
print(f"Median Watch Time: {median_watch_time:.2f} seconds")
print(f"Mode Watch Time: {mode_watch_time}")

# Standard deviation of the watch times, as computed by np.std
std_deviation = np.std(viewer_watch_times)

print(f"Standard Deviation: {std_deviation:.2f}")

import numpy as np
import matplotlib.pyplot as plt


# Draw one simulated watch time per view from a normal distribution with the
# mean and standard deviation computed above.
# NOTE(review): the sample size equals view_count, which may be large.
watch_times = np.random.normal(mean_watch_time, std_deviation, view_count)

# Summary statistics of the simulated sample
mean_watch = np.mean(watch_times)
median_watch = np.median(watch_times)
std_dev = np.std(watch_times)

# Report the statistics
print(f"Mean Watch Time: {mean_watch:.2f} seconds")
print(f"Median Watch Time: {median_watch:.2f} seconds")
print(f"Standard Deviation: {std_dev:.2f} seconds")

# Build the histogram of the simulated watch times
plt.figure(figsize=(10, 6))
plt.hist(watch_times, bins=20, edgecolor='black', alpha=0.7)
plt.title("Viewer Watch Time Histogram")
plt.xlabel("Watch Time (seconds)")
plt.ylabel("Number of Viewers")
plt.grid(True)

# Display it
plt.show()



Top 50 videos for the search "Renewable and sustainable energy"

In [None]:
#finding most watched content on youtube w.r.t. renewable energy

import googleapiclient.discovery

# Set your API key
# NOTE(review): the key is hard-coded; consider loading it from configuration.
API_KEY = 'AIzaSyBAWKHwXeOxokQfCx6RvNxJkX8BFc2scTY'

# Create a YouTube Data API service (reuse API_KEY instead of repeating the literal)
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=API_KEY)

# Search for renewable energy content
def search_renewable_energy_videos(api_key, query="renewable energy", max_results=10):
    """Search YouTube for videos on a topic, ordered by view count.

    Args:
        api_key: YouTube Data API key used to build the client.
        query: Search terms; defaults to the previously hard-coded
            "renewable energy".
        max_results: Forwarded as ``maxResults``; defaults to the previously
            hard-coded 10.

    Returns:
        The raw ``search.list`` response (only the ``id`` part is requested).
    """
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)

    return youtube.search().list(
        q=query,
        type="video",
        order="viewCount",
        part="id",
        maxResults=max_results,
    ).execute()

# Run the search and list the ID of every returned video
search_results = search_renewable_energy_videos(API_KEY)
result_items = search_results.get("items", [])
for search_result in result_items:
    video_id = search_result["id"]["videoId"]
    print(f"Video ID: {video_id}")


Time of day when people watch "Sustainable energy" content the most

In [None]:
# Setting   API key
import googleapiclient.discovery
import datetime

# Setting our API key
# NOTE(review): the key is hard-coded; consider loading it from configuration.
API_KEY = 'AIzaSyBAWKHwXeOxokQfCx6RvNxJkX8BFc2scTY'

# Create a YouTube Data API service (reuse API_KEY instead of repeating the literal)
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=API_KEY)

# Function to search for renewable and clean energy videos
def search_renewable_energy_videos(api_key, query="renewable energy", max_results=50):
    """Search YouTube for videos on a topic, ordered by view count.

    Args:
        api_key: YouTube Data API key used to build the client.
        query: Search terms; defaults to the previously hard-coded
            "renewable energy".
        max_results: Forwarded as ``maxResults``; defaults to the previously
            hard-coded 50.

    Returns:
        The raw ``search.list`` response (only the ``id`` part is requested).
    """
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)

    return youtube.search().list(
        q=query,
        type="video",
        order="viewCount",
        part="id",
        maxResults=max_results,
    ).execute()

# Function to get the video statistics (including view counts) by video ID
def get_video_statistics(api_key, video_ids):
    """Fetch the ``snippet`` and ``statistics`` parts for the given videos.

    Args:
        api_key: YouTube Data API key used to build the client.
        video_ids: List of video ID strings; they are sent as one
            comma-separated ``id`` parameter.

    Returns:
        The raw ``videos.list`` response. When ``video_ids`` is empty no
        request is made and ``{"items": []}`` is returned.
    """
    # An empty id list would send id="" to the API; answer locally instead.
    if not video_ids:
        return {"items": []}

    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)

    video_response = youtube.videos().list(
        id=",".join(video_ids),
        part="snippet,statistics"
    ).execute()

    return video_response

# Function to extract the time of day from video publishedAt timestamps
def get_time_of_day_from_published_at(video_response):
    """Return the ``HH:MM`` publish time of every video in a response.

    Args:
        video_response: Mapping with an optional ``"items"`` list; each item
            provides ``item["snippet"]["publishedAt"]`` as an ISO 8601 string
            (a trailing ``Z`` is read as ``+00:00``).

    Returns:
        A list of ``"%H:%M"`` strings, one per item, in response order.
    """
    def _hour_minute(item):
        iso_text = item["snippet"]["publishedAt"].replace("Z", "+00:00")
        return datetime.datetime.fromisoformat(iso_text).strftime("%H:%M")

    return [_hour_minute(item) for item in video_response.get("items", [])]

# Main code
search_results = search_renewable_energy_videos(API_KEY)
video_ids = [result["id"]["videoId"] for result in search_results.get("items", [])]

video_response = get_video_statistics(API_KEY, video_ids)
# NOTE: these are the videos' publishedAt times (HH:MM), i.e. when each video
# was uploaded, not when viewers watched it.
times_of_day = get_time_of_day_from_published_at(video_response)

# Count and find the most common time of day
from collections import Counter
time_counts = Counter(times_of_day)

# most_common(1) is an empty list when no timestamps came back; indexing it
# with [0] would raise IndexError.
if time_counts:
    most_common_time = time_counts.most_common(1)[0]
    print(f"Most Common Time of Day to Watch Renewable Energy Videos: {most_common_time[0]}")
else:
    most_common_time = None
    print("No videos were returned, so no most common time could be determined.")


Collecting data for all of the top 50 videos

In [None]:
import os
import pandas as pd
from googleapiclient.discovery import build

# Search settings: the topic to look for and how many results to request
topic = 'renewable energy and sustainability'
max_results = 50

# Credential for the YouTube Data API
api_key = 'AIzaSyBAWKHwXeOxokQfCx6RvNxJkX8BFc2scTY'

# Define a function to collect video data
def collect_top_videos(api_key, topic, max_results):
    """Collect title, description, views and likes for the top videos on a topic.

    Args:
        api_key: YouTube Data API key used to build the client.
        topic: Search terms.
        max_results: Number of search results to request.

    Returns:
        A list of dicts with the keys ``Video ID``, ``Title``,
        ``Description``, ``Views`` and ``Likes``, in search-result order.
        Videos for which no details come back are skipped; a missing view or
        like count is reported as 0.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Search for videos related to the topic and order by view count
    search_response = youtube.search().list(
        q=topic,
        type='video',
        part='id',
        maxResults=max_results,
        order='viewCount'
    ).execute()

    video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
    if not video_ids:
        return []

    # One videos.list call with a comma-separated id list replaces one
    # request per video.
    details_response = youtube.videos().list(
        part='snippet,statistics',
        id=','.join(video_ids)
    ).execute()
    details_by_id = {video['id']: video for video in details_response.get('items', [])}

    video_data = []
    for video_id in video_ids:
        video = details_by_id.get(video_id)
        if video is None:
            # No details were returned for this ID; skip it instead of
            # failing on an empty result.
            continue
        snippet = video['snippet']
        statistics = video['statistics']
        video_data.append({
            'Video ID': video_id,
            'Title': snippet['title'],
            'Description': snippet['description'],
            'Views': statistics.get('viewCount', 0),
            'Likes': statistics.get('likeCount', 0),
        })

    return video_data

# Analyze video data
def analyze_video_data(video_data):
    """Wrap the collected video records in a pandas DataFrame."""
    return pd.DataFrame(video_data)

if __name__ == '__main__':
    # Fetch the records for the top 50 videos and tabulate them
    video_data = collect_top_videos(api_key, topic, max_results)
    df = analyze_video_data(video_data)

    # Show every column on one line when printing the DataFrame
    for option_name, option_value in (
        ('display.max_columns', None),
        ('display.expand_frame_repr', False),
    ):
        pd.set_option(option_name, option_value)
    print(df)

    # Gather all descriptions into one list and print them one by one
    all_descriptions = df['Description'].tolist()
    for description in all_descriptions:
        print(description)

####
import os
import pandas as pd
from googleapiclient.discovery import build

# Search settings: the topic to look for and how many results to request
topic = 'renewable energy and sustainability'
max_results = 50

# Credential for the YouTube Data API
api_key = 'AIzaSyBAWKHwXeOxokQfCx6RvNxJkX8BFc2scTY'

# Define a function to collect video data
def collect_top_videos(api_key, topic, max_results):
    """Collect the title and description of the top videos on a topic.

    Args:
        api_key: YouTube Data API key used to build the client.
        topic: Search terms.
        max_results: Number of search results to request.

    Returns:
        A list of dicts with the keys ``Video ID``, ``Title`` and
        ``Description``, in search-result order. Videos for which no details
        come back are skipped.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Search for videos related to the topic and order by view count
    search_response = youtube.search().list(
        q=topic,
        type='video',
        part='id',
        maxResults=max_results,
        order='viewCount'
    ).execute()

    video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
    if not video_ids:
        return []

    # One videos.list call with a comma-separated id list replaces one
    # request per video.
    details_response = youtube.videos().list(
        part='snippet',
        id=','.join(video_ids)
    ).execute()
    details_by_id = {video['id']: video for video in details_response.get('items', [])}

    video_data = []
    for video_id in video_ids:
        video = details_by_id.get(video_id)
        if video is None:
            # No details were returned for this ID; skip it instead of
            # failing on an empty result.
            continue
        snippet = video['snippet']
        video_data.append({
            'Video ID': video_id,
            'Title': snippet['title'],
            'Description': snippet['description'],
        })

    return video_data

# Analyze video data
def analyze_video_data(video_data):
    """Wrap the collected video records in a pandas DataFrame."""
    return pd.DataFrame(video_data)

if __name__ == '__main__':
    # Fetch the records for the top 50 videos and tabulate them
    video_data = collect_top_videos(api_key, topic, max_results)
    df = analyze_video_data(video_data)

    # Export only the ID and description columns to a CSV file
    description_columns = ['Video ID', 'Description']
    df[description_columns].to_csv('video_description_data.csv', index=False)

