In [15]:
import os
from pathlib import Path

import nltk
import pandas as pd
from googleapiclient.discovery import build
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# YouTube Data API key.
# Read from the YOUTUBE_API_KEY environment variable so the credential is not
# stored in the source file. When the variable is unset the key is an empty
# string and the API requests made further down will be rejected.
# NOTE: a key that has ever been committed to source control should be
# treated as compromised and rotated.
api_key = os.environ.get("YOUTUBE_API_KEY", "")

# Function to search for videos and extract video information along with channel info
def search_videos(youtube, query, max_results=10):
    """Search YouTube for videos matching *query* and tabulate the hits.

    Args:
        youtube: YouTube Data API client; must provide
            ``search().list(...).execute()``.
        query: Search string sent as the ``q`` parameter.
        max_results: Value sent as the ``maxResults`` parameter.

    Returns:
        A DataFrame with one row per search hit and the columns
        "Channel Name", "Video Title", "Channel ID", "Video ID" and
        "Video Description". The columns are present even when the search
        returns no items, so callers can select them unconditionally.
    """
    columns = [
        "Channel Name",
        "Video Title",
        "Channel ID",
        "Video ID",
        "Video Description",
    ]

    search_response = youtube.search().list(
        q=query,
        type="video",
        part="id,snippet",
        maxResults=max_results
    ).execute()

    video_data = []
    for search_result in search_response.get("items", []):
        snippet = search_result["snippet"]
        video_data.append({
            "Channel Name": snippet["channelTitle"],
            "Video Title": snippet["title"],
            "Channel ID": snippet["channelId"],
            "Video ID": search_result["id"]["videoId"],
            "Video Description": snippet["description"],
        })

    # Passing the columns explicitly keeps the schema stable for an empty
    # result; without it the frame has no columns and later column lookups
    # raise KeyError.
    return pd.DataFrame(video_data, columns=columns)

# Function to get the IDs of the channels that the given videos belong to
def get_watched_channels(youtube, video_ids):
    """Return the distinct channel IDs that the given videos belong to.

    The lookup reads ``snippet.channelId`` of each video; video IDs for
    which the API returns no item are skipped.

    Args:
        youtube: YouTube Data API client; must provide
            ``videos().list(...).execute()``.
        video_ids: Iterable of video ID strings.

    Returns:
        A list of unique channel IDs in the order they were first seen in
        the API responses. Empty when *video_ids* is empty.
    """
    # Several IDs are sent per request as a comma-separated list instead of
    # one request per video. 50 per request is the commonly used upper
    # bound for this endpoint.
    batch_size = 50
    ids = list(video_ids)

    # A dict serves as an insertion-ordered set, so the result order is
    # deterministic.
    channel_ids = {}
    for start in range(0, len(ids), batch_size):
        video_response = youtube.videos().list(
            id=",".join(ids[start:start + batch_size]),
            part="snippet"
        ).execute()

        for item in video_response.get("items", []):
            channel_ids[item["snippet"]["channelId"]] = None

    return list(channel_ids)

# Function to get channel names from channel IDs
def get_channel_names(youtube, channel_ids):
    """Look up the titles of the given channels.

    Args:
        youtube: YouTube Data API client; must provide
            ``channels().list(...).execute()``.
        channel_ids: Iterable of channel ID strings.

    Returns:
        A DataFrame with the columns "Channel Name" and "Channel ID", one
        row per requested ID that the API returned, in the order of
        *channel_ids*. IDs without a match are omitted. The columns are
        present even when there are no rows.
    """
    columns = ["Channel Name", "Channel ID"]

    # Several IDs are sent per request as a comma-separated list instead of
    # one request per channel. 50 per request is the commonly used upper
    # bound for this endpoint.
    batch_size = 50
    ids = list(channel_ids)

    titles = {}
    for start in range(0, len(ids), batch_size):
        channel_response = youtube.channels().list(
            id=",".join(ids[start:start + batch_size]),
            part="snippet"
        ).execute()

        # Index by the resource's own ID: a batched response is not tied to
        # the request order.
        for item in channel_response.get("items", []):
            titles[item["id"]] = item["snippet"]["title"]

    channel_data = [
        {"Channel Name": titles[channel_id], "Channel ID": channel_id}
        for channel_id in ids
        if channel_id in titles
    ]
    return pd.DataFrame(channel_data, columns=columns)

# Function to extract keywords from text
def extract_keywords(text):
    """Return the ten most frequent stemmed words in *text*.

    The text is tokenized, tokens that are not purely alphabetic or whose
    lower-cased form is an English stopword are discarded, and the
    remaining tokens are stemmed with the Porter stemmer before counting.

    Returns:
        The result of ``FreqDist.most_common(10)`` over the stems: a list
        of at most ten ``(stem, count)`` pairs.
    """
    tokens = word_tokenize(text)
    english_stopwords = set(stopwords.words("english"))
    porter = PorterStemmer()

    stems = []
    for token in tokens:
        # Skip punctuation, numbers and other non-alphabetic tokens
        if not token.isalpha():
            continue
        # Stopword comparison is case-insensitive
        if token.lower() in english_stopwords:
            continue
        stems.append(porter.stem(token))

    return FreqDist(stems).most_common(10)

# Build the YouTube Data API v3 client that every request below goes through
youtube = build('youtube', 'v3', developerKey=api_key)

# Part 1: search for renewable-energy / sustainability videos; each hit keeps
# its channel name and channel ID alongside the video fields
renewable_videos_query = "renewable energy and sustainability"
video_data_df = search_videos(youtube, query=renewable_videos_query)
print(
    "Table 1: Video Titles, Video IDs, Channel Names, Channel IDs, "
    "and Video Descriptions from Renewable and Sustainability Videos",
    video_data_df,
    sep="\n",
)

# Part 2: resolve the channels behind the videos found in Part 1 and look up
# their names
# NOTE(review): the table label says "watched alongside", but the lookup only
# maps each video to the channel recorded in its snippet - confirm the intent
video_ids = video_data_df["Video ID"].tolist()
channel_ids = get_watched_channels(youtube, video_ids=video_ids)
channel_data_df = get_channel_names(youtube, channel_ids=channel_ids)
print(
    "\nTable 2: Channel Names and Channel IDs Watched Alongside "
    "Renewable and Sustainability Videos",
    channel_data_df,
    sep="\n",
)

# Part 3: run keyword extraction separately on each video's title and
# description
keywords_data = [
    {
        "Video Keywords (Title)": extract_keywords(title),
        "Video Keywords (Description)": extract_keywords(description),
    }
    for title, description in zip(
        video_data_df["Video Title"], video_data_df["Video Description"]
    )
]

# One row per video, in the same order as video_data_df
keywords_df = pd.DataFrame(keywords_data)
print("\nTable 3: Extracted Keywords from Video Titles and Descriptions")
print(keywords_df)

# Part 4: Save the tables to CSV files in the current user's Downloads folder.
# The folder is derived from the home directory instead of a fixed user
# profile path, so the script also runs under other accounts and operating
# systems. It is created first in case it does not exist yet.
output_dir = Path.home() / "Downloads"
output_dir.mkdir(parents=True, exist_ok=True)
video_data_df.to_csv(output_dir / "video_data.csv", index=False)
channel_data_df.to_csv(output_dir / "channel_data.csv", index=False)
keywords_df.to_csv(output_dir / "keywords.csv", index=False)

# Part 5: Check which sustainability-related keywords occur in the video titles.
# Every phrase is listed exactly once so that a match is reported once.
common_keywords = [
    "renewable", "sustainable", "climate", "clean energy", "conservation",
    "solar", "wind", "hydroelectric", "geothermal", "biomass", "tidal energy",
    "sustainable practices", "environmental sustainability", "social sustainability",
    "economic sustainability", "sustainable development",
    "climate action", "carbon footprint", "greenhouse gases", "climate mitigation",
    "climate adaptation",
    "clean power", "green energy", "clean technology", "low-carbon energy", "energy efficiency",
    "energy-saving", "energy conservation", "power reduction",
    "eco-conscious", "environmentally friendly", "eco-friendly products", "green living",
    "electric vehicles", "public transportation", "cycling", "sustainable mobility",
    "biodiversity", "habitat preservation", "conservation efforts",
    "recycling", "reuse", "reduce", "circular economy principles",
    "sustainable architecture", "LEED certification", "green building",
    "renewable sources", "renewable technology", "renewable power", "sustainable solutions",
    "sustainable living", "renewable practices", "sustainable initiatives",
    "clean environment", "green initiatives", "environmental conservation",
    "renewable economy", "sustainability goals", "renewable infrastructure", "sustainable transportation",
    "clean fuel", "green infrastructure", "sustainable consumption", "renewable innovations",
    "sustainable policies", "renewable investments", "sustainability measures",
    "clean power generation", "sustainable urban planning", "renewable solutions", "sustainability standards",
    "renewable technologies", "sustainability projects", "clean energy sources", "sustainable resources",
    "renewable initiatives", "sustainability practices", "clean energy solutions", "sustainable business",
    "renewable management", "sustainability assessment", "clean energy systems",
    "sustainable development goals", "renewable conservation", "renewable strategies", "sustainability efforts",
    "clean energy policies", "green energy solutions",
]

# Lower-case the titles once instead of once per keyword.
lowered_titles = [title.lower() for title in video_data_df["Video Title"]]

# The keyword is lower-cased too: titles are compared in lower case, so a
# mixed-case entry such as "LEED certification" could otherwise never match.
# This is a plain substring test, so "wind" also matches "window".
found_keywords = [
    keyword
    for keyword in common_keywords
    if any(keyword.lower() in title for title in lowered_titles)
]

print("\nCommon Keywords Found in Video Titles:")
for keyword in found_keywords:
    print(keyword)



Table 1: Video Titles, Video IDs, Channel Names, Channel IDs, and Video Descriptions from Renewable and Sustainability Videos
              Channel Name                                        Video Title  \
0                   TED-Ed  Can 100% renewable energy power the world? - F...   
1         Planète Energies  What Is the Difference Between Renewable Energ...   
2  Interesting Engineering            Is renewable energy really sustainable?   
3      National Geographic         Renewable Energy 101 | National Geographic   
4            The Economist  Green energy: Which sources are the most susta...   
5   Northumbria University  Renewable and Sustainable Energy Technologies ...   
6               TEDx Talks  Why renewables can’t save the planet | Michael...   
7                      TED  A Faster Way to Get to a Clean Energy Future |...   
8                    Axios        Pushing Forward on the Path to Clean Energy   
9                DeclanLTD            Elon Musk on Renewable Ene