In [7]:
# Install Packages
!pip install google-api-python-client google-cloud-bigquery-storage pycountry



In [None]:
# --- 1. SETUP AND AUTHENTICATION ---

from googleapiclient.discovery import build
from google.cloud import bigquery
from datetime import datetime
import pandas as pd
import pycountry
import time
import os

# Initialize the YouTube Data API client.
# Fail fast when the key is missing: os.getenv() returns None for an unset
# variable, and the problem would otherwise only surface later, when the
# API requests themselves fail.
api_key = os.getenv("YOUTUBE_API_KEY")
if not api_key:
    raise RuntimeError("YOUTUBE_API_KEY environment variable is not set")

youtube = build("youtube", "v3", developerKey=api_key)

print("‚úÖ YouTube client initialized successfully")

‚úÖ YouTube client initialized successfully


In [9]:
# --- 2. DEFINE REQUEST PARAMETERS ---

# African regions mapped to ISO 3166-1 alpha-2 country codes (54 countries in total).
# KM (Comoros), SS (South Sudan) and MR (Mauritania) were missing from the
# original mapping, which only covered 51 countries.
regions = {
    "East Africa": ["KE", "UG", "TZ", "RW", "BI", "ET", "SO", "DJ", "ER", "SC", "MG", "MU", "KM", "SS"],
    "West Africa": ["NG", "GH", "CI", "SN", "TG", "SL", "LR", "GM", "BF", "BJ", "NE", "ML", "GN", "GW", "CV", "MR"],
    "North Africa": ["EG", "DZ", "MA", "TN", "LY", "SD"],
    "Southern Africa": ["ZA", "NA", "BW", "MZ", "ZW", "ZM", "LS", "SZ", "AO", "MW"],
    "Central Africa": ["CM", "CD", "CG", "GA", "GQ", "CF", "TD", "ST"]
}

# The API returns results in pages; this keeps track of which page to fetch next
next_page_token = None

In [10]:
# --- Collect the most popular videos for every country of every region ---
# Each video becomes one flat dict appended to `videos`.
videos = []

for region, countries in regions.items():
    print(f"Collecting data for {region} ‚úÖ")

    for country in countries:
        # --- Get full country name ---
        # Depending on the pycountry version, an unknown code either returns
        # None (AttributeError on .name) or raises a lookup error.
        try:
            country_name = pycountry.countries.get(alpha_2=country).name
        except (AttributeError, LookupError):
            country_name = "Unknown"

        try:
            next_page_token = None
            # Category id -> title lookup. It does not change between pages, so
            # it is fetched once per country (after the first successful page)
            # instead of once per page.
            category_map = None

            while True:
                response = youtube.videos().list(
                    part="snippet,statistics",
                    chart="mostPopular",
                    regionCode=country,
                    maxResults=50,
                    pageToken=next_page_token
                ).execute()

                if category_map is None:
                    category_response = youtube.videoCategories().list(
                        part="snippet",
                        regionCode=country
                    ).execute()
                    category_map = {
                        category["id"]: category["snippet"]["title"]
                        for category in category_response.get("items", [])
                    }

                for item in response.get("items", []):
                    snippet = item.get("snippet", {})
                    stats = item.get("statistics", {})

                    # Get category info
                    category_id = snippet.get("categoryId")
                    category_name = category_map.get(category_id, "Unknown")

                    videos.append({
                        "country_code": country,
                        "country_name": country_name,
                        "region": region,
                        "video_id": item.get("id"),
                        "title": snippet.get("title"),
                        "description": snippet.get("description"),
                        "channel_title": snippet.get("channelTitle"),
                        "category_id": category_id,
                        "category_name": category_name,
                        "published_at": snippet.get("publishedAt"),
                        "tags": snippet.get("tags", []),
                        # Missing counters default to 0
                        "view_count": int(stats.get("viewCount", 0)),
                        "like_count": int(stats.get("likeCount", 0)),
                        "comment_count": int(stats.get("commentCount", 0))
                    })

                # Keep paging until the API stops returning a continuation token
                next_page_token = response.get("nextPageToken")
                if not next_page_token:
                    break

        except Exception as e:
            # Best effort: a failing country must not abort the whole run, but
            # report it so gaps in the collected data are visible.
            print(f"  Skipping {country}: {e}")
            continue

print(f"Total videos retrieved across Africa: {len(videos)} ‚úÖ")

Collecting data for East Africa ‚úÖ
Collecting data for West Africa ‚úÖ
Collecting data for North Africa ‚úÖ
Collecting data for Southern Africa ‚úÖ
Collecting data for Central Africa ‚úÖ
Total videos retrieved across Africa: 2109 ‚úÖ


In [11]:
# Build a DataFrame from the collected records, keep only the first row for each
# (video_id, country_code, country_name, region) combination, and renumber the
# index from zero.
bigdata = (
    pd.DataFrame(videos)
    .drop_duplicates(
        subset=["video_id", "country_code", "country_name", "region"],
        keep="first",
    )
    .reset_index(drop=True)
)

In [18]:
# Create the BigQuery client bound to the target Google Cloud project
client = bigquery.Client(project="data-storage-485106")

In [19]:
# Fully-qualified destination table (project.dataset.table)
table_id = 'data-storage-485106.youtube.trending_now'

# Export Data to BigQuery
job = client.load_table_from_dataframe(bigdata, table_id)

# result() blocks until the load job finishes and raises if the job failed.
# Polling job.state alone cannot detect a failure, because a failed job also
# ends in the DONE state.
job.result()
print(job.state)

DONE


In [20]:
# Define SQL query to retrieve the YouTube trending data from Google Cloud BigQuery.
# Each fragment ends with a space so the implicitly concatenated literals form
# properly separated SQL clauses.
sql = (
    'SELECT * '
    'FROM `data-storage-485106.youtube.trending_now` '
    'ORDER BY country_code'
)

# Run SQL Query and download the result as a DataFrame
data = client.query(sql).to_dataframe()

In [21]:
# Drop the original table from BigQuery
client.delete_table(table_id)
print("Table deleted successfully.")

# Discard rows that repeat the same (video_id, country_code, country_name, region)
data = data.drop_duplicates(
    subset=["video_id", "country_code", "country_name", "region"]
)

Table deleted successfully.


In [None]:
# Define the dataset ID and table ID
dataset_id = 'youtube'
inner_table_id = 'trending_now'

# Define the BigQuery schema for YouTube trending videos
schema = [
    bigquery.SchemaField("country_code", "STRING"),
    bigquery.SchemaField("country_name", "STRING"),
    bigquery.SchemaField("region", "STRING"),
    bigquery.SchemaField("video_id", "STRING"),
    bigquery.SchemaField("title", "STRING"),
    bigquery.SchemaField("description", "STRING"),
    bigquery.SchemaField("channel_title", "STRING"),
    bigquery.SchemaField("category_id", "STRING"),
    bigquery.SchemaField("category_name", "STRING"),
    bigquery.SchemaField("published_at", "STRING"),
    bigquery.SchemaField("tags", "STRING"),  # optional: convert list to string before upload
    bigquery.SchemaField("view_count", "INTEGER"),
    bigquery.SchemaField("like_count", "INTEGER"),
    bigquery.SchemaField("comment_count", "INTEGER")
]

# Define the table reference inside the client's default project.
# DatasetReference replaces the deprecated Client.dataset() helper.
table_ref = bigquery.DatasetReference(client.project, dataset_id).table(inner_table_id)

# Create the table object
table = bigquery.Table(table_ref, schema=schema)

try:
    # Create the table in BigQuery
    table = client.create_table(table)
    # bigquery.Table exposes its name as `table_id`; it has no `inner_table_id`
    # attribute, so the previous message raised AttributeError.
    print(f"Table {table.table_id} created successfully.")
except Exception as e:
    # Best effort: report the failure together with its cause instead of hiding it
    print(f"Table {table.table_id} failed: {e}")

Table trending_now created successfully.


In [25]:
# Define the BigQuery table ID (project.dataset.table)
table_id = 'data-storage-485106.youtube.trending_now'

# Load the data into the BigQuery table
job = client.load_table_from_dataframe(data, table_id)

# Wait for the job to complete. result() blocks until the load finishes and
# raises if it failed; polling job.state alone cannot detect a failure,
# because a failed job also ends in the DONE state.
job.result()
print(job.state)

DONE


In [None]:
# Define SQL query to retrieve the YouTube trending data from Google Cloud BigQuery.
# Each fragment ends with a space so the implicitly concatenated literals form
# properly separated SQL clauses.
sql = (
    'SELECT * '
    'FROM `data-storage-485106.youtube.trending_now` '
    'ORDER BY country_name'
)

# Run SQL Query and download the result as a DataFrame
data = client.query(sql).to_dataframe()

In [None]:
# Show the (rows, columns) dimensions of the DataFrame read back from BigQuery
data.shape

Unnamed: 0,country_code,country_name,region,video_id,title,description,channel_title,category_id,category_name,published_at,tags,view_count,like_count,comment_count
0,DZ,Algeria,North Africa,sr8fIUDD-vg,POSSO Fazer MEGA ROB√îS no Minecraft!,üì∏ Meu Instagram:\nhttps://www.instagram.com/_a...,Athos,20,Gaming,2026-01-21T13:31:06Z,"['minecraft', 'athos', 'athos gamer', 'minecra...",579800,5651,123
1,DZ,Algeria,North Africa,_9e0kuQlNhs,English SAKURA SCHOOL SIMULATOR: üòç Excited str...,Hey folks! Watch me play Sakura school simulat...,ahiplayz,20,Gaming,2026-01-21T11:07:07Z,[],416165,1338,0
2,DZ,Algeria,North Africa,DBC2KsLkoV0,Golo l oumi semhi liya ŸÇŸàŸÑŸà ŸÑŸÖŸä ÿ≥ÿ≠Ÿä ŸÑŸäÿß (Speci...,Provided to YouTube by DistroKid\n\nGolo l oum...,kamikabro - Topic,10,Music,2026-01-16T09:37:14Z,"['kamikabro', 'hajar egypte alam el fan mazzik...",64133,1064,45
3,DZ,Algeria,North Africa,7dgNijJQO_M,Long Slide Game With Cow Elephant Gorilla Hipp...,#animals3dbin #longslidegame #funny3danimals ...,Flygame Animals,1,Film & Animation,2026-01-21T05:05:31Z,[],750882,11190,198
4,DZ,Algeria,North Africa,epzngHfK_xE,Choo Choo Charles is Hunting me | Horror Gameplay,"Hello People , In this Video we are playing th...",Chahat Gaming 3.0,20,Gaming,2026-01-21T21:49:05Z,[],167800,710,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2104,ZW,Zimbabwe,Southern Africa,wby65kBRWzk,STEAL A BRAINROT GIVEAWAY LIVE | STEAL A BRAIN...,STEAL A BRAINROT GIVEAWAY LIVE | STEAL A BRAIN...,Joxy,20,Gaming,2026-01-17T18:01:04Z,"['steal a brainrot giveaway', 'steal a brainro...",874034,10079,49
2105,ZW,Zimbabwe,Southern Africa,lCqLnjLm6jE,üî¥SENEGAL vs MOROCCO - Africa Cup of Nations 20...,Subscribe Please! \nPes 21 - SENEGAL vs MOROCC...,Banchik,20,Gaming,2026-01-18T23:08:22Z,"['pes 2021', 'pes 2021 gameplay', 'soccer', 'p...",414545,570,2
2106,ZW,Zimbabwe,Southern Africa,ErnWUbjOKoU,SIDEMEN AMONG US DUMPER ROLE: COLLECT DEAD BOD...,üçó: Order food NOW at: https://www.eatsides.com...,MoreSidemen,22,People & Blogs,2025-12-23T19:20:00Z,"['sidemen', 'moresidemen', 'miniminter', 'ksi'...",5172071,149499,4624
2107,ZW,Zimbabwe,Southern Africa,DjNKhpOC6Ag,Shone - Kuchema (Official Music Video),Video was shot in Kuwadzana Extension where Sh...,Shone,10,Music,2026-01-16T08:00:06Z,[],15177,670,69
