In [None]:
# Install Packages
!pip install google-api-python-client google-cloud-bigquery-storage pycountry



In [None]:
# --- 1. SETUP AND AUTHENTICATION ---

from googleapiclient.discovery import build
from google.cloud import bigquery
from datetime import datetime
import pandas as pd
import pycountry
import time
import os

# Initialize the YouTube Data API client.
# The key is read from the environment so it never appears in the notebook.
# Stop here when it is missing: a client built without a key would only
# surface the problem later, as failed API requests.
api_key = os.getenv("YOUTUBE_API_KEY")
if not api_key:
    raise RuntimeError("YOUTUBE_API_KEY environment variable is not set")

youtube = build("youtube", "v3", developerKey=api_key)

print("✅ YouTube client initialized successfully")

✅ YouTube client initialized successfully


In [None]:
# --- 2. DEFINE REQUEST PARAMETERS ---

# --- Define African regions with all 54 countries ---
# Keys are region labels; values are ISO 3166-1 alpha-2 country codes.
# Every code appears in exactly one region.
regions = {
    "East Africa": ["KE", "UG", "TZ", "RW", "BI", "ET", "SO", "DJ", "ER", "SC", "MG", "MU", "KM", "SS"],
    "West Africa": ["NG", "GH", "CI", "SN", "TG", "SL", "LR", "GM", "BF", "BJ", "NE", "ML", "GN", "GW", "CV", "MR"],
    "North Africa": ["EG", "DZ", "MA", "TN", "LY", "SD"],
    "Southern Africa": ["ZA", "NA", "BW", "MZ", "ZW", "ZM", "LS", "SZ", "AO", "MW"],
    "Central Africa": ["CM", "CD", "CG", "GA", "GQ", "CF", "TD", "ST"]
}

# The API returns results in pages; this keeps track of which page to fetch next
next_page_token = None

In [None]:
# --- Initialize ---
# One dict per (country, video) row; the keys become the DataFrame columns.
videos = []
# (country_code, error message) for every country whose collection was cut short
skipped_countries = []

for region, countries in regions.items():
    print(f"Collecting data for {region} ✅")

    for country in countries:
        # --- Get full country name ---
        # Depending on the pycountry version an unknown code either returns None
        # (AttributeError on .name) or raises a LookupError; both map to "Unknown".
        try:
            country_name = pycountry.countries.get(alpha_2=country).name
        except (AttributeError, LookupError):
            country_name = "Unknown"

        try:
            next_page_token = None
            # The category titles are the same for every result page of a country,
            # so they are fetched once (after the first successful page) and reused.
            category_map = None

            while True:
                response = youtube.videos().list(
                    part="snippet,statistics",
                    chart="mostPopular",
                    regionCode=country,
                    maxResults=50,
                    pageToken=next_page_token
                ).execute()

                # --- Create a category map (category id -> title) ---
                if category_map is None:
                    category_response = youtube.videoCategories().list(
                        part="snippet",
                        regionCode=country
                    ).execute()
                    category_map = {
                        item["id"]: item["snippet"]["title"]
                        for item in category_response.get("items", [])
                    }

                for item in response.get("items", []):
                    snippet = item.get("snippet", {})
                    stats = item.get("statistics", {})

                    # Get category info
                    category_id = snippet.get("categoryId")
                    category_name = category_map.get(category_id, "Unknown")

                    videos.append({
                        "country_code": country,
                        "country_name": country_name,
                        "region": region,
                        "video_id": item.get("id"),
                        "title": snippet.get("title"),
                        "description": snippet.get("description"),
                        "channel_title": snippet.get("channelTitle"),
                        "category_id": category_id,
                        "category_name": category_name,
                        "published_at": snippet.get("publishedAt"),
                        "tags": snippet.get("tags", []),
                        # Statistics that are absent from the response count as 0
                        "view_count": int(stats.get("viewCount", 0)),
                        "like_count": int(stats.get("likeCount", 0)),
                        "comment_count": int(stats.get("commentCount", 0))
                    })

                next_page_token = response.get("nextPageToken")
                if not next_page_token:
                    break

        except Exception as e:
            # Best effort: keep whatever pages were already collected for this
            # country, record why it stopped, and move on to the next one.
            skipped_countries.append((country, str(e)))
            continue

print(f"Total videos retrieved across Africa: {len(videos)} ✅")
if skipped_countries:
    skipped_codes = ", ".join(code for code, _ in skipped_countries)
    print(f"⚠️ Skipped {len(skipped_countries)} countries (not supported or error occurred): {skipped_codes}")

Collecting data for East Africa ✅
Collecting data for West Africa ✅
Collecting data for North Africa ✅
Collecting data for Southern Africa ✅
Collecting data for Central Africa ✅
Total videos retrieved across Africa: 2044 ✅


In [None]:
# Build the DataFrame from the collected rows, keep the first occurrence of each
# (video, country, region) combination and renumber the index from 0.
dedup_keys = ["video_id", "country_code", "country_name", "region"]
bigdata = (
    pd.DataFrame(videos)
    .drop_duplicates(subset=dedup_keys, keep="first")
    .reset_index(drop=True)
)

In [None]:
# Column dtypes and non-null counts of the collected videos
bigdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2044 entries, 0 to 2043
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   country_code   2044 non-null   object
 1   country_name   2044 non-null   object
 2   region         2044 non-null   object
 3   video_id       2044 non-null   object
 4   title          2044 non-null   object
 5   description    2044 non-null   object
 6   channel_title  2044 non-null   object
 7   category_id    2044 non-null   object
 8   category_name  2044 non-null   object
 9   published_at   2044 non-null   object
 10  tags           2044 non-null   object
 11  view_count     2044 non-null   int64 
 12  like_count     2044 non-null   int64 
 13  comment_count  2044 non-null   int64 
dtypes: int64(3), object(11)
memory usage: 223.7+ KB


In [None]:
# Preview the first rows of the collected videos
bigdata.head()

Unnamed: 0,country_code,country_name,region,video_id,title,description,channel_title,category_id,category_name,published_at,tags,view_count,like_count,comment_count
0,KE,Kenya,East Africa,ws9CaAY6oww,BAHATI - BYE BYE BABA (Raila Odinga Tribute Song),#bahati #bahatikenya #byebyebaba #tribute #rai...,Bahati Kenya,10,Music,2025-10-16T07:00:07Z,"[Bahati, Bahati kenya, kenya, baba, bye bye ba...",1805341,87341,6836
1,KE,Kenya,East Africa,R4wiXj9NmEE,Send Help | Official Trailer | In Theaters Jan 30,Rachel McAdams. Dylan O’Brien. SEND HELP.\n\nD...,20th Century Studios,1,Film & Animation,2025-10-14T16:00:36Z,[Trailer],3188090,62731,4246
2,KE,Kenya,East Africa,emFIjOXduCg,Liverpool vs Manchester United | Premier Leagu...,Liverpool vs Manchester United Premier League ...,Wavear,20,Gaming,2025-10-19T17:51:22Z,[],655839,0,2
3,KE,Kenya,East Africa,VrSC_SBgPf4,PRINCE INDAH - TRIBUTE TO RT. HON RAILA ODINGA,Artist : Prince Indah\nTitle : Tribute To Rt. ...,"Prince Indah, OGW",10,Music,2025-10-16T03:31:00Z,"[Prince Indah, Tribute To Raila Odinga, Raila,...",1061198,38231,4473
4,KE,Kenya,East Africa,lgoxHC7WF9w,Marvel Television’s Wonder Man | Official Trai...,"""Simon Williams. Reading for Wonder Man.""\n\n#...",Marvel Entertainment,24,Entertainment,2025-10-11T18:12:35Z,"[marvel, comics]",4303922,105224,7127


In [None]:
# Preview the last rows of the collected videos
bigdata.tail()

Unnamed: 0,country_code,country_name,region,video_id,title,description,channel_title,category_id,category_name,published_at,tags,view_count,like_count,comment_count
2039,ZW,Zimbabwe,Southern Africa,YRA7DNy3kzk,NEMZZZ - 8PM [OFFICIAL VIDEO],NEMZZZ - 8PM [OFFICIAL VIDEO]\n\n‘8PM’ OUT NOW...,Nemzzz,10,Music,2025-10-17T18:00:36Z,"[nemz, nemzz, nemzzzz, nemz 8pm, nemzz 8pm, AP...",805094,56492,1894
2040,ZW,Zimbabwe,Southern Africa,EN1tMeXQii0,JISOO X ZAYN - EYES CLOSED (OFFICIAL MV),JISOO X ZAYN - EYES CLOSED (OFFICIAL MV)\n\nFo...,JISOO,10,Music,2025-10-10T04:00:06Z,"[YG Entertainment, YG, 와이지, K-po​p, BLACKPINK,...",47172408,2235485,163859
2041,ZW,Zimbabwe,Southern Africa,oS6nyaJUxes,Dj Tira & Pcee ft Campmasters and General Cmam...,Brand new music by Dj Tira titled Awungazi.\n\...,Ezase Afro,10,Music,2025-10-09T22:00:06Z,"[Afrotainment, Tira, Durban Music, Afro Music,...",646271,11031,735
2042,ZW,Zimbabwe,Southern Africa,DvQiyVkwY94,"Msaki, Jesse Clegg, Sjava - Wayside Lover","Jesse Clegg, Msaki and Sjava's ""Wayside Lover""...",MsakiVEVO,10,Music,2025-10-08T07:00:05Z,"[Msaki, Jesse Clegg, Sjava, Platoon, Pop]",293964,13513,737
2043,ZW,Zimbabwe,Southern Africa,TEbZsMv2c0Q,Lloyiso - Scary (Official Video),Lloyiso - Scary (Official Video) ♫ Out now: ht...,LloyisoVEVO,10,Music,2025-09-25T17:00:06Z,"[Lloyiso, NDLOVU RECORDS (PTY) LTD EMPIRE, Pop]",1293340,15379,785


In [None]:
# Number of collected rows per category name
bigdata['category_name'].value_counts()

category_name
Gaming                  809
Music                   775
People & Blogs          149
Entertainment           146
Sports                   80
Film & Animation         52
Comedy                    8
Education                 7
Unknown                   6
Howto & Style             5
News & Politics           3
Travel & Events           2
Pets & Animals            1
Science & Technology      1
Name: count, dtype: int64

In [None]:
# Initialize BigQuery client for the project that hosts the trends table
client = bigquery.Client(project='project-adrian-aluoch')

In [None]:
# Cast the tags column (lists from the API, [] when a video has none) to str before the upload
bigdata['tags'] = bigdata['tags'].astype(str)

In [None]:
# Define Table ID
table_id = 'project-adrian-aluoch.youtube_trends_ke.trends'

# Export Data to BigQuery
job = client.load_table_from_dataframe(bigdata, table_id)

# result() blocks until the load job finishes and raises if it failed, so an
# unsuccessful upload cannot go unnoticed by the cells that follow.
job.result()
print(job.state)

DONE


In [None]:
# Define SQL Query to Retrieve the YouTube Trends Data from Google Cloud BigQuery.
# Adjacent string literals are joined verbatim, so every fragment but the last
# ends with a space to keep the clauses separated.
sql = (
    'SELECT * '
    'FROM `project-adrian-aluoch.youtube_trends_ke.trends` '
    'ORDER BY country_code'
)

# Run SQL Query
data = client.query(sql).to_dataframe()

In [None]:
# Delete Original Table
# not_found_ok keeps a re-run of this cell from failing when the table is already gone.
client.delete_table(table_id, not_found_ok=True)
print("Table deleted successfully.")

# Remove Duplicate Records
# One row is kept per (video, country, region); rebinding instead of mutating
# in place makes the cell safe to re-run.
data = data.drop_duplicates(subset=["video_id", "country_code", "country_name", "region"])

Table deleted successfully.


In [None]:
# Make sure the tags column holds str values before the re-upload
data['tags'] = data['tags'].astype(str)

In [None]:
# Define the dataset ID and table ID
dataset_id = 'youtube_trends_ke'
table_id = 'trends'

# Define the BigQuery schema for YouTube trending videos
schema = [
    bigquery.SchemaField("country_code", "STRING"),
    bigquery.SchemaField("country_name", "STRING"),
    bigquery.SchemaField("region", "STRING"),
    bigquery.SchemaField("video_id", "STRING"),
    bigquery.SchemaField("title", "STRING"),
    bigquery.SchemaField("description", "STRING"),
    bigquery.SchemaField("channel_title", "STRING"),
    bigquery.SchemaField("category_id", "STRING"),
    bigquery.SchemaField("category_name", "STRING"),
    bigquery.SchemaField("published_at", "STRING"),
    bigquery.SchemaField("tags", "STRING"),  # optional: convert list to string before upload
    bigquery.SchemaField("view_count", "INTEGER"),
    bigquery.SchemaField("like_count", "INTEGER"),
    bigquery.SchemaField("comment_count", "INTEGER")
]

# Define the table reference.
# Client.dataset() is deprecated, so the reference is built from an explicit
# DatasetReference in the client's own project instead.
table_ref = bigquery.DatasetReference(client.project, dataset_id).table(table_id)

# Create the table object
table = bigquery.Table(table_ref, schema=schema)

try:
    # Create the table in BigQuery
    table = client.create_table(table)
    print(f"Table {table.table_id} created successfully.")
except Exception as e:
    # Include the reason so the failure can be diagnosed
    print(f"Table {table.table_id} failed: {e}")

Table trends created successfully.


In [None]:
# Define the BigQuery table ID
table_id = 'project-adrian-aluoch.youtube_trends_ke.trends'

# Load the data into the BigQuery table
job = client.load_table_from_dataframe(data, table_id)

# Wait for the job to complete.
# result() blocks until the job finishes and raises if the load failed, which a
# plain check of job.state would not reveal.
job.result()
print(job.state)

DONE


In [None]:
# Display the de-duplicated data that was loaded into BigQuery
data

Unnamed: 0,country_code,country_name,region,video_id,title,description,channel_title,category_id,category_name,published_at,tags,view_count,like_count,comment_count
0,DZ,Algeria,North Africa,4sj1hMxhXAk,Didou Kalma 2025 • مـازال نـولي وتزهـالي ليــا...,Bienvenue Dans La Chaine YouTube Officielle de...,AMINE PROD,10,Music,2025-10-18T15:30:06Z,[],68782,1957,160
1,DZ,Algeria,North Africa,MZA0JWoq2zs,"Tawsen - Khallini, (Official Video)","Listen to Tawsen's new song ""Khallini,"" availa...",Tawsen,10,Music,2025-10-22T15:00:06Z,[],328423,7878,120
2,DZ,Algeria,North Africa,K0LpmWCV4D0,الفأر المجنون خطف اخوتي الصغار 🧸 مغامرة الدب ا...,رابط الانتساب بالقناة 🥰\nhttps://www.youtube.c...,Mysterious Gamer,20,Gaming,2025-10-22T10:30:28Z,"['مغامرة الدب الخارق', 'الدب الخارق', 'مغامرة ...",70755,2448,88
3,DZ,Algeria,North Africa,GyyH6XE1upI,السفر الطويل : ظهور الفضائيين و انا في ورطة ! 😱,جميع صفحاتي .. 🥰💌\n\nhttps://www.youtube.com/@...,شبكة العاب العرب | Arab Games Network,20,Gaming,2025-10-22T11:00:36Z,[],136062,6947,511
4,DZ,Algeria,North Africa,n5SxjtU17_c,ربلوكس فتحت كل الشخصيات السرية المحدودة ! Stea...,رابط القروب https://www.roblox.com/share/g/687...,اصحاب مازن,20,Gaming,2025-10-22T17:05:01Z,"['roblox', 'روبلوكس', 'العاب', 'ماب البيوت', '...",261274,7137,1328
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2039,ZW,Zimbabwe,Southern Africa,emFIjOXduCg,Liverpool vs Manchester United | Premier Leagu...,Liverpool vs Manchester United Premier League ...,Wavear,20,Gaming,2025-10-19T17:51:22Z,[],655839,0,2
2040,ZW,Zimbabwe,Southern Africa,7bzdFQkHNwc,Hulengende - Zunza (Official Video),As We continue dropping the visuals for KUFADZ...,Hulengende Zw,10,Music,2025-10-18T10:05:19Z,[],59291,3154,660
2041,ZW,Zimbabwe,Southern Africa,Vcp2UjxEt4Y,🔴Live : Chelsea vs Ajax I UEFA Champions Leagu...,✅✅✅ Facebook: https://www.facebook.com/MunnaVd...,Munna,20,Gaming,2025-10-23T00:00:52Z,"['munna', 'football', 'soccer', 'football game...",81210,399,1
2042,ZW,Zimbabwe,Southern Africa,Z0Qg8FHPKHI,I Got My OWN BASE in Steal a Brainrot...,Instagram: https://www.instagram.com/cashmarco...,CashBlox,24,Entertainment,2025-10-05T11:00:57Z,"['Cash', 'Nico', 'Nico and Cash', 'Cash and Ni...",3440741,133101,5296
