In [1]:
import pandas as pd
import glob
import os

In [2]:
#1. Create a single dataframe with the concatenation of all input csv files, adding a column called country

# Path to dataset folder: go one level up (..) then into 'trendingYT'
data_path = os.path.join("..", "trendingYT")

# Find all CSV files that end with "videos.csv"
csv_files = glob.glob(os.path.join(data_path, "*videos.csv"))
print("Found CSV files:")
for f in csv_files:
    print(" -", f)

dfs = []

for file in csv_files:
    # Extract country code from the filename (first two letters, e.g., US, CA, DE)
    country_code = os.path.basename(file)[:2].upper()
    print(f"\nReading {file} -> country = {country_code}")
    
    # Read CSV (latin-1 encoding avoids common decoding errors)
    df = pd.read_csv(file, encoding="latin-1")
    
    # Add a new column indicating the country of the file
    df["country"] = country_code
    
    # Store the dataframe for later concatenation
    dfs.append(df)

# Concatenate all dataframes into a single dataframe
all_videos = pd.concat(dfs, ignore_index=True)

print("\nFinal shape:", all_videos.shape)

# Display the first few rows of the final dataframe
all_videos.head()

Found CSV files:
 - ..\trendingYT\CAvideos.csv
 - ..\trendingYT\DEvideos.csv
 - ..\trendingYT\FRvideos.csv
 - ..\trendingYT\GBvideos.csv
 - ..\trendingYT\INvideos.csv
 - ..\trendingYT\JPvideos.csv
 - ..\trendingYT\KRvideos.csv
 - ..\trendingYT\MXvideos.csv
 - ..\trendingYT\RUvideos.csv
 - ..\trendingYT\USvideos.csv

Reading ..\trendingYT\CAvideos.csv -> country = CA

Reading ..\trendingYT\DEvideos.csv -> country = DE

Reading ..\trendingYT\FRvideos.csv -> country = FR

Reading ..\trendingYT\GBvideos.csv -> country = GB

Reading ..\trendingYT\INvideos.csv -> country = IN

Reading ..\trendingYT\JPvideos.csv -> country = JP

Reading ..\trendingYT\KRvideos.csv -> country = KR

Reading ..\trendingYT\MXvideos.csv -> country = MX

Reading ..\trendingYT\RUvideos.csv -> country = RU

Reading ..\trendingYT\USvideos.csv -> country = US

Final shape: (375942, 17)


Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,country
0,n1WpP7iowLc,17.14.11,Eminem - Walk On Water (Audio) ft. BeyoncÃ©,EminemVEVO,10,2017-11-10T17:00:03.000Z,"Eminem|""Walk""|""On""|""Water""|""Aftermath/Shady/In...",17158579,787425,43420,125882,https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg,False,False,False,Eminem's new track Walk on Water ft. BeyoncÃ© ...,CA
1,0dBIkQ4Mz1M,17.14.11,PLUSH - Bad Unboxing Fan Mail,iDubbbzTV,23,2017-11-13T17:00:00.000Z,"plush|""bad unboxing""|""unboxing""|""fan mail""|""id...",1014651,127794,1688,13030,https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg,False,False,False,STill got a lot of packages. Probably will las...,CA
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146035,5339,8181,https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg,False,False,False,WATCH MY PREVIOUS VIDEO â¶ \n\nSUBSCRIBE âº ...,CA
3,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095828,132239,1989,17518,https://i.ytimg.com/vi/d380meD0W0M/default.jpg,False,False,False,I know it's been a while since we did this sho...,CA
4,2Vv-BfVoq4g,17.14.11,Ed Sheeran - Perfect (Official Music Video),Ed Sheeran,10,2017-11-09T11:04:14.000Z,"edsheeran|""ed sheeran""|""acoustic""|""live""|""cove...",33523622,1634130,21082,85067,https://i.ytimg.com/vi/2Vv-BfVoq4g/default.jpg,False,False,False,ð§: https://ad.gt/yt-perfect\nð°: https://...,CA


In [3]:
# Part 2 - Extract all videos that have no tag

# Some videos have no tags: tags column can be NaN, empty string, or "[none]"
no_tag_mask = (
    all_videos["tags"].isna() |                          # missing value
    all_videos["tags"].str.strip().eq("") |              # empty string
    all_videos["tags"].str.strip().eq("[none]")          # explicit [none] marker
)

# Create a new dataframe with only videos that have no tag
videos_no_tags = all_videos[no_tag_mask].copy()

print("Number of videos with no tags:", videos_no_tags.shape[0])
videos_no_tags.head()

Number of videos with no tags: 37698


Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,country
41,JwboxqDylgg,17.14.11,Canada Soccer's Women's National Team v USA In...,Canada Soccer,17,2017-11-13T05:53:49.000Z,[none],36311,277,28,13,https://i.ytimg.com/vi/JwboxqDylgg/default.jpg,False,False,False,Canada Soccer's Women's National Team face riv...,CA
58,9B-q8h31Bpk,17.14.11,John Oliver Tackles Louis C.K. And Donald Trum...,TV Shows,22,2017-11-13T04:49:26.000Z,[none],106029,1270,101,181,https://i.ytimg.com/vi/9B-q8h31Bpk/default.jpg,False,False,False,"John Oliver on News, Politics ...",CA
78,1UE5Dq1rvUA,17.14.11,Taylor Swift Perform Ready For It - SNL,Ken Reactz,24,2017-11-12T05:18:02.000Z,[none],320964,8069,285,717,https://i.ytimg.com/vi/1UE5Dq1rvUA/default.jpg,False,False,False,Thanks for watching please subscribe and subsc...,CA
86,pmJQ4KwliX4,17.14.11,"LATEST Q POSTS: ROTHSCHILDS, HOUSE OF SAUD, lL...",James Munder,2,2017-11-12T21:25:40.000Z,[none],116820,1503,139,1066,https://i.ytimg.com/vi/pmJQ4KwliX4/default.jpg,False,False,False,https://pastebin.ca/3930472\n\nSupport My Chan...,CA
98,lHcXhBojpeQ,17.14.11,ä¸å±TVBè¦å¸ï¼ææ£10å¹´éæ¢ç«¹é¦¬é«®å¦...,ææç¾æç,22,2017-11-12T12:49:50.000Z,[none],88061,47,58,17,https://i.ytimg.com/vi/lHcXhBojpeQ/default.jpg,False,False,False,,CA


In [4]:
# Part 3 - Total number of views for each channel

# Group by channel_title and sum all views
channel_total_views = (
    all_videos.groupby("channel_title")["views"]
    .sum()
    .reset_index()
    .sort_values(by="views", ascending=False)
)

print("Number of channels:", channel_total_views.shape[0])
channel_total_views.head(10)

Number of channels: 37824


Unnamed: 0,channel_title,views
4564,ChildishGambinoVEVO,11016766510
15536,Marvel Entertainment,10430605449
17726,NickyJamTV,9479859505
18466,Ozuna,8623329509
28412,ibighit,8205572221
6689,DrakeVEVO,7637228580
2788,Bad Bunny,7124207494
2101,ArianaGrandeVevo,6202230488
28621,jypentertainment,5802822913
7047,Ed Sheeran,5775405574
