In [1]:
import pandas as pd
import glob
import os

In [3]:
#1. Create a single dataframe with the concatenation of all input csv files, adding a column called country

# Path to dataset folder: go one level up (..) then into 'trendingYT'
data_path = os.path.join("..", "trendingYT")

# Find all CSV files that end with "videos.csv"
csv_files = glob.glob(os.path.join(data_path, "*videos.csv"))
print("Found CSV files:")
for f in csv_files:
    print(" -", f)

dfs = []

for file in csv_files:
    # Extract country code from the filename (first two letters, e.g., US, CA, DE)
    country_code = os.path.basename(file)[:2].upper()
    print(f"\nReading {file} -> country = {country_code}")
    
    # Read CSV (latin-1 encoding avoids common decoding errors)
    df = pd.read_csv(file, encoding="latin-1")
    
    # Add a new column indicating the country of the file
    df["country"] = country_code
    
    # Store the dataframe for later concatenation
    dfs.append(df)

# Concatenate all dataframes into a single dataframe
all_videos = pd.concat(dfs, ignore_index=True)

print("\nFinal shape:", all_videos.shape)

# Display the first few rows of the final dataframe
all_videos.head()

Found CSV files:
 - ..\trendingYT\CAvideos.csv
 - ..\trendingYT\DEvideos.csv
 - ..\trendingYT\FRvideos.csv
 - ..\trendingYT\GBvideos.csv
 - ..\trendingYT\INvideos.csv
 - ..\trendingYT\JPvideos.csv
 - ..\trendingYT\KRvideos.csv
 - ..\trendingYT\MXvideos.csv
 - ..\trendingYT\RUvideos.csv
 - ..\trendingYT\USvideos.csv

Reading ..\trendingYT\CAvideos.csv -> country = CA

Reading ..\trendingYT\DEvideos.csv -> country = DE

Reading ..\trendingYT\FRvideos.csv -> country = FR

Reading ..\trendingYT\GBvideos.csv -> country = GB

Reading ..\trendingYT\INvideos.csv -> country = IN

Reading ..\trendingYT\JPvideos.csv -> country = JP

Reading ..\trendingYT\KRvideos.csv -> country = KR

Reading ..\trendingYT\MXvideos.csv -> country = MX

Reading ..\trendingYT\RUvideos.csv -> country = RU

Reading ..\trendingYT\USvideos.csv -> country = US

Final shape: (375942, 17)


Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,country
0,n1WpP7iowLc,17.14.11,Eminem - Walk On Water (Audio) ft. BeyoncÃ©,EminemVEVO,10,2017-11-10T17:00:03.000Z,"Eminem|""Walk""|""On""|""Water""|""Aftermath/Shady/In...",17158579,787425,43420,125882,https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg,False,False,False,Eminem's new track Walk on Water ft. BeyoncÃ© ...,CA
1,0dBIkQ4Mz1M,17.14.11,PLUSH - Bad Unboxing Fan Mail,iDubbbzTV,23,2017-11-13T17:00:00.000Z,"plush|""bad unboxing""|""unboxing""|""fan mail""|""id...",1014651,127794,1688,13030,https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg,False,False,False,STill got a lot of packages. Probably will las...,CA
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146035,5339,8181,https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg,False,False,False,WATCH MY PREVIOUS VIDEO â¶ \n\nSUBSCRIBE âº ...,CA
3,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095828,132239,1989,17518,https://i.ytimg.com/vi/d380meD0W0M/default.jpg,False,False,False,I know it's been a while since we did this sho...,CA
4,2Vv-BfVoq4g,17.14.11,Ed Sheeran - Perfect (Official Music Video),Ed Sheeran,10,2017-11-09T11:04:14.000Z,"edsheeran|""ed sheeran""|""acoustic""|""live""|""cove...",33523622,1634130,21082,85067,https://i.ytimg.com/vi/2Vv-BfVoq4g/default.jpg,False,False,False,ð§: https://ad.gt/yt-perfect\nð°: https://...,CA
