In [1]:
import pandas as pd
import os
import glob
import re
from datetime import datetime

# Merge the weekly Spotify chart exports into one chronologically sorted CSV.
#
# Each input file is named "regional-us-weekly-YYYY-MM-DD.csv"; the chart date
# exists only in the filename, so it is parsed from there and stored in a new
# 'date' column before the files are concatenated.

# Directory where the CSV files are located
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
path = r"C:\Users\w196283\Downloads\spotify"

# Output filename, kept in one place so the summary message below always
# names the file that is actually written.
output_file = "spotify.csv"

# Get all CSV files from the directory. glob returns matches in arbitrary
# order, so sort them to make the processing order deterministic.
all_files = sorted(glob.glob(os.path.join(path, "regional-us-weekly-*.csv")))

# List to store all dataframes
df_list = []

# Process each file
for file in all_files:
    # Extract date from filename using regex
    date_match = re.search(r'regional-us-weekly-(\d{4}-\d{2}-\d{2})\.csv', file)

    if not date_match:
        print(f"Could not extract date from {file}")
        continue

    # Convert date string to a date object (raises ValueError on an
    # impossible date such as 2024-13-40)
    file_date = datetime.strptime(date_match.group(1), '%Y-%m-%d').date()

    # Read the CSV file ('utf-8-sig' tolerates a leading byte-order mark)
    df = pd.read_csv(file, encoding='utf-8-sig')

    # Tag every row with the date taken from the filename
    df['date'] = file_date

    df_list.append(df)

# pd.concat fails with an opaque "No objects to concatenate" on an empty
# list; fail early with a message that says what was being looked for.
if not df_list:
    raise FileNotFoundError(
        f"No usable 'regional-us-weekly-YYYY-MM-DD.csv' files found in {path}"
    )

# Combine all dataframes
combined_df = pd.concat(df_list, ignore_index=True)

# Sort the dataframe chronologically by date, then by chart rank within a date
combined_df = combined_df.sort_values(by=['date', 'rank'])

# Save the result to a single CSV file
combined_df.to_csv(output_file, index=False, encoding='utf-8-sig')

print(f"Processing complete! Combined {len(df_list)} files into {output_file}")
print(f"Data arranged chronologically from {combined_df['date'].min()} to {combined_df['date'].max()}")

Processing complete! Combined 52 files into spotify_charts_combined.csv
Data arranged chronologically from 2023-12-28 to 2024-12-19


In [3]:
import pandas as pd
import os
import glob
import re
from datetime import datetime, timedelta

# Merge the weekly Apple Music "Top Songs" exports into one chronologically
# sorted CSV.
#
# Each filename carries an ISO-8601 week label such as "2024-W05"; the Monday
# of that week is stored in a new 'date' column before the files are
# concatenated.

# Directory where the CSV files are located
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
path = r"C:\Users\w196283\Downloads\apple music"

# Output filename, kept in one place so the summary message below always
# names the file that is actually written.
output_file = "apple_music.csv"

# Get all CSV files from the directory. glob returns matches in arbitrary
# order, so sort them to make the processing order deterministic.
all_files = sorted(glob.glob(os.path.join(path, "*Top Songs*.csv")))

# List to store all dataframes
df_list = []

# Process each file
for file in all_files:
    # Extract week information from the file's own name (not the directory
    # part of the path, which could contain an unrelated match)
    week_match = re.search(r'(\d{4})-W(\d{2})', os.path.basename(file))

    if not week_match:
        print(f"Could not extract week information from {file}")
        continue

    year, week_num = week_match.groups()

    # Convert the ISO week to the date of its Monday.
    # %G/%V/%u are the ISO-8601 year / week / weekday directives. The
    # previous '%Y-%W-%w' format counts weeks from the first Monday of the
    # calendar year, which is a different numbering: it happens to agree
    # with ISO for 2023 and 2024, but e.g. 2025-W01 would have become
    # 2025-01-06 instead of the correct 2024-12-30.
    first_day = datetime.strptime(f'{year}-W{week_num}-1', '%G-W%V-%u').date()

    # Read the CSV file ('utf-8-sig' tolerates a leading byte-order mark)
    df = pd.read_csv(file, encoding='utf-8-sig')

    # Tag every row with the Monday of the chart week
    df['date'] = first_day

    df_list.append(df)

# pd.concat fails with an opaque "No objects to concatenate" on an empty
# list; fail early with a message that says what was being looked for.
if not df_list:
    raise FileNotFoundError(
        f"No usable '*Top Songs*.csv' files with a YYYY-Www week label found in {path}"
    )

# Combine all dataframes
combined_df = pd.concat(df_list, ignore_index=True)

# Sort the dataframe chronologically by date, then by chart position within a week
combined_df = combined_df.sort_values(by=['date', 'Position'])

# Save the result to a single CSV file
combined_df.to_csv(output_file, index=False, encoding='utf-8-sig')

print(f"Processing complete! Combined {len(df_list)} files into {output_file}")
print(f"Data arranged chronologically from {combined_df['date'].min()} to {combined_df['date'].max()}")

Processing complete! Combined 52 files into apple_music.csv
Data arranged chronologically from 2023-12-18 to 2024-12-09
