# This file splits CSVs into per-row chunks: for every row of each input CSV it writes one CSV and one metadata JSON file, plus one combined metadata JSON file per input CSV.

In [1]:
import pandas as pd
import json
import os

In [None]:
# Current working directory; as the last expression of a notebook cell its
# value is echoed, which helps check how relative paths will resolve.
os.getcwd()

In [None]:
"""
Define functions 
"""

# Standardise languages
def clean_languages(languages):
    """Lower-case a languages string and fold Chinese variants into "chinese".

    Args:
        languages: Free-text languages value, e.g. "Mandarin Chinese, English".

    Returns:
        The lower-cased string with every "mandarin"/"cantonese" variant
        replaced by "chinese".
    """
    # The two-word phrases are listed first so that "mandarin chinese" is
    # replaced as a whole and never turns into "chinese chinese".
    variants = (
        "mandarin chinese",
        "cantonese chinese",
        "mandarin",
        "cantonese",
    )
    normalised = languages.lower()
    for variant in variants:
        normalised = normalised.replace(variant, "chinese")
    return normalised


def generate_metadata(row):
    """Build the metadata document for a single player row.

    Args:
        row: Mapping-like object exposing ``.get(key, default)`` (the script
            passes a pandas Series produced by ``DataFrame.iterrows()``).

    Returns:
        A dict with one key, "metadataAttributes", holding the lower-cased
        "standings", "igl", "region", "league", "languages" and "role"
        values. "role" is read from the "team_role" column.
    """
    def _field(key, default=""):
        # ``.get`` only falls back when the column is absent. An empty CSV
        # cell is read by pandas as NaN, which would otherwise be written
        # out as the literal string "nan" and bypass the default.
        value = row.get(key, default)
        if value is None or pd.isna(value):
            value = default
        return str(value)

    return {
        "metadataAttributes": {
            "standings": _field("standings").lower(),
            "igl": _field("igl").lower(),
            # Default to "amer" if not specified
            "region": _field("region", "amer").lower(),
            # Default league
            "league": _field("league", "vct game changers").lower(),
            "languages": clean_languages(_field("languages")),
            "role": _field("team_role").lower(),
        }
    }

input_dir = r'my_input_dir'
output_dir = r'my_output_dir'

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Loop through all CSV files in the input directory
for file_name in os.listdir(input_dir):
    if not file_name.endswith('.csv'):
        continue

    # Load each CSV file
    csv_path = os.path.join(input_dir, file_name)
    df = pd.read_csv(csv_path)

    # Collects the metadata of every player in this CSV
    combined_metadata_list = []

    # One output CSV + one metadata JSON per row
    for index, row in df.iterrows():
        # Fall back to a positional name when the "ign" column is absent OR
        # the cell is empty. pandas reads an empty cell as NaN, which would
        # otherwise name the file "nan.csv" and make such rows overwrite
        # each other.
        fallback_ign = f"player_{index + 1}"
        raw_ign = row.get("ign", fallback_ign)
        if pd.isna(raw_ign) or not str(raw_ign).strip():
            raw_ign = fallback_ign

        # Make the ign usable as a filename. Path separators are replaced so
        # an ign cannot point outside (or into a sub-directory of) output_dir.
        player_ign = str(raw_ign).replace(" ", "_").lower()
        for separator in ("/", "\\"):
            player_ign = player_ign.replace(separator, "_")

        # Save the single row as its own CSV, named after the player's ign
        player_df = pd.DataFrame([row])
        csv_filename = f"{player_ign}.csv"
        player_df.to_csv(os.path.join(output_dir, csv_filename), index=False)

        # Generate metadata for the player
        metadata = generate_metadata(row)

        # Save individual metadata as JSON next to the CSV: <ign>.csv.metadata.json
        metadata_filename = f"{csv_filename}.metadata.json"
        with open(os.path.join(output_dir, metadata_filename), 'w', encoding='utf-8') as json_file:
            json.dump(metadata, json_file, indent=4)

        combined_metadata_list.append(metadata)

    # Save combined metadata for all players in the CSV file. splitext strips
    # only the trailing extension, whereas str.replace would also remove a
    # ".csv" occurring in the middle of the name.
    # NOTE(review): a player whose ign equals this base name would have their
    # individual metadata file overwritten by the combined one.
    base_name = os.path.splitext(file_name)[0]
    combined_metadata_filename = f"{base_name}.csv.metadata.json"
    combined_metadata_path = os.path.join(output_dir, combined_metadata_filename)

    with open(combined_metadata_path, 'w', encoding='utf-8') as json_file:
        json.dump(combined_metadata_list, json_file, indent=4)

print("CSV and metadata files created for all players")