In [7]:
import os
import pandas as pd

def merge_pokemon_with_sp500(pokemon_folder, sp500_file, output_folder, max_lag_months=5):
    """
    Merge each Pokémon card CSV with the S&P 500 dataset on 'Date', adding lagged
    copies of the 'Adj Close_stock', 'Adj Close_bond' and 'Volume' columns.

    For every ``*.csv`` file in ``pokemon_folder`` the long-format card data
    (columns 'Date', 'Grade', 'Price') is pivoted to one column per grade,
    left-joined with the S&P 500 data (original plus lagged columns), forward
    filled, and written to ``output_folder`` as ``merged_<original filename>``.

    Parameters:
    pokemon_folder (str): Path to the folder containing Pokémon card datasets.
    sp500_file (str): Path to the S&P 500 dataset file. It must contain a 'Date'
        column and the three lagged columns listed above.
    output_folder (str): Path to the folder where merged datasets will be saved.
        It is created if it does not exist.
    max_lag_months (int): Number of lag steps to generate (1..max_lag_months).
        NOTE(review): the lag is applied with ``shift``, i.e. in *rows*; it only
        corresponds to months if the S&P 500 file has one row per month — confirm.

    Returns:
    None: Saves the merged datasets in the specified output folder and prints
    one line per file written.

    Raises:
    KeyError: If a required column is missing from one of the input files.
    ValueError: Propagated from ``DataFrame.pivot`` when a card file holds the
        same ('Date', 'Grade') pair more than once.
    """

    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(output_folder, exist_ok=True)

    # Load the S&P 500 data
    sp500_data = pd.read_csv(sp500_file)
    sp500_data['Date'] = pd.to_datetime(sp500_data['Date'])

    lagged_columns = ['Adj Close_stock', 'Adj Close_bond', 'Volume']  # Columns to lag

    # shift() is purely positional, so the rows must be in chronological order
    # for "lag k" to mean "k observations earlier". A stable sort keeps the
    # file order of any rows that share a date.
    final_sp500_data = sp500_data.sort_values('Date', kind='stable').set_index('Date')

    for lag in range(1, max_lag_months + 1):
        for col in lagged_columns:
            # Row i receives the value from row i - lag; the first `lag` rows become NaN
            final_sp500_data[f'{col}_Lag_{lag}'] = final_sp500_data[col].shift(lag)

    final_sp500_data = final_sp500_data.reset_index()

    # sorted() makes the processing (and log) order independent of the platform
    for filename in sorted(os.listdir(pokemon_folder)):
        if not filename.endswith('.csv'):
            continue

        file_path = os.path.join(pokemon_folder, filename)
        pokemon_data = pd.read_csv(file_path)
        pokemon_data['Date'] = pd.to_datetime(pokemon_data['Date'])

        # Reshape Pokémon data to wide format: one row per date, one column per grade
        pokemon_pivot = pokemon_data.pivot(index='Date', columns='Grade', values='Price').reset_index()

        # Left join keeps every card date, even those absent from the S&P 500 data;
        # gaps are then forward filled. DataFrame.ffill() replaces the deprecated
        # fillna(method='ffill').
        merged_data = pd.merge(pokemon_pivot, final_sp500_data, on='Date', how='left').ffill()

        # Save the merged dataset
        merged_output_path = os.path.join(output_folder, 'merged_' + filename)
        merged_data.to_csv(merged_output_path, index=False)

        print(f'Merged dataset saved: {merged_output_path}')

# Example run: merge every card dataset with the S&P 500 file using six lag steps
pokemon_folder, sp500_file, output_folder = (
    'final_cards_datasets',
    '^GSPC_output_main.csv',
    'lagged_merged_datasets',
)
max_lag_months = 6  # Highest lag step to generate
merge_pokemon_with_sp500(
    pokemon_folder=pokemon_folder,
    sp500_file=sp500_file,
    output_folder=output_folder,
    max_lag_months=max_lag_months,
)


Merged dataset saved: lagged_merged_datasets\merged_Blastoise___Holo_1999_Base_Set.csv
Merged dataset saved: lagged_merged_datasets\merged_Charizard_Reverse_Foil_2016_Evolutions.csv
Merged dataset saved: lagged_merged_datasets\merged_Charizard___Holo_1999_Base_Set.csv
Merged dataset saved: lagged_merged_datasets\merged_Charizard___Holo_2016_Evolutions.csv
Merged dataset saved: lagged_merged_datasets\merged_Full_Art_Charizard_GX_2019_Hidden_Fates.csv
Merged dataset saved: lagged_merged_datasets\merged_Full_Art_Charizard_Vmax_Portuguese__Holo_2020_Darkness_Ablaze.csv
Merged dataset saved: lagged_merged_datasets\merged_Full_Art_Charizard_Vmax_Secret__Holo_2020_Champions_Path.csv
Merged dataset saved: lagged_merged_datasets\merged_Full_Art_M_Charizard_EX___Holo_2016_Evolutions.csv
Merged dataset saved: lagged_merged_datasets\merged_Machamp_1st_Edition__Holo_1999_Base_Set.csv
Merged dataset saved: lagged_merged_datasets\merged_Venusaur___Holo_1999_Base_Set.csv


In [8]:
import os
import pandas as pd

def process_and_save_datasets(input_folder, output_folder):
    """
    Back-fill missing values in every CSV of a folder and write the results elsewhere.

    Each ``*.csv`` file found in ``input_folder`` is loaded, its missing values are
    filled with the next valid value further down the same column, and the result
    is stored under the same filename in ``output_folder``. Other files are ignored.

    :param input_folder: The folder containing the original datasets.
    :param output_folder: The folder where processed datasets will be saved
        (created if it does not exist yet).
    """
    # Make sure there is somewhere to write to
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    csv_names = [entry for entry in os.listdir(input_folder) if entry.endswith('.csv')]

    for filename in csv_names:
        source_path = os.path.join(input_folder, filename)
        target_path = os.path.join(output_folder, filename)

        # Backward fill: trailing gaps with no later value remain missing
        filled = pd.read_csv(source_path).bfill()
        filled.to_csv(target_path, index=False)

        print(f"Processed and saved {filename}")

# Example run: back-fill the merged datasets into a separate "final" folder
process_and_save_datasets(
    input_folder='lagged_merged_datasets',
    output_folder='lagged_merged_datasets_final',
)


Processed and saved merged_Blastoise___Holo_1999_Base_Set.csv
Processed and saved merged_Charizard_Reverse_Foil_2016_Evolutions.csv
Processed and saved merged_Charizard___Holo_1999_Base_Set.csv
Processed and saved merged_Charizard___Holo_2016_Evolutions.csv
Processed and saved merged_Full_Art_Charizard_GX_2019_Hidden_Fates.csv
Processed and saved merged_Full_Art_Charizard_Vmax_Portuguese__Holo_2020_Darkness_Ablaze.csv
Processed and saved merged_Full_Art_Charizard_Vmax_Secret__Holo_2020_Champions_Path.csv
Processed and saved merged_Full_Art_M_Charizard_EX___Holo_2016_Evolutions.csv
Processed and saved merged_Machamp_1st_Edition__Holo_1999_Base_Set.csv
Processed and saved merged_Venusaur___Holo_1999_Base_Set.csv


In [10]:
# The following function adjusts the datasets' columns IN PLACE (in the 'lagged_merged_datasets_final' folder).
# Run the previous cell first to produce the unadjusted columns, and only then run this cell to rename them.

import os
import pandas as pd

def rename_and_clean_columns(folder):
    """
    Normalise the column names of every CSV file in a folder, overwriting each file.

    Parameters:
    folder (str): Path to the folder containing datasets to be processed.

    Operations:
    - Drop the 'Unnamed: 0' column if present.
    - Rename columns whose name is an integer from 1 to 10 to 'grade<N>price'.
    - Lowercase every other column name and strip spaces and underscores from it.

    Returns:
    None: Each CSV is rewritten at its original path and one line is printed per file.
    """
    dropped_column = 'Unnamed: 0'

    def _normalise(name):
        # Grade columns are headers made only of digits with a value in 1..10
        if name.isdigit() and 1 <= int(name) <= 10:
            return f'grade{name}price'
        return name.lower().replace(' ', '').replace('_', '')

    csv_names = [entry for entry in os.listdir(folder) if entry.endswith('.csv')]

    for csv_name in csv_names:
        file_path = os.path.join(folder, csv_name)
        frame = pd.read_csv(file_path)

        # Keep everything except the leftover index column, then rename what remains
        kept = [name for name in frame.columns if name != dropped_column]
        frame = frame[kept]
        frame.columns = [_normalise(name) for name in kept]

        # Overwrite the source file with the cleaned version
        frame.to_csv(file_path, index=False)
        print(f'Processed and saved: {file_path}')

# Example run: clean the column names of the back-filled datasets in place
folder = 'lagged_merged_datasets_final'
rename_and_clean_columns(folder=folder)


Processed and saved: lagged_merged_datasets_final\merged_Blastoise___Holo_1999_Base_Set.csv
Processed and saved: lagged_merged_datasets_final\merged_Charizard_Reverse_Foil_2016_Evolutions.csv
Processed and saved: lagged_merged_datasets_final\merged_Charizard___Holo_1999_Base_Set.csv
Processed and saved: lagged_merged_datasets_final\merged_Charizard___Holo_2016_Evolutions.csv
Processed and saved: lagged_merged_datasets_final\merged_Full_Art_Charizard_GX_2019_Hidden_Fates.csv
Processed and saved: lagged_merged_datasets_final\merged_Full_Art_Charizard_Vmax_Portuguese__Holo_2020_Darkness_Ablaze.csv
Processed and saved: lagged_merged_datasets_final\merged_Full_Art_Charizard_Vmax_Secret__Holo_2020_Champions_Path.csv
Processed and saved: lagged_merged_datasets_final\merged_Full_Art_M_Charizard_EX___Holo_2016_Evolutions.csv
Processed and saved: lagged_merged_datasets_final\merged_Machamp_1st_Edition__Holo_1999_Base_Set.csv
Processed and saved: lagged_merged_datasets_final\merged_Venusaur___Hol