# CSV Column List Explorer

This notebook lists the column names of every CSV file in the `data/` directory and, optionally, exports the same listing to `data_columns_reference.txt`.

In [1]:
# Import required libraries
import pandas as pd
import os
from pathlib import Path

# Data directory path. It is a relative path, so it is resolved against the
# notebook's current working directory when the later cells list and read files.
data_dir = 'data'

# Echo the configured directory so the saved output records which folder was scanned.
print(f"Scanning directory: {data_dir}\n")

Scanning directory: data



In [2]:
# Get all CSV files in the data directory.
# - The extension test is case-insensitive so files such as "Cards.CSV" are not
#   silently skipped.
# - Only regular files are kept; a sub-directory whose name happens to end in
#   ".csv" is not a readable CSV and is excluded.
# Names are sorted so the numbering below is stable between runs.
csv_files = sorted(
    entry
    for entry in os.listdir(data_dir)
    if entry.lower().endswith('.csv')
    and os.path.isfile(os.path.join(data_dir, entry))
)

print(f"Found {len(csv_files)} CSV files:")
for i, file in enumerate(csv_files, 1):
    print(f"  {i}. {file}")

Found 8 CSV files:
  1. All Product Data Back Up 070825.csv
  2. Cards.csv
  3. DAPs.csv
  4. Editions.csv
  5. Frames.csv
  6. GWS.csv
  7. Misc.csv
  8. Postcards.csv


In [3]:
# Helper: extract the header (column names) of a single CSV file
def get_csv_columns(file_path):
    """
    Return the column names of a CSV file without loading any data rows.

    Args:
        file_path: Path to the CSV file

    Returns:
        A list of column names when the file can be parsed; otherwise a string
        of the form "Error reading file: <details>". Callers tell the two
        cases apart by checking whether the result is a list.
    """
    try:
        # nrows=0 makes pandas parse only the header line, so this stays cheap
        # even for very large files.
        return pd.read_csv(file_path, nrows=0).columns.tolist()
    except Exception as exc:
        # Deliberately broad: one unreadable file must not abort the whole scan.
        return "Error reading file: " + str(exc)

# Map every CSV file name to the result of get_csv_columns for that file:
# a list of column names, or an error-message string if the file could not be read.
all_columns = {
    csv_file: get_csv_columns(os.path.join(data_dir, csv_file))
    for csv_file in csv_files
}

print("Column extraction complete!")

Column extraction complete!


In [4]:
# Display columns for each file
heavy_rule = "=" * 80
light_rule = "-" * 80

print(heavy_rule)
print("COLUMN LIST FOR ALL CSV FILES")
print(heavy_rule)

# Detailed section: one numbered entry per file, followed by its columns
for i, (file_name, columns) in enumerate(all_columns.items(), 1):
    print(f"\n{i}. {file_name}")
    print(light_rule)

    if not isinstance(columns, list):
        # Not a list, so this is the error message stored for the file
        print(f"  {columns}")
    else:
        print(f"Total columns: {len(columns)}\n")
        for j, col in enumerate(columns, 1):
            print(f"  {j:3}. {col}")

    print()

# Summary section: one line per file with its column count (or ERROR)
print(heavy_rule)
print("SUMMARY")
print(heavy_rule)
for file_name, columns in all_columns.items():
    if not isinstance(columns, list):
        print(f"{file_name:45} : ERROR")
        continue
    print(f"{file_name:45} : {len(columns):3} columns")

COLUMN LIST FOR ALL CSV FILES

1. All Product Data Back Up 070825.csv
--------------------------------------------------------------------------------
Total columns: 156

    1. SKU (Unique Id)
    2. Wholesale Price
    3. Retail Price
    4. Interiors Price
    5. DTC P&P Price
    6. PIM Parent ID
    7. PIM Unique ID
    8. Unbxd Primary Image URL (productImage)
    9. B2B-BC PIM Internal Name Search Field
   10. B2B-BC PIM Internal ID Search Field
   11. PIM Internal Name Search Field
   12. PIM Internal Search Field
   13. PIM Internal Name
   14. B2B-BC Pim Internal Name
   15. Weight
   16. Product Type
   17. Orientation
   18. Primary Colour
   19. Secondary Colour
   20. Depth
   21. Name (productName)
   22. Keywords
   23. Height
   24. Artist
   25. Width
   26. EAN
   27. SKU (Parent ID) (parent_id)
   28. Frame Style (Child only)
   29. Print Size (Child only)
   30. EAN Group
   31. DTC Packed Weight
   32. DTC Packed Height
   33. DTC Packed Width
   34. DTC Packed De

In [5]:
# Optional: Export column information to a text file for reference
output_file = 'data_columns_reference.txt'

# The encoding is set explicitly to UTF-8. Column names are copied verbatim from
# the CSV headers and may contain non-ASCII characters; with the platform default
# encoding (for example cp1252 on Windows) the write could fail or produce a file
# that reads back differently on another machine.
with open(output_file, 'w', encoding='utf-8') as f:
    # Header, including a generation timestamp so stale exports are recognisable
    f.write("=" * 80 + "\n")
    f.write("COLUMN LIST FOR ALL CSV FILES IN data/\n")
    f.write("Generated: " + pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S") + "\n")
    f.write("=" * 80 + "\n\n")
    
    # Detailed section: one numbered entry per file, followed by its columns
    for i, (file_name, columns) in enumerate(all_columns.items(), 1):
        f.write(f"{i}. {file_name}\n")
        f.write("-" * 80 + "\n")
        
        if isinstance(columns, list):
            f.write(f"Total columns: {len(columns)}\n\n")
            for j, col in enumerate(columns, 1):
                f.write(f"  {j:3}. {col}\n")
        else:
            # Not a list, so this is the error message stored for the file
            f.write(f"  {columns}\n")
        
        f.write("\n")
    
    # Summary section: one line per file with its column count (or ERROR)
    f.write("=" * 80 + "\n")
    f.write("SUMMARY\n")
    f.write("=" * 80 + "\n")
    for file_name, columns in all_columns.items():
        if isinstance(columns, list):
            f.write(f"{file_name:45} : {len(columns):3} columns\n")
        else:
            f.write(f"{file_name:45} : ERROR\n")

print(f"✓ Column information exported to: {output_file}")

✓ Column information exported to: data_columns_reference.txt
