In [11]:
import pandas as pd
import numpy as np

# Load the data
df = pd.read_csv('../data/original/consumermoney2013.csv')

# Function to process the data and group by categories
def process_consumer_spending(df):
    # Forward fill NaN values in first column to identify categories
    df.iloc[:, 0] = df.iloc[:, 0].fillna('')
    
    # Initialize variables
    categories = []
    current_category = None
    grouped_data = []
    
    for index, row in df.iterrows():
        item = row.iloc[0]
        
        # Skip completely empty rows
        if item == '':
            current_category = None
            continue
        
        # Check if this is a main category (no indentation and not a subcategory)
        if not item.startswith(' ') and ',' not in item and ':' not in item:
            # We found a new main category
            current_category = item
            if current_category not in categories:
                categories.append(current_category)
        
        # Create a row with category information
        new_row = row.copy()
        if current_category:
            new_row['Category'] = current_category
            grouped_data.append(new_row)
    
    # Convert to DataFrame
    result_df = pd.DataFrame(grouped_data)
    
    # Clean up the monetary values
    for year in range(2013, 2020):
        year_col = str(year)
        if year_col in result_df.columns:
            result_df[year_col] = result_df[year_col].apply(lambda x: 
                pd.to_numeric(str(x).replace('$', '').replace(',', ''), errors='coerce')
                if isinstance(x, str) else x)
    
    # Rename columns for clarity
    result_df.rename(columns={result_df.columns[0]: 'Item'}, inplace=True)
    
    return result_df

# Process the data
processed_df = process_consumer_spending(df)

# Example: Display items in the "Food" category
food_items = processed_df[processed_df['Category'] == 'Food']
print("Food category items:")
print(food_items[['Item', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020']])

# Example: Display items in the "Housing" category
housing_items = processed_df[processed_df['Category'] == 'Housing']
print("\nHousing category items:")
print(housing_items[['Item', '2013', '2014', '2015', '2016', '2017', '2018', '2019','2020']])

# Get the list of all categories
all_categories = processed_df['Category'].unique()
print("\nAll identified categories:")
print(all_categories)

# Function to analyze spending trends by category
def analyze_category_spending(df, category):
    category_data = df[df['Category'] == category]
    
    # Get the main category total (should be the first row)
    main_item = category_data.iloc[0]
    
    print(f"\nAnalysis for {category}:")
    print(f"Total spending in 2013: ${main_item['2013']:,.0f}")
    print(f"Total spending in 2020 ${main_item['2020']:,.0f}")
    
    change = (main_item['2020'] - main_item['2013']) / main_item['2013'] * 100
    print(f"Percent change from 2013 to 2020: {change:.1f}%")
    
    # Get top subcategories by 2012 spending
    subcategories = category_data.iloc[1:]  # Skip the main category row
    top_items = subcategories.nlargest(3, '2020')
    
    print(f"Top 3 subcategories by 2020 spending:")
    for idx, item in top_items.iterrows():
        print(f"  {item['Item']}: ${item['2020']:,.0f}")

# Example usage of the analysis function
analyze_category_spending(processed_df, 'Food')
analyze_category_spending(processed_df, 'Housing')

# Save the processed data to a new CSV file
processed_df.to_csv('processed_consumer_spending.csv', index=False)
print("\nProcessed data saved to 'processed_consumer_spending.csv'")

Food category items:


KeyError: "['2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020'] not in index"