# FRED CPI 카테고리별 물가 데이터 수집

미국 소비자물가지수(CPI)를 카테고리별로 수집하여 Deep Insight 분석용 데이터셋 생성

In [24]:
%pip install fredapi pandas

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [25]:
from fredapi import Fred
import pandas as pd
from datetime import datetime

# FRED API key setup (free key: https://fred.stlouisfed.org/docs/api/api_key.html).
# The key is a credential, so it is read from the environment instead of being
# stored in the notebook. Export FRED_API_KEY before starting the kernel.
# NOTE(review): the key that used to be embedded here has been exposed and
# should be revoked.
import os

FRED_API_KEY = os.environ.get("FRED_API_KEY")
if not FRED_API_KEY:
    # Fail here with a clear message rather than later inside the first request.
    raise RuntimeError(
        "FRED_API_KEY environment variable is not set; "
        "request a key at https://fred.stlouisfed.org/docs/api/api_key.html"
    )
fred = Fred(api_key=FRED_API_KEY)

In [26]:
# FRED series ID for each CPI category, listed as (category label, series ID).
# Insertion order is preserved and is the order in which the series are fetched.
cpi_series = dict(
    [
        ("Food", "CPIUFDSL"),
        ("Energy", "CPIENGSL"),
        ("Housing", "CPIHOSSL"),
        ("Apparel", "CPIAPPSL"),
        ("Transportation", "CPITRNSL"),
        ("Medical Care", "CPIMEDSL"),
        ("Recreation", "CPIRECSL"),
        ("Education", "CPIEDUSL"),
        ("All Items", "CPIAUCSL"),
    ]
)

# Observation window requested from FRED (first and last month, inclusive)
START_DATE, END_DATE = "2019-01-01", "2024-12-01"

In [27]:
# Fetch every category's series and stack them into one long-format table
# with one row per (Date, Category).
all_data = []
failed_categories = []  # categories whose request raised; reported after the loop

for category, series_id in cpi_series.items():
    print(f"Fetching {category} ({series_id})...")
    try:
        # Only the FRED request is guarded, so unrelated bugs are not masked.
        data = fred.get_series(series_id, observation_start=START_DATE, observation_end=END_DATE)
    except Exception as e:
        # Best effort: continue with the remaining categories, but remember the gap
        # so a partial dataset is not mistaken for a complete one.
        print(f"  Error: {e}")
        failed_categories.append(category)
        continue

    all_data.extend(
        {"Date": date, "Category": category, "CPI_Value": value}
        for date, value in data.items()
    )

# Explicit columns keep the schema stable even when nothing could be fetched,
# and the datetime conversion keeps the later `.dt` accessors valid in that case.
df = pd.DataFrame(all_data, columns=["Date", "Category", "CPI_Value"])
df["Date"] = pd.to_datetime(df["Date"])
print(f"\nTotal records: {len(df)}")
if failed_categories:
    print(f"WARNING: no data for {len(failed_categories)} categories: {', '.join(failed_categories)}")
df.head(10)

Fetching Food (CPIUFDSL)...


Fetching Energy (CPIENGSL)...
Fetching Housing (CPIHOSSL)...
Fetching Apparel (CPIAPPSL)...
Fetching Transportation (CPITRNSL)...
Fetching Medical Care (CPIMEDSL)...
Fetching Recreation (CPIRECSL)...
Fetching Education (CPIEDUSL)...
Fetching All Items (CPIAUCSL)...

Total records: 648


Unnamed: 0,Date,Category,CPI_Value
0,2019-01-01,Food,256.533
1,2019-02-01,Food,257.253
2,2019-03-01,Food,257.681
3,2019-04-01,Food,257.325
4,2019-05-01,Food,257.818
5,2019-06-01,Food,257.952
6,2019-07-01,Food,258.059
7,2019-08-01,Food,258.288
8,2019-09-01,Food,258.865
9,2019-10-01,Food,259.434


In [28]:
# Compute month-over-month and year-over-year change rates.
# Rows are ordered by category and date so the shifts below compare consecutive months.
df = df.sort_values(["Category", "Date"]).reset_index(drop=True)

cpi_by_category = df.groupby("Category")["CPI_Value"]

# fill_method=None disables the implicit forward-fill that pct_change applied by
# default (deprecated in recent pandas). With the fill, a missing observation is
# reported as a 0% change; without it the affected rates stay NaN.
# Month-over-month change (MoM %)
df["MoM_Change"] = cpi_by_category.pct_change(fill_method=None) * 100

# Year-over-year change (YoY %). periods=12 is a positional shift, so this
# assumes one row per month with no gaps within each category.
df["YoY_Change"] = cpi_by_category.pct_change(periods=12, fill_method=None) * 100

# Extra columns: year, month, quarter
df["Year"] = df["Date"].dt.year
df["Month"] = df["Date"].dt.month
df["Quarter"] = df["Date"].dt.quarter

# Inflation level classification
def classify_inflation(yoy):
    """Map a year-over-year change rate (in percent) to an inflation band label.

    Returns "N/A" when the rate is missing. Otherwise returns the label of the
    first band whose exclusive upper bound is greater than the rate:
    "Deflation" (< 0), "Low" (< 2), "Moderate" (< 4), "High" (< 7), and
    "Very High" for everything from 7 upwards.
    """
    if pd.isna(yoy):
        return "N/A"

    # (exclusive upper bound, label), in ascending order
    bands = (
        (0, "Deflation"),
        (2, "Low"),
        (4, "Moderate"),
        (7, "High"),
    )
    for upper_bound, label in bands:
        if yoy < upper_bound:
            return label
    return "Very High"

# Label every row with the inflation band of its YoY rate, then preview 20 rows
yoy_rates = df["YoY_Change"]
df["Inflation_Level"] = yoy_rates.apply(classify_inflation)

df.head(20)

Unnamed: 0,Date,Category,CPI_Value,MoM_Change,YoY_Change,Year,Month,Quarter,Inflation_Level
0,2019-01-01,All Items,252.561,,,2019,1,1,
1,2019-02-01,All Items,253.319,0.300126,,2019,2,1,
2,2019-03-01,All Items,254.277,0.378179,,2019,3,1,
3,2019-04-01,All Items,255.233,0.375968,,2019,4,2,
4,2019-05-01,All Items,255.296,0.024683,,2019,5,2,
5,2019-06-01,All Items,255.213,-0.032511,,2019,6,2,
6,2019-07-01,All Items,255.802,0.230788,,2019,7,3,
7,2019-08-01,All Items,256.036,0.091477,,2019,8,3,
8,2019-09-01,All Items,256.43,0.153885,,2019,9,3,
9,2019-10-01,All Items,257.155,0.282728,,2019,10,4,


In [29]:
# Dataset summary: date range, row count, rows per category, inflation-band counts
divider = "=" * 50
print(divider)
print("데이터 요약")
print(divider)

first_date, last_date = df["Date"].min(), df["Date"].max()
print(f"기간: {first_date} ~ {last_date}")
print(f"총 레코드 수: {len(df):,}")
print(f"카테고리 수: {df['Category'].nunique()}")

print("\n카테고리별 레코드:")
print(df["Category"].value_counts())

print("\n인플레이션 수준 분포:")
print(df["Inflation_Level"].value_counts())

데이터 요약
기간: 2019-01-01 00:00:00 ~ 2024-12-01 00:00:00
총 레코드 수: 648
카테고리 수: 9

카테고리별 레코드:
Category
All Items         72
Apparel           72
Education         72
Energy            72
Food              72
Housing           72
Medical Care      72
Recreation        72
Transportation    72
Name: count, dtype: int64

인플레이션 수준 분포:
Inflation_Level
Low          144
Moderate     140
N/A          108
High         107
Very High     81
Deflation     68
Name: count, dtype: int64


In [30]:
# Save the dataset as a CSV file
import os

output_path = "/home/ubuntu/projects/sample-deep-insight-dev/self-hosted/data/fred/us-cpi-by-category.csv"

# Create the target directory if it does not exist yet
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Normalize Date to YYYY-MM-DD text. Going through pd.to_datetime first makes
# this cell safe to re-run: after the first run the column already holds strings,
# and calling `.dt` on them directly raises AttributeError.
df["Date"] = pd.to_datetime(df["Date"]).dt.strftime("%Y-%m-%d")

# Round the numeric columns to three decimals
numeric_columns = ["CPI_Value", "MoM_Change", "YoY_Change"]
df[numeric_columns] = df[numeric_columns].round(3)

# Write the file.
# NOTE: Inflation_Level uses the literal string "N/A", which pandas.read_csv
# parses as a missing value by default; readers that need the label should pass
# keep_default_na=False.
df.to_csv(output_path, index=False)
print(f"저장 완료: {output_path}")
print(f"파일 크기: {os.path.getsize(output_path):,} bytes")

저장 완료: /home/ubuntu/projects/sample-deep-insight-dev/self-hosted/data/fred/us-cpi-by-category.csv
파일 크기: 36,206 bytes


In [31]:
# Preview of the final dataset (index hidden, truncated to at most 30 rows)
preview_text = df.to_string(index=False, max_rows=30)
print(preview_text)

      Date       Category  CPI_Value  MoM_Change  YoY_Change  Year  Month  Quarter Inflation_Level
2019-01-01      All Items    252.561         NaN         NaN  2019      1        1             N/A
2019-02-01      All Items    253.319       0.300         NaN  2019      2        1             N/A
2019-03-01      All Items    254.277       0.378         NaN  2019      3        1             N/A
2019-04-01      All Items    255.233       0.376         NaN  2019      4        2             N/A
2019-05-01      All Items    255.296       0.025         NaN  2019      5        2             N/A
2019-06-01      All Items    255.213      -0.033         NaN  2019      6        2             N/A
2019-07-01      All Items    255.802       0.231         NaN  2019      7        3             N/A
2019-08-01      All Items    256.036       0.091         NaN  2019      8        3             N/A
2019-09-01      All Items    256.430       0.154         NaN  2019      9        3             N/A
2019-10-01

In [32]:
# Column metadata JSON generation.
# Writes a JSON document describing the dataset's columns next to the CSV and
# echoes it to the cell output.
import json
import os

# Human-readable description per category. The FRED series ID is appended from
# cpi_series when the metadata is built, so IDs are not duplicated by hand.
category_descriptions = {
    # CPIUFDSL is the Food index; "Food and beverages" is a different series.
    "Food": "Food - food at home and food away from home",
    "Energy": "Energy - gasoline, electricity, natural gas, etc.",
    "Housing": "Housing - rent, shelter costs",
    "Apparel": "Apparel and footwear",
    "Transportation": "Transportation - vehicles, public transit, etc.",
    "Medical Care": "Medical care services and commodities",
    "Recreation": "Recreation and entertainment",
    "Education": "Education and communication",
    "All Items": "All items Consumer Price Index",
}

column_metadata = {
    "dataset_description": "US CPI by Category - Monthly Consumer Price Index data by category from FRED",
    "source": "Federal Reserve Economic Data (FRED)",
    "period": f"{START_DATE} ~ {END_DATE}",
    "categories": {
        "Time Information": {
            "description": "Date and time-related information",
            "columns": {
                "Date": "Reference month for the data (YYYY-MM-DD, first day of each month)",
                # Derived from the collection window so it follows START_DATE / END_DATE
                "Year": f"Year ({START_DATE[:4]}-{END_DATE[:4]})",
                "Month": "Month (1-12)",
                "Quarter": "Quarter (1: Jan-Mar, 2: Apr-Jun, 3: Jul-Sep, 4: Oct-Dec)"
            }
        },
        "Category Information": {
            "description": "CPI category classification",
            "columns": {
                # Derived from cpi_series so the list matches what was fetched
                "Category": f"CPI category ({', '.join(cpi_series)})"
            }
        },
        "CPI Metrics": {
            "description": "Consumer Price Index related metrics",
            "columns": {
                # Recreation and Education are published on a different base period,
                # so index levels are not directly comparable across categories.
                "CPI_Value": "Consumer Price Index value (Index; base 1982-84=100, except Recreation and Education which use Dec 1997=100)",
                "MoM_Change": "Month-over-Month change rate (%) - CPI change compared to previous month",
                "YoY_Change": "Year-over-Year change rate (%) - CPI change compared to same month last year (inflation rate)"
            }
        },
        "Classification": {
            "description": "Inflation level classification",
            "columns": {
                # Boundaries match classify_inflation: each upper bound is exclusive
                # and 7% itself is "Very High".
                "Inflation_Level": "Inflation level by YoY change (Deflation: <0%, Low: 0% to <2%, Moderate: 2% to <4%, High: 4% to <7%, Very High: >=7%, N/A: insufficient data)"
            }
        }
    },
    "category_details": {
        name: f"{category_descriptions.get(name, name)} ({series_id})"
        for name, series_id in cpi_series.items()
    },
    "analysis_suggestions": [
        "Compare inflation trends across categories",
        "Analyze price changes before and after COVID-19 (around 2020)",
        "Correlation analysis between Energy and Food prices",
        "Seasonality pattern analysis (monthly/quarterly)",
        "Category-wise impact analysis during inflation surge period (2021-2022)",
        "Volatility comparison across categories (MoM standard deviation)"
    ]
}

# Save JSON file (create the directory so this cell does not depend on the CSV cell)
metadata_path = "/home/ubuntu/projects/sample-deep-insight-dev/self-hosted/data/fred/us_cpi_by_category_columns.json"
os.makedirs(os.path.dirname(metadata_path), exist_ok=True)
with open(metadata_path, "w", encoding="utf-8") as f:
    json.dump(column_metadata, f, ensure_ascii=False, indent=2)

print(f"Metadata saved: {metadata_path}")
print("\n" + "=" * 50)
print(json.dumps(column_metadata, ensure_ascii=False, indent=2))

Metadata saved: /home/ubuntu/projects/sample-deep-insight-dev/self-hosted/data/fred/us_cpi_by_category_columns.json

{
  "dataset_description": "US CPI by Category - Monthly Consumer Price Index data by category from FRED",
  "source": "Federal Reserve Economic Data (FRED)",
  "period": "2019-01-01 ~ 2024-12-01",
  "categories": {
    "Time Information": {
      "description": "Date and time-related information",
      "columns": {
        "Date": "Reference month for the data (YYYY-MM-DD, first day of each month)",
        "Year": "Year (2019-2024)",
        "Month": "Month (1-12)",
        "Quarter": "Quarter (1: Jan-Mar, 2: Apr-Jun, 3: Jul-Sep, 4: Oct-Dec)"
      }
    },
    "Category Information": {
      "description": "CPI category classification",
      "columns": {
        "Category": "CPI category (Food, Energy, Housing, Apparel, Transportation, Medical Care, Recreation, Education, All Items)"
      }
    },
    "CPI Metrics": {
      "description": "Consumer Price Index rela