In [2]:
import pandas as pd

# Sample order records with inconsistent formatting: mixed date separators,
# prices carrying a "원" suffix or a thousands comma, quantities given as
# str / int / None, and paid flags given as strings or a real bool.
raw = [
    dict(zip(("date", "menu", "price", "qty", "paid"), values))
    for values in (
        ("2026-01-01", "Americano", "4500원", "2", "TRUE"),
        ("2026/01/01", "Latte", "5,000", 1, "True"),
        ("2026-01-02", "Latte", None, 2, "FALSE"),
        ("2026-01-03", "Mocha", "5500", None, True),
    )
]

# One row per record; columns follow the key order of the dicts.
df = pd.DataFrame(data=raw)
df

Unnamed: 0,date,menu,price,qty,paid
0,2026-01-01,Americano,4500원,2.0,True
1,2026/01/01,Latte,5000,1.0,True
2,2026-01-02,Latte,,2.0,False
3,2026-01-03,Mocha,5500,,True


In [3]:
# 1) Price cleanup: strip the "원" suffix and the "," thousands separator,
#    then parse what is left as a number. errors="coerce" turns anything
#    that still cannot be parsed into a missing value instead of raising.
df["price_num"] = pd.to_numeric(
    df["price"]
    .astype("string")
    .str.replace("원", "", regex=False)
    .str.replace(",", "", regex=False),
    errors="coerce",
)
df

Unnamed: 0,date,menu,price,qty,paid,price_num
0,2026-01-01,Americano,4500원,2.0,True,4500.0
1,2026/01/01,Latte,5000,1.0,True,5000.0
2,2026-01-02,Latte,,2.0,False,
3,2026-01-03,Mocha,5500,,True,5500.0


In [4]:
# 2) Quantity cleanup: parse qty as a number even where it was entered as text;
#    values that cannot be parsed become missing (errors="coerce").
df["qty_num"] = df["qty"].pipe(pd.to_numeric, errors="coerce")
df

Unnamed: 0,date,menu,price,qty,paid,price_num,qty_num
0,2026-01-01,Americano,4500원,2.0,True,4500.0,2.0
1,2026/01/01,Latte,5000,1.0,True,5000.0,1.0
2,2026-01-02,Latte,,2.0,False,,2.0
3,2026-01-03,Mocha,5500,,True,5500.0,


In [5]:
# 3) Derived feature: sales = cleaned unit price x cleaned quantity, row by row.
#    A row with a missing price or a missing quantity gets a missing sales value.
df["sales"] = df["price_num"].mul(df["qty_num"])
df

Unnamed: 0,date,menu,price,qty,paid,price_num,qty_num,sales
0,2026-01-01,Americano,4500원,2.0,True,4500.0,2.0,9000.0
1,2026/01/01,Latte,5000,1.0,True,5000.0,1.0,5000.0
2,2026-01-02,Latte,,2.0,False,,2.0,
3,2026-01-03,Mocha,5500,,True,5500.0,,


In [7]:
# Plain-text view of the raw columns next to their cleaned counterparts,
# followed by the full frame as the cell's displayed result.
print(df.loc[:, ["date", "menu", "price", "qty", "price_num", "qty_num", "sales"]])
df

         date       menu  price   qty  price_num  qty_num   sales
0  2026-01-01  Americano  4500원     2       4500      2.0  9000.0
1  2026/01/01      Latte  5,000     1       5000      1.0  5000.0
2  2026-01-02      Latte   None     2       <NA>      2.0    <NA>
3  2026-01-03      Mocha   5500  None       5500      NaN    <NA>


Unnamed: 0,date,menu,price,qty,paid,price_num,qty_num,sales
0,2026-01-01,Americano,4500원,2.0,True,4500.0,2.0,9000.0
1,2026/01/01,Latte,5000,1.0,True,5000.0,1.0,5000.0
2,2026-01-02,Latte,,2.0,False,,2.0,
3,2026-01-03,Mocha,5500,,True,5500.0,,
