# Fraud Transactions Analysis  
## Step 4 - Feature Engineering

Objectives:
- Create analytical features
- Encode useful flags
- Prepare final dataset

In [1]:
import polars as pl

In [2]:
# Read the parquet input for this step into a Polars DataFrame.
clean_path = "../data/clean.parquet"
df = pl.read_parquet(clean_path)

# Show (rows, columns) as a quick sanity check on what was loaded.
df.shape

(6362620, 13)

In [3]:
# Int8 flag marking transactions whose amount is strictly greater than the threshold.
amount_threshold = 200000
is_high_amount = pl.col("amount") > amount_threshold
df = df.with_columns(is_high_amount.cast(pl.Int8).alias("high_amount_flag"))

In [4]:
# Int8 flag marking rows where the destination's new balance equals zero.
dest_is_empty = pl.col("newbalanceDest") == 0
df = df.with_columns(dest_is_empty.cast(pl.Int8).alias("empty_dest_flag"))

In [5]:
# One Int8 indicator column per transaction type of interest.
# Maps the new column name to the `type` value it tests for; insertion order
# determines the order in which the columns are appended.
type_flags = {"is_transfer": "TRANSFER", "is_cash_out": "CASH_OUT"}
df = df.with_columns([
    (pl.col("type") == type_value).cast(pl.Int8).alias(flag_name)
    for flag_name, type_value in type_flags.items()
])

In [6]:
# Split the `step` counter into its position inside a 24-step cycle ("hour")
# and the index of that cycle ("day").
# NOTE(review): presumably one step is one hour — confirm against the data source.
# If `step` is 1-based, step 24 lands in hour 0 of day 1 here; verify that is intended.
steps_per_day = 24
step = pl.col("step")
hour_expr = (step % steps_per_day).alias("hour")
day_expr = (step // steps_per_day).alias("day")
df = df.with_columns([hour_expr, day_expr])

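We engineered features reflecting:
- Amount risk: `high_amount_flag` (amount above 200,000)
- Destination behavior: `empty_dest_flag` (destination balance is 0 after the transaction)
- Transaction nature: `is_transfer`, `is_cash_out`
- Time context: `hour` (`step % 24`) and `day` (`step // 24`)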

In [7]:
# Preview `amount` next to every column added in the cells above.
preview_columns = [
    "amount",
    "high_amount_flag",
    "empty_dest_flag",
    "is_transfer",
    "is_cash_out",
    "hour",
    "day",
]
df.select(preview_columns).head()

amount,high_amount_flag,empty_dest_flag,is_transfer,is_cash_out,hour,day
f64,i8,i8,i8,i8,i64,i64
72967.59,0,0,0,1,12,5
381563.82,1,1,0,0,18,12
207741.19,1,0,0,1,9,13
307.43,0,1,0,0,22,0
85095.26,0,0,0,0,13,7


In [8]:
# Write the frame, including the engineered columns, to parquet and confirm on stdout.
final_path = "../data/final.parquet"
df.write_parquet(final_path)
print("Final dataset saved ✔️")

Final dataset saved ✔️
