In [3]:
import polars as pl

# Load the transaction dataset into a Polars DataFrame.
DATA_PATH = "transaction_fraud_data.parquet"
df = pl.read_parquet(DATA_PATH)

# Гипотеза 1: айфоны

In [10]:
# Keep only transactions at high-risk vendors that have a non-null device.
vendor_is_high_risk = pl.col("is_high_risk_vendor")
device_is_known = pl.col("device").is_not_null()
high_risk_df = df.filter(vendor_is_high_risk & device_is_known)

In [20]:
# Fraud rate within high_risk_df, comparing "iOS App" against every other device.
IOS_DEVICE = "iOS App"

fraud_rates_by_device = (
    high_risk_df
    .with_columns(
        # Both labels are explicit literals. A bare string passed to `.then()`
        # is parsed by Polars as a column name, which only produced the right
        # label because the matching rows hold exactly that value.
        pl.when(pl.col("device") == IOS_DEVICE)
        .then(pl.lit(IOS_DEVICE))
        .otherwise(pl.lit("Other"))
        .alias("device_group")
    )
    .group_by("device_group")
    .agg(
        pl.len().alias("total"),
        pl.col("is_fraud").sum().alias("frauds"),
    )
    .with_columns(
        # frauds / total per device group
        (pl.col("frauds") / pl.col("total")).alias("fraud_rate")
    )
)


fraud_rates_by_device


device_group,total,frauds,fraud_rate
str,u32,u32,f64
"""iOS App""",285759,35090,0.122796
"""Other""",1586204,339277,0.213892


# Гипотеза 2: выходные

In [41]:

# Rows that carry a last-hour activity record, reduced to the weekend flag,
# the fraud flag and the `num_transactions` field of the activity struct.
has_last_hour_activity = pl.col("last_hour_activity").is_not_null()
activity_count = (
    pl.col("last_hour_activity")
    .struct.field("num_transactions")
    .alias("activity")
)

weekend_offline_df = df.filter(has_last_hour_activity).select(
    "is_weekend",
    "is_fraud",
    activity_count,
)


In [42]:
# Bin `activity` into three levels:
#   activity < 2       -> "Low"
#   2 <= activity < 5  -> "Medium"
#   everything else    -> "High"
# NOTE(review): a null `activity` presumably also lands in "High", because a
# null comparison satisfies neither `when` — confirm the field is never null.
activity = pl.col("activity")
activity_level_expr = (
    pl.when(activity < 2).then(pl.lit("Low"))
    .when(activity < 5).then(pl.lit("Medium"))
    .otherwise(pl.lit("High"))
    .alias("activity_level")
)
weekend_offline_df = weekend_offline_df.with_columns(activity_level_expr)


In [44]:
# Fraud rate by weekend flag and activity level, drawn as a grouped bar chart.
import plotly.express as px

# The bins are ordinal; pin their order so the x-axis reads Low -> Medium -> High.
ACTIVITY_LEVEL_ORDER = ["Low", "Medium", "High"]

agg_df = (
    weekend_offline_df
    .group_by(["is_weekend", "activity_level"])
    .agg(
        pl.len().alias("total"),
        pl.col("is_fraud").sum().alias("frauds"),
    )
    .with_columns(
        # frauds / total per (is_weekend, activity_level) group
        (pl.col("frauds") / pl.col("total")).alias("fraud_rate")
    )
    # group_by does not guarantee row order; sort so the weekend / non-weekend
    # traces come out in the same order on every run.
    .sort(["is_weekend", "activity_level"])
    .to_pandas()
)

fig = px.bar(
    agg_df,
    x="activity_level",
    y="fraud_rate",
    color="is_weekend",
    barmode="group",
    category_orders={"activity_level": ACTIVITY_LEVEL_ORDER},
    title="Fraud Rate by Activity Level & Weekend"
)
fig.show()
