### Test 1: Chi-Square Tests for Categorical Variables

In [3]:
import pandas as pd
# Load the CSV at ../artifacts/processed_data.csv into `df`; the test cells
# below all read from this frame. The path is relative, so it resolves
# against the kernel's current working directory.
df = pd.read_csv("../artifacts/processed_data.csv")


In [4]:
from scipy.stats import chi2_contingency
import pandas as pd

def run_chi_square_test(df, feature, target='fraud_label', alpha=0.05):
    """
    Performs a Chi-Square test of independence between a categorical feature
    and a binary target, and prints the outcome.

    Parameters:
    - df: pandas DataFrame
    - feature: column name of the categorical feature
    - target: column name of the binary target (default: 'fraud_label')
    - alpha: significance level used for the printed verdict (default: 0.05)

    Returns:
    - None (prints result)

    Raises:
    - ValueError: if alpha is not strictly between 0 and 1
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")

    print(f"Chi-Square Test: {feature} vs {target}")
    contingency_table = pd.crosstab(df[feature], df[target])

    # The test needs at least a 2x2 table. With a single observed level on
    # either axis there is nothing to compare (dof would be 0), and an empty
    # table makes chi2_contingency raise, so report and stop instead.
    if min(contingency_table.shape) < 2:
        print("Result: Test not applicable — feature and target must each have at least two observed levels.\n")
        return

    chi2, p, dof, expected = chi2_contingency(contingency_table)

    print(f"Chi² Statistic: {chi2:.3f}")
    print(f"Degrees of Freedom: {dof}")
    print(f"p-value: {p:.5f}")

    # The chi-square approximation is unreliable when expected cell counts are
    # small; surface that rather than silently trusting the p-value.
    if (expected < 5).any():
        print("Warning: some expected cell counts are below 5 — the chi-square approximation may be unreliable.")

    if p < alpha:
        print("Result: Statistically significant — feature is likely associated with the target.\n")
    else:
        print("Result: Not statistically significant — likely not useful on its own.\n")


In [5]:
# Chi-square test of card_type against the default target, fraud_label.
run_chi_square_test(df, 'card_type')

Chi-Square Test: card_type vs fraud_label
Chi² Statistic: 349.383
Degrees of Freedom: 2
p-value: 0.00000
Result: Statistically significant — feature is likely associated with the target.



In [6]:
# Chi-square test of card_brand against the default target, fraud_label.
run_chi_square_test(df, 'card_brand')

Chi-Square Test: card_brand vs fraud_label
Chi² Statistic: 70.417
Degrees of Freedom: 3
p-value: 0.00000
Result: Statistically significant — feature is likely associated with the target.



In [7]:
# Chi-square test of use_chip against the default target, fraud_label.
run_chi_square_test(df, 'use_chip')

Chi-Square Test: use_chip vs fraud_label
Chi² Statistic: 38468.717
Degrees of Freedom: 2
p-value: 0.00000
Result: Statistically significant — feature is likely associated with the target.



### Test 2: Mann-Whitney Tests for Numerical Variables

In [12]:
from scipy.stats import mannwhitneyu

def run_mannwhitney_test(df, feature, target='fraud_label', alpha=0.05):
    """
    Performs a two-sided Mann-Whitney U Test between a continuous feature and
    a binary target, and prints the outcome.

    Parameters:
    - df: pandas DataFrame
    - feature: column name of the numeric feature
    - target: column name of the binary target, coded 1 (fraud) / 0 (legit)
      (default: 'fraud_label')
    - alpha: significance level used for the printed verdict (default: 0.05)

    Returns:
    - None (prints result)

    Raises:
    - ValueError: if alpha is not strictly between 0 and 1
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")

    # Missing values are dropped per group: left in, they propagate to a NaN
    # p-value, and `nan < alpha` is False, so the feature would be reported as
    # "not significant" for the wrong reason.
    fraud_values = df.loc[df[target] == 1, feature].dropna()
    legit_values = df.loc[df[target] == 0, feature].dropna()

    print(f"Mann–Whitney U Test: {feature} vs {target}")

    # The test compares two samples; with either one empty there is nothing to rank.
    if fraud_values.empty or legit_values.empty:
        print("Result: Test not applicable — both the fraud and legit groups need at least one non-missing value.\n")
        return

    stat, p = mannwhitneyu(fraud_values, legit_values, alternative='two-sided')

    print(f"U-statistic: {stat:.3f}")
    print(f"p-value: {p:.5f}")

    if p < alpha:
        print("Result: Statistically significant — distributions differ across fraud and legit groups.\n")
    else:
        print("Result: Not statistically significant — no strong distributional difference.\n")


In [13]:
import numpy as np

# Strip "$" and thousands separators, then parse as float. The pattern is a
# raw string so "\$" is not treated as an (invalid) string escape, which
# Python 3.12+ flags with a SyntaxWarning; the regex itself is unchanged.
df['amount_clean'] = df['amount'].replace(r'[\$,]', '', regex=True).astype(float)

# log1p of positive amounts; non-positive and missing amounts map to 0, exactly
# as the previous row-wise lambda did, but vectorized. Clipping first also keeps
# log1p from ever seeing values <= -1.
# Mann–Whitney is rank-based, so the log transform itself does not affect the
# test; only the clamping (which ties all non-positive amounts at 0) does.
df['log_amount'] = np.log1p(df['amount_clean'].clip(lower=0)).fillna(0)

run_mannwhitney_test(df, 'log_amount')


Mann–Whitney U Test: log_amount vs fraud_label
U-statistic: 78209416656.500
p-value: 0.00000
Result: Statistically significant — distributions differ across fraud and legit groups.



In [14]:
# credit_score is passed as-is (no cleaning or log transform in this cell).
run_mannwhitney_test(df, 'credit_score')

Mann–Whitney U Test: credit_score vs fraud_label
U-statistic: 59778827666.500
p-value: 0.13788
Result: Not statistically significant — no strong distributional difference.



In [15]:
# Strip "$" and thousands separators, then parse as float. The pattern is a
# raw string so "\$" is not treated as an (invalid) string escape, which
# Python 3.12+ flags with a SyntaxWarning; the regex itself is unchanged.
df['yearly_income_clean'] = df['yearly_income'].replace(r'[\$,]', '', regex=True).astype(float)

# log1p of positive incomes; non-positive and missing incomes map to 0, exactly
# as the previous row-wise lambda did, but vectorized.
# Mann–Whitney is rank-based, so the log transform itself does not affect the
# test; only the clamping of non-positive values to 0 does.
df['log_income'] = np.log1p(df['yearly_income_clean'].clip(lower=0)).fillna(0)

run_mannwhitney_test(df, 'log_income')


Mann–Whitney U Test: log_income vs fraud_label
U-statistic: 55668373889.500
p-value: 0.00000
Result: Statistically significant — distributions differ across fraud and legit groups.



In [16]:
# num_credit_cards is passed as-is (no cleaning or log transform in this cell).
run_mannwhitney_test(df, 'num_credit_cards')


Mann–Whitney U Test: num_credit_cards vs fraud_label
U-statistic: 64757987802.000
p-value: 0.00000
Result: Statistically significant — distributions differ across fraud and legit groups.



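**Interpretation:** The number of credit cards owned by a user differs significantly between fraudulent and legitimate transactions (Mann–Whitney U, p < 0.00001). Card-ownership patterns may therefore indicate behavioral red flags, such as synthetic identities or unusually low or high card counts. Note that the test only shows that the two distributions differ; it does not show the direction or the size of the difference.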


In [17]:
# num_cards_issued is passed as-is (no cleaning or log transform in this cell).
run_mannwhitney_test(df, 'num_cards_issued')


Mann–Whitney U Test: num_cards_issued vs fraud_label
U-statistic: 59193690507.000
p-value: 0.57537
Result: Not statistically significant — no strong distributional difference.



**Summary:** Mann–Whitney U tests show that the distributions of `log_amount`, `log_income`, and `num_credit_cards` differ significantly between fraudulent and legitimate transactions (p < 0.00001), so these features may capture meaningful behavioral and financial patterns. `credit_score` (p = 0.138) and `num_cards_issued` (p = 0.575) did not show statistically significant differences, but may still add marginal value when combined with other variables. Because the sample is very large, even small differences reach statistical significance; effect sizes should be checked before treating any of these features as strongly predictive.
