# **Fraud-Assessment**
### A high-stakes fraud risk assessment system combining statistical models, rule-based evidence, and AI-assisted reasoning for defensible decision-making.


## **01_Imports**

In [1]:
import numpy as np 
import pandas as pd 
import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

## **Data Understanding**

In [None]:
# Read the training transaction table into `df` and print its column/dtype summary.
# NOTE(review): the same CSV is read again into `transactions` further down —
# confirm that both in-memory copies are really needed.
df = pd.read_csv('/kaggle/input/fraud-detection-data/train_transaction.csv')
df.info()

### **Understanding train_transaction.csv**

In [None]:
# Load the training transaction table as `transactions` and print a summary
# of its columns, dtypes and memory usage.
transactions = pd.read_csv('/kaggle/input/fraud-detection-data/train_transaction.csv')
transactions.info()

In [None]:
# Display the first five rows (the default for head()).
transactions.head()

In [None]:
# Display descriptive statistics using describe() with its default settings.
transactions.describe()

In [None]:
# Print the names of the first 56 columns of the transaction table, one per line.
for column_name in transactions.columns[:56]:
    print(column_name)

In [None]:
# Display the first five rows of the P_emaildomain and R_emaildomain columns.
transactions[['P_emaildomain', 'R_emaildomain']].head()

In [None]:
# Display the first ten rows of the card1 through card6 columns.
transactions[["card1", "card2", "card3", "card4", "card5", "card6"]].head(10)

In [None]:
# Display the distinct values that occur in the ProductCD column.
transactions['ProductCD'].unique()

In [None]:
# Build the labels C1 .. C14 and display the first ten rows of those columns.
cols = ["C" + str(idx) for idx in range(1, 15)]
transactions.loc[:, cols].head(n=10)

In [None]:
# Build the labels D1 .. D15 and display the first ten rows of those columns.
cols = ["D" + str(idx) for idx in range(1, 16)]
transactions.loc[:, cols].head(n=10)

In [None]:
# Build the labels M1 .. M9 and display the first ten rows of those columns.
cols = ["M" + str(idx) for idx in range(1, 10)]
transactions.loc[:, cols].head(n=10)

### **Understanding train_identity.csv**

In [None]:
# Load the training identity table as `identity` and print a summary of its
# columns, dtypes and memory usage.
identity = pd.read_csv('/kaggle/input/fraud-detection-data/train_identity.csv')
identity.info()

In [None]:
# Build the zero-padded labels id_01 .. id_10 and display their first ten rows.
cols = ["id_" + str(idx).zfill(2) for idx in range(1, 11)]
identity.loc[:, cols].head(n=10)

In [None]:
# Build the labels id_11 .. id_20 and display their first ten rows.
cols = ["id_" + str(idx).zfill(2) for idx in range(11, 21)]
identity.loc[:, cols].head(n=10)

In [None]:
# Build the labels id_21 .. id_30 and display their first ten rows.
cols = ["id_" + str(idx).zfill(2) for idx in range(21, 31)]
identity.loc[:, cols].head(n=10)

In [None]:
# Build the labels id_31 .. id_38 and display their first ten rows.
cols = ["id_" + str(idx).zfill(2) for idx in range(31, 39)]
identity.loc[:, cols].head(n=10)

In [None]:
# Display the first 20 values of id_15.
identity['id_15'].head(20)

In [None]:
# Expose id_15 under a descriptive column name; the values are copied as-is,
# with no transformation.
# NOTE(review): the name suggests a yes/no flag — confirm whether an explicit
# encoding step was intended here.
identity['is_identity_seen_before'] = identity['id_15']

In [None]:
# Display the first 20 values of id_16.
identity['id_16'].head(20)

In [None]:
# Display the first 20 values of id_30 (the column later passed to normalize_os).
identity['id_30'].head(20)

In [None]:
# Display how often each distinct id_30 value occurs (value_counts() defaults).
identity['id_30'].value_counts()

In [None]:
# Print every distinct id_30 value with its frequency as "<value>: <count>",
# one pair per line, so the full list is visible without truncation.
for raw_os, occurrences in identity['id_30'].value_counts().items():
    print(f"{raw_os}: {occurrences}")


In [None]:
def normalize_os(os_str):
    """Collapse a raw operating-system string into a coarse OS family.

    Matching is a case-insensitive substring test. Keywords are tried in a
    fixed order (Windows, iOS, Android, macOS, Linux) and the first hit wins.

    Args:
        os_str: Raw OS description, or a missing value (None / NaN).

    Returns:
        One of "Windows", "iOS", "Android", "macOS", "Linux" or "Other".
        Missing values and strings matching no keyword yield "Other".
    """
    if pd.isna(os_str):
        return "Other"

    lowered = os_str.lower()

    # Order matters: earlier entries take precedence over later ones.
    families = (
        ("windows", "Windows"),
        ("ios", "iOS"),
        ("android", "Android"),
        ("mac", "macOS"),
        ("linux", "Linux"),
    )
    for keyword, family in families:
        if keyword in lowered:
            return family
    return "Other"


# Derive the coarse OS family for every row from the raw id_30 string.
identity["user_os"] = identity["id_30"].map(normalize_os)

In [None]:
# Display how many rows fall into each normalized OS family.
identity['user_os'].value_counts()

In [None]:
# Display the first five values of id_31 (the column later passed to normalize_browser).
identity['id_31'].head()

In [None]:
# Print every distinct id_31 value with its frequency as "<value>: <count>",
# one pair per line, so the full list is visible without truncation.
for raw_browser, occurrences in identity['id_31'].value_counts().items():
    print(f"{raw_browser}: {occurrences}")

In [None]:
def normalize_browser(browser):
    """Collapse a raw browser string into a coarse browser family.

    Matching is case-insensitive and keywords are tried in a fixed order;
    the first hit wins. All keywords are substring tests except "ie", which
    must appear as a whole token so that words merely containing the letters
    "ie" (e.g. "client", "movie") are not mistaken for Internet Explorer.

    Args:
        browser: Raw browser description, or a missing value (None / NaN).

    Returns:
        One of "Chrome", "Safari", "Firefox", "Edge", "IE", "Samsung",
        "Opera", "Android WebView" or "Other". Missing values and strings
        matching no keyword yield "Other".
    """
    import re  # local import keeps this cell runnable on its own

    if pd.isna(browser):
        return "Other"

    s = browser.lower()

    if "chrome" in s or "chromium" in s:
        return "Chrome"
    elif "safari" in s:
        return "Safari"
    elif "firefox" in s:
        return "Firefox"
    elif "edge" in s:
        return "Edge"
    # Whole-token match: a bare `"ie" in s` would also fire on any string that
    # happens to contain "ie" and would pre-empt the checks below.
    elif re.search(r"\bie\b", s) or "internet explorer" in s:
        return "IE"
    elif "samsung" in s:
        return "Samsung"
    elif "opera" in s:
        return "Opera"
    elif "webview" in s or "android browser" in s:
        return "Android WebView"
    else:
        return "Other"

In [None]:
# Derive the coarse browser family for every row from the raw id_31 string,
# then display how many rows fall into each family.
identity["user_browser"] = identity["id_31"].map(normalize_browser)
identity["user_browser"].value_counts()

In [None]:
# Display is_identity_seen_before next to id_34 for the first five rows.
identity[['is_identity_seen_before','id_34']].head()

In [None]:
# Display the first ten values of id_34.
identity['id_34'].head(10)

In [None]:
# Remove the raw id_01 .. id_38 columns; the derived columns added earlier
# (is_identity_seen_before, user_os, user_browser) are kept.
cols = ["id_" + str(idx).zfill(2) for idx in range(1, 39)]
identity = identity.drop(columns=cols)

In [None]:
# Remove the DeviceInfo column from the identity table.
identity = identity.drop(columns='DeviceInfo')

In [None]:
# Print the column/dtype summary of the identity table after the drops above.
identity.info()

In [None]:
# Display how often each distinct DeviceType value occurs.
identity['DeviceType'].value_counts()

In [None]:
# Write the reduced identity table to a CSV in the current working directory,
# without the DataFrame index.
identity.to_csv('train_identity_v2.0.csv', index=False)