# Anomaly Detection

- This project is applied to claims in the Motor line of business (LoB) to detect possible "fraud" or other anomalies.

## Set-up 

### General libraries

In [1]:
import os
import sys
import datetime
import pandas as pd
import numpy as np

from os.path import join

# Never truncate wide frames: show every column when a DataFrame is displayed.
pd.options.display.max_columns = None
# Render floats with a thousands separator and two decimals (e.g. 1,234.50).
pd.set_option("display.float_format", '{:,.2f}'.format)

### Logging

In [2]:
import logging

# Configure the root logger: INFO and above, printed as "<timestamp> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
)
# Named logger for this notebook.
logger = logging.getLogger(__name__)

### Load environment variables

In [3]:
from dotenv import load_dotenv
# Read key/value pairs from the local .env file into the process environment.
load_dotenv('.env')

# All three settings are mandatory: a missing one raises KeyError right here.
code_root, cfg_path, data_root = [
    os.environ[name] for name in ('CODE_ROOT', 'CFG_PATH', 'DATA_ROOT')
]

# Put the project root first on the import path
# (presumably so the `src` package imported later can be found — confirm).
sys.path.insert(0, code_root)

### Specific libraries

In [4]:
from src.utils import get_data_dict, get_config_dict
from src.feature_selection.functions import get_features

### Environment

from src.auxiliar.blob import DataBackupExporter

# The presence of CONNECTION_STRING in the environment selects the Azure setup.
# NOTE(review): an empty-string value still counts as "set" — confirm that is intended.
CONNECTION_STRING = os.environ.get("CONNECTION_STRING")
is_azure = CONNECTION_STRING is not None

# `client` is created only for the Azure setup; it stays undefined otherwise,
# so any later use of `client` must be guarded by `is_azure`.
if is_azure:
    client = DataBackupExporter()

### Read data

In [5]:
# Load the "general" configuration and echo it as the cell output.
config = get_config_dict(
    code_root=code_root,
    cfg_path=cfg_path,
    usage="general",
)
config

2023-11-09 15:13:00,613 - Loading parameters from /Users/allianz/workspace_github_pers/Anomaly_detection/conf/param_setup/param_setup_notebooks.yaml


{'close_date': datetime.date(2021, 12, 31),
 'code_root': '/Users/allianz/workspace_github_pers/Anomaly_detection'}

In [6]:
# Obtain the data dictionary for the loaded configuration.
data_dict = get_data_dict(config)

2023-11-09 15:13:00,620 - Using file for data dict.


In [7]:
# Derive the full feature list and its categorical / numerical subsets
# from the data dictionary.
(
    feature_columns,
    categorical_features,
    numerical_features,
) = get_features(data_dict)

2023-11-09 15:13:00,628 - Using model None
2023-11-09 15:13:00,630 - Features: 20.
2023-11-09 15:13:00,631 - Categorical features: 16.
2023-11-09 15:13:00,631 - Numerical features: 4.


In [8]:
# Show every selected feature name.
feature_columns

['weekday_surv_sin',
 'weekday_declaration',
 'd_surv_sin_is_weekend',
 'd_dcl_is_weekend',
 'reporting_delay_in_days',
 'contract_age_in_days',
 'is_closed',
 'is_serious_sinister',
 'has_judiciary_procedure',
 'witness_flag',
 'C_CAU',
 'C_FAM_PROD',
 'C_NAT_SIN',
 'C_ET',
 'C_FORM',
 'RESP_CIE',
 'C_APPLN_MALUS',
 'CNT_TY_GES',
 'IND_PMT_DIR',
 'C_ORGN_OUVT']

In [9]:
# Show the features that are treated as categorical.
categorical_features

['d_surv_sin_is_weekend',
 'd_dcl_is_weekend',
 'is_closed',
 'is_serious_sinister',
 'has_judiciary_procedure',
 'witness_flag',
 'C_CAU',
 'C_FAM_PROD',
 'C_NAT_SIN',
 'C_ET',
 'C_FORM',
 'RESP_CIE',
 'C_APPLN_MALUS',
 'CNT_TY_GES',
 'IND_PMT_DIR',
 'C_ORGN_OUVT']

In [10]:
# Show the features that are treated as numerical.
# NOTE(review): the recorded output lists the weekday_* columns here — confirm that is intended.
numerical_features

['weekday_surv_sin',
 'weekday_declaration',
 'reporting_delay_in_days',
 'contract_age_in_days']