# Script 01: Identifying Fraud Risk Accounts

In [None]:
import pandas as pd
import numpy as np
from datetime import date

from scipy import stats
# Enable greedy tab-completion for this session. `magic()` is deprecated in
# IPython; run_line_magic(name, line) is the supported way to invoke a line magic.
get_ipython().run_line_magic('config', 'IPCompleter.greedy=True')

In [None]:
## Testing AWS Connectivity
from contextlib import closing


import psycopg2
import simplejson
import sys
# Python 2 only: force the interpreter-wide default string encoding to UTF-8.
# Neither the `reload` builtin nor `sys.setdefaultencoding` exists on Python 3
# (whose default encoding is already UTF-8), so the hack is skipped there
# instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)  # re-exposes sys.setdefaultencoding, which is removed at startup
    sys.setdefaultencoding('utf8')

DEFAULT_DB = 'xxx'
DEFAULT_HOST = 'xxxx'
DEFAULT_PORT = 1111


class PsycopgConnector:
    '''
    A database connector that uses Psycopg to connect to Redshift.

    How to play:

        psy_conn = PsycopgConnector(username, password)
        df = psy_conn.run_query(sql=sql, return_data=True)

    NOTE: Every call to run_query runs inside a transaction that is
    committed when the statement finishes without raising.
    This means INSERT, DROP, TRUNCATE, etc. all work against the DB.
    '''

    def __init__(
        self,
        username=None,
        password=None,
        db=DEFAULT_DB,
        host=DEFAULT_HOST,
        port=DEFAULT_PORT,
    ):
        '''
        Store the connection settings; no connection is opened here.

        username, password -- credentials passed to psycopg2.connect
        db, host, port     -- target database; each falls back to the
                              module-level DEFAULT_* constant when omitted
        '''
        # Honour the caller's arguments (previously the DEFAULT_* constants
        # were stored unconditionally, so overrides were silently ignored).
        self.db = db
        self.host = host
        self.port = port

        self.username = username
        self.password = password

    def _get_connection(self):
        '''
        Open a new psycopg2 connection from the stored settings.

        The connection is also kept on self.conn and returned.
        '''
        self.conn = psycopg2.connect(
            dbname=self.db,
            user=self.username,
            password=self.password,
            host=self.host,
            port=self.port
        )

        return self.conn

    def run_query(self, sql, return_data=False):
        '''
        Run `sql` on a fresh connection that is closed afterwards.

        sql         -- the statement to execute
        return_data -- when true, the result of pd.read_sql is returned;
                       otherwise the statement is executed through a cursor
                       and None is returned
        '''
        with closing(self._get_connection()) as conn:
            # `with conn` wraps the work in a transaction: commit on success,
            # rollback if an exception escapes the block.
            with conn:
                if return_data:
                    return pd.read_sql(sql=sql, con=conn)

                # A cursor is only needed when we execute the statement ourselves.
                with conn.cursor() as cur:
                    cur.execute(sql)
                    

# Load the database credentials from the local JSON file
with open("credentials.json.nogit") as fh:
    creds = simplejson.load(fh)

# Missing keys yield None rather than raising
username, password = creds.get("user_name"), creds.get("password")

pig = PsycopgConnector(username=username, password=password)

## Number of Fraud Risk Accounts (N)

Please enter the number **(N)** of Fraud Risk Accounts that you want to check today.

In [None]:
#------------------------------------------------------------
# PLEASE READ AND ANSWER THIS QUESTION
#
# Question: How many accounts do you want to check today?
# Answer: set N below to that number of accounts.
#
# N is used later as the row count in df_fra_all_features.head(N)
# when the list for the support team is built.
#-----------------------------------------------------------
N = 25
#-------------------------------------------------------------

## Computing Fraud Risk Score (FRS)

The following script queries new accounts (those not already labeled) that signed up within the last 91 days (counting back from today) and computes the corresponding **Fraud Risk Score (FRS)** for each.

In [None]:
## Pull new user accounts and compute Fraud Risk Score (FRS)
## NOTE(review): presumably this notebook writes the dated .tsv file that is
## loaded afterwards -- confirm against fraud_risk_score_computing.ipynb
%run ./fraud_risk_score_computing.ipynb

In [None]:
## Load the new user accounts together with their Fraud Risk Score (FRS)
path = "/Users/dwahid/Documents/GitHub/fraud_detection/data/fraud_risk_acc_to_be_labeled_all_features/"

## Undated file name (kept for reference)
# file_name = "new_fraud_risk_acc_tbl_all_features"
# data = path + file_name + ".tsv"

## Dated file name: today's ISO date (YYYY-MM-DD) is the suffix
file_name = "new_fraud_risk_acc_tbl_all_features_"
today = date.today().isoformat()
data = "".join([path, file_name, today, ".tsv"])

## FRA - Fraud Risk Accounts; the file is tab-separated
df_fra_all_features = pd.read_csv(data, delimiter="\t")

In [None]:
## Preview the last rows of the loaded table
df_fra_all_features.tail()

In [None]:
## (rows, columns) of the loaded table
df_fra_all_features.shape

## Return Top N Fraud Risk Accounts for Support Labeling
It returns a list of the top N fraud risk accounts (FRA) for support labeling.

In [None]:
## Keep the first N rows of the scored table (treated as the top N accounts)
df_fra_topN_all_features = df_fra_all_features.iloc[:N]

## Columns handed to the support team
support_columns = [
    'systemid', 'admin_email', 'signup_date', 'effective_date',
    'days_on_platform', 'fraud_label', 'support_note',
]
df_fra_topN_for_support = df_fra_topN_all_features[support_columns]

## Output directory for the accounts that still need a support-team label
path = "/Users/dwahid/Documents/GitHub/fraud_detection/data/fraud_risk_acc_to_be_labeled_for_support/"

## Undated file name (kept for reference)
# file_name = "new_fraud_risk_acc_tbl_for_support"
# path_fra_topN_for_support = path + file_name + ".csv"

## Dated file name: today's ISO date (YYYY-MM-DD) is the suffix
file_name = "new_fraud_risk_acc_tbl_for_support_"
today = date.today().isoformat()
path_fra_topN_for_support = "".join([path, file_name, today, ".csv"])

## Write the selection as comma-separated values, without the index column
df_fra_topN_for_support.to_csv(path_fra_topN_for_support, index=False, sep=",")

In [None]:
## Preview the rows exported for the support team; use N (the user-chosen
## account count) instead of a hard-coded 25 so the preview follows the setting
df_fra_topN_for_support.head(N)

In [None]:
## (rows, columns) of the table exported for the support team
df_fra_topN_for_support.shape

In [None]:
"You data file for Support labeling is ready"