In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
import sys

sys.path.append("..")
from src.utils.sql_util import (
    get_db_conn,
    get_statedata,
    get_center_info,
    get_call_center_network,
    is_center_backup,
)

%load_ext autoreload
%autoreload 2

In [None]:
def get_data(query, conn, **kwargs):
    """Execute a SQL query and return its result set as a DataFrame.

    Args:
        query: SQL text handed to ``pd.read_sql_query``.
        conn: Database connection handed to ``pd.read_sql_query``.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``pd.read_sql_query`` (for example ``params``).

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_sql_query``.
    """
    frame = pd.read_sql_query(query, conn, **kwargs)
    return frame

## Basic Count

- Number of rows in the dataset
- Number of columns in the dataset
- Number of unique exchange codes
- Minimum and maximum `datechanged`
- Number of unique `datechanged` dates

In [None]:
# Get the db connection from the project helper; every query cell below
# passes this same `db_conn` object to `get_data`.
db_conn = get_db_conn()

In [None]:
# How many rows does the routing table hold?
row_count_sql = """
    select 
        count(*)
    from raw.vibrant_routingtable_202206031725 vr ;
"""
row_count = get_data(row_count_sql, db_conn)["count"][0]
print(f"Number of rows: {row_count}")

In [None]:
# Number of columns
# information_schema.columns spans every schema, so filter on table_schema as
# well as table_name; otherwise a same-named table in another schema would
# inflate the count. The table is queried as raw.<table> everywhere else.
query = """
    select 
        count(*)
    from information_schema.columns 
    where table_schema = 'raw'
    and table_name = 'vibrant_routingtable_202206031725' ;
"""

result = get_data(query, db_conn)
print(f"Number of columns: {result['count'][0]}")

In [None]:
# Count the distinct exchange codes (npanxx) in the routing table
distinct_npanxx_sql = """
    select 
        count(distinct npanxx)
    from raw.vibrant_routingtable_202206031725 vr ;
"""

distinct_npanxx = get_data(distinct_npanxx_sql, db_conn)
n_exchange_codes = distinct_npanxx["count"][0]
print(f"Number of distinct exchange code: {n_exchange_codes}")

**Comment:** Every row in the dataset has a distinct exchange code (`npanxx`).

In [None]:
# Earliest and latest `datechanged` values in the routing table
date_range_sql = """
    select 
        min(datechanged), 
        max(datechanged)
    from raw.vibrant_routingtable_202206031725 vr ;
"""
date_range = get_data(date_range_sql, db_conn)
earliest_change, latest_change = date_range["min"][0], date_range["max"][0]
print(f"Minimum datechange: {earliest_change}")
print(f"Maximum datechange: {latest_change}")

In [None]:
# Count the distinct calendar dates on which a row was changed
# (`datechanged` is cast to date inside the query).
unique_dates_sql = """
    select 
        count(distinct datechanged::date)
    from raw.vibrant_routingtable_202206031725 vr 
"""
unique_dates = get_data(unique_dates_sql, db_conn)
n_unique_dates = unique_dates["count"][0]
print(f"Unique datechange: {n_unique_dates}")

## Routing Table Call Center Analysis

- List of columns
- Distribution of exchange codes across the call centers
- Total distinct call centers in the routing table
- Assert 1-1 mapping between call center and termination numbers
- Distinct center roles
- Distribution of center1 roles
- Unique combination of center roles

In [None]:
# List of columns
# Restrict to the `raw` schema (information_schema.columns covers every
# schema, so a same-named table elsewhere would add duplicate names) and order
# by ordinal_position so the names come back in table-definition order rather
# than in an unspecified order.
query = """
    select 
        column_name 
    from information_schema.columns
    where table_schema = 'raw'
    and table_name = 'vibrant_routingtable_202206031725'
    order by ordinal_position ;
"""
result = get_data(query, db_conn)
result["column_name"].values

In [None]:
# Number of distinct call center ids appearing in each of the four center slots
center_slot_counts_sql = """
    select 
        count(distinct center1id) as count_distinct_center1id,
        count(distinct center2id) as count_distinct_center2id,
        count(distinct center3id) as count_distinct_center3id,
        count(distinct center4id) as count_distinct_center4id
    from raw.vibrant_routingtable_202206031725 vr;
"""

center_slot_counts = get_data(center_slot_counts_sql, db_conn)
# Transposed so each slot is shown on its own row.
display(center_slot_counts.transpose())

**Comment:** 
- No exchange code has a `center4id`.
- The mapping between call centers and exchange codes is not one-to-one, i.e. each call center can handle multiple exchange codes.

In [None]:
# Total distinct call centers in the routing table
# (union of the ids found in center1id, center2id and center3id)

query = """
    select 
        distinct {}
    from raw.vibrant_routingtable_202206031725 vr ;
"""

results = []
for center_id in ["center1id", "center2id", "center3id"]:
    # dropna() removes the null id so that it is not counted as a call center.
    result = get_data(query.format(center_id), db_conn).dropna()
    results.extend(result[center_id])

# A center can appear in several slots, so de-duplicate before counting.
print(f"Number of distinct call centers: {len(set(results))}")

**Comment:** `None` values are dropped before counting. `None` values are encountered in the center columns of exchange codes whose calls were routed to the national backup centers.

In [None]:
# Assert 1-1 mapping between call center and termination numbers
#
# Equal distinct counts alone do not prove a 1-1 mapping: e.g. center A with
# terminations {1, 2} plus centers B and C sharing termination 3 gives three
# distinct ids and three distinct terminations. So the distinct
# (id, termination) pairs are fetched and checked for uniqueness as well.

query = """
    select distinct
        {id_col} as center_id,
        {termination_col} as termination
    from raw.vibrant_routingtable_202206031725 vr
    where {id_col} is not null
    or {termination_col} is not null ;
"""

for slot in (1, 2, 3):
    id_col = f"center{slot}id"
    termination_col = f"center{slot}termination"
    result = get_data(
        query.format(id_col=id_col, termination_col=termination_col), db_conn
    )

    # Original check: same number of distinct ids and distinct terminations.
    # nunique() ignores nulls, like count(distinct ...) does in SQL.
    assert result["center_id"].nunique() == result["termination"].nunique(), (
        f"{id_col} and {termination_col} have different distinct counts"
    )

    # Stronger check: among pairs where both values are present, no id maps
    # to two terminations and no termination maps to two ids.
    complete_pairs = result.dropna()
    assert complete_pairs["center_id"].is_unique, (
        f"some {id_col} maps to more than one {termination_col}"
    )
    assert complete_pairs["termination"].is_unique, (
        f"some {termination_col} maps to more than one {id_col}"
    )

In [None]:
# Number of distinct roles seen in each of the first three center slots
role_counts_sql = """
    select 
        count(distinct center1role) as distinct_center1role,
        count(distinct center2role) as distinct_center2role,
        count(distinct center3role) as distinct_center3role
    from raw.vibrant_routingtable_202206031725 vr ;
"""

role_counts = get_data(role_counts_sql, db_conn)
# Transposed so each slot is shown on its own row.
display(role_counts.transpose())

In [None]:
# Row count per center1 role, most frequent role first
center1_roles_sql = """
    select 
        center1role , 
        count(*)
    from raw.vibrant_routingtable_202206031725 vr 
    group by 1
    order by 2 desc;
"""

center1_roles = get_data(center1_roles_sql, db_conn)
display(center1_roles)

**Comment:** There are 27,763 exchange codes with `None` values in their center1, i.e. they were routed directly to a backup center. 

In [None]:
# Row count for every (center1role, center2role, center3role) combination,
# most frequent combination first
role_combinations_sql = """
    select 
        center1role, 
        center2role,
        center3role, 
        count(*)
    from raw.vibrant_routingtable_202206031725 vr 
    group by 1, 2, 3
    order by 4 desc;
"""

role_combinations = get_data(role_combinations_sql, db_conn)
display(role_combinations)

We should clarify the order of the roles. The order looks like `PrimaryFIPSCountyCode > PrimaryZipCode > PrimaryAreaCode > PrimaryStateCode > BackupFIPSCountyCode > BackupAreaCode > BackupStateCode`. What is `PrimaryZipCode`?

## Digging Deep: investigative questions

### Exchange Code

- Number of exchange codes not in the call-center table
- Number of exchange codes that should have been directly routed to National-Backup
- Out of the exchange codes that should have been directly routed to National-Backup, how many exist in the call-center table?
- Out of the exchange codes that should have been directly routed to National-Backup, how many exist in the call-center table and are terminated?
- Out of the exchange codes that should have been directly routed to National-Backup and exist in the call-center table, what is the call network distribution?

In [None]:
# Number of exchange codes not in the call-center table
# (left join keeps every routing row; rows without a match in the calls table
# have a null caller_npanxx and are the ones counted)
missing_in_calls_sql = """
    select 
       count(distinct npanxx)
    from raw.vibrant_routingtable_202206031725 vr 
    left join raw.vibrant_centers_calls_202206031630 vcc 
    on vr.npanxx = vcc.caller_npanxx 
    where vcc.caller_npanxx isnull ;
"""

missing_in_calls = get_data(missing_in_calls_sql, db_conn)
n_missing_in_calls = missing_in_calls["count"][0]
print(n_missing_in_calls)

In [None]:
# Number of exchange codes that should have been directly routed to
# National-Backup, i.e. routing rows whose center1id is null
no_center1_sql = """
    select 
        count(npanxx) 
    from raw.vibrant_routingtable_202206031725 vr 
    where center1id isnull;
"""

no_center1 = get_data(no_center1_sql, db_conn)
n_no_center1 = no_center1["count"][0]
print(n_no_center1)

In [None]:
# Of the exchange codes with a null center1id, how many also appear in the
# call-center table? (inner join keeps only codes present in both tables)

no_center1_in_calls_sql = """
    select 
        count(distinct vr.npanxx),
        count(distinct vcc.caller_npanxx)
    from raw.vibrant_routingtable_202206031725 vr 
    inner join raw.vibrant_centers_calls_202206031630 vcc 
    on vr.npanxx = vcc.caller_npanxx 
    where center1id isnull  ;
"""

no_center1_in_calls = get_data(no_center1_in_calls_sql, db_conn)
display(no_center1_in_calls)

In [None]:
# Of the exchange codes with a null center1id, how many appear in the
# call-center table on rows with terminated = 1?

terminated_no_center1_sql = """
    select 
        count(distinct vr.npanxx)
    from raw.vibrant_routingtable_202206031725 vr 
    inner join raw.vibrant_centers_calls_202206031630 vcc 
    on vr.npanxx = vcc.caller_npanxx 
    where center1id isnull 
    and terminated=1;
"""

terminated_no_center1 = get_data(terminated_no_center1_sql, db_conn)
display(terminated_no_center1)

In [None]:
# Out of the exchange code that should have been directly routed to National-Backup and exists in the call-center table, what is the call network distribution?


def exchange_code_distribution_per_subnetwork_with_null_in_center1(network):
    """Count distinct caller exchange codes for one network flag column.

    The routing table is inner-joined to the calls table on the exchange code,
    and only rows with a null ``center1id``, ``terminated = 1`` and
    ``<network> = 1`` are counted.

    Args:
        network: Name of a column in the joined tables. It is interpolated
            into the SQL text twice (as the output alias and as the filter
            column), so it must be a trusted column name, not user input.

    Returns:
        Whatever ``get_data`` returns for the query: a single count column
        aliased to ``network``.
    """
    sql_template = """
        select 
            count(distinct caller_npanxx) as {}
        from raw.vibrant_routingtable_202206031725 vr 
        inner join raw.vibrant_centers_calls_202206031630 vcc 
        on vr.npanxx = vcc.caller_npanxx 
        where center1id isnull 
        and terminated = 1
        and {} = 1;
    """

    return get_data(sql_template.format(network, network), db_conn)


networks = [
    "network_is_ll",
    "network_is_va",
    "network_is_ll_spanish",
    "network_is_ll_backup",
]
# One single-column frame per network flag, placed side by side.
results = pd.concat(
    [
        exchange_code_distribution_per_subnetwork_with_null_in_center1(network)
        for network in networks
    ],
    axis=1,
)
display(results)

**Comment:** Not all the exchange codes were routed directly to the National-Backup subnetwork.

In [None]:
# Sanity check: the same filter as the per-network counts (null center1id,
# terminated = 1), but grouped by the `network` column in a single query.
network_breakdown_sql = """
        select 
            network,
            count(distinct caller_npanxx)
        from raw.vibrant_routingtable_202206031725 vr 
        inner join raw.vibrant_centers_calls_202206031630 vcc 
        on vr.npanxx = vcc.caller_npanxx 
        where center1id isnull 
        and terminated = 1
        group by 1;
    """

network_breakdown = get_data(network_breakdown_sql, db_conn)
network_breakdown

### Call Centers

- What is the distribution of call centers across the routing logic?
- How many call centers are never the first in the routing logic? 
- List of call centers that are never the first in the routing logic 
- Where are the call centers that are never the first in the routing logic located? 
- How many of the call centers that are never the first in the routing logic are also National-Backup centers? 
- List of the call centers that are never the first in the routing logic and are also National-Backup centers 

In [None]:
# How many distinct call centers appear in each routing slot (center1-center4)?
slot_distribution_sql = """
    select 
        count(distinct  center1id) as count_center1id,
        count(distinct  center2id) as count_center2id,
        count(distinct  center3id) as count_center3id,
        count(distinct  center4id) as count_center4id 
    from raw.vibrant_routingtable_202206031725 vr ;
"""

slot_distribution = get_data(slot_distribution_sql, db_conn)
slot_distribution

In [None]:
# Collect the non-null ids found in each of the first three center slots.
query = """
    select 
        distinct {}
    from raw.vibrant_routingtable_202206031725 vr ;
"""

center_ids = ["center1id", "center2id", "center3id"]
results = []
all_center_ids = []
for center_id in center_ids:
    distinct_ids = get_data(query.format(center_id), db_conn)
    non_null_ids = distinct_ids[distinct_ids[center_id].notna()]
    results.append(non_null_ids)
    all_center_ids += non_null_ids[center_id].values.tolist()

# Side-by-side frame with one column per slot. concat(axis=1) aligns on the
# row index, so slots may be padded with NaN where their index has no row.
results = pd.concat(results, axis=1)
# Number of distinct call centers across the three slots.
print(len(set(all_center_ids)))

In [None]:
# How many call centers are never the first in the routing logic?
# (ids seen in any slot, minus the ids seen in the center1 slot)
len(set(all_center_ids).difference(results["center1id"]))

In [None]:
# List of call centers that are never the first in the routing logic
# (ids seen in any slot, minus the ids seen in the center1 slot)
call_centers_not_center1 = set(all_center_ids).difference(results["center1id"])
call_centers_not_center1

In [None]:
# Where are the call centers that are never the first in the routing logic
# located? Look up each center with the project helper and stack the results.
call_center_info = pd.concat(
    [
        get_center_info(db_conn, call_center_id)
        for call_center_id in call_centers_not_center1
    ]
)
call_center_info

In [None]:
# How many of the call centers that are never the first in the routing logic
# are also National-Backup centers?
# One is_center_backup() result per center, in the set's iteration order.
call_center_is_backup = [
    is_center_backup(db_conn, call_center_id)
    for call_center_id in call_centers_not_center1
]
# Keep the ids whose result is truthy; the set is not modified in between,
# so iterating it again pairs each id with its own result.
call_center_that_is_also_backup = [
    call_center_id
    for call_center_id, is_backup in zip(
        call_centers_not_center1, call_center_is_backup
    )
    if is_backup
]

sum(call_center_is_backup), len(call_centers_not_center1)

In [None]:
# List of the call centers that are never the first in the routing logic and
# are also National-Backup centers (the ids for which is_center_backup
# returned a truthy value in the previous cell's loop).
call_center_that_is_also_backup

**Comment:** Only 1 of the 14 centers that are never center 1 is also a National-Backup center. Most of these centers are in New York; the current hypothesis is that New York has many local call centers, so these call centers never get to be first. Probably worth verifying.