In [1]:

import os
import json
import functools
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
# Step 1: Read the config.json file
with open("D:\\Healthscore Codes\\containment_metric - Copy.json", 'r') as file:
    config_data = json.load(file)
    print(config_data)

{'features': [{'feature_name': 'SMART HELP', 'feature_id': '15', 'metrics': [{'Primary_intent': 'Equipment Support', 'Primary_intent_detail': 'SmartHelp', 'cont_display_metric_name': 'Smarthelp_containment_rate', 'cont_metric_seq_num': '1'}, {'Primary_intent': 'Equipment Support', 'Primary_intent_detail': 'PnP', 'cont_display_metric_name': 'PnP_containment_rate', 'cont_metric_seq_num': '2'}]}, {'feature_name': 'BIL LING', 'feature_id': '1', 'metrics': [{'Primary_intent': 'Billing', 'Primary_intent_detail': 'Billing Concern', 'cont_display_metric_name': 'Billing_containment_rate', 'cont_metric_seq_num': '1'}, {'Primary_intent': 'Billing', 'Primary_intent_detail': 'Billing General', 'cont_display_metric_name': 'Billing_containment_rate', 'cont_metric_seq_num': '2'}, {'Primary_intent': 'Billing', 'Primary_intent_detail': 'Billing Preferences', 'cont_display_metric_name': 'Billing_containment_rate', 'cont_metric_seq_num': '3'}, {'Primary_intent': 'Billing', 'Primary_intent_detail': 'EasyPa

In [3]:
def create_dataframe_from_json(data):
    # Flatten the JSON and create a list of dictionaries
    flattened_data = []

    for feature in data["features"]:
        feature_name = feature["feature_name"]
        feature_id = feature["feature_id"]

        for metric in feature["metrics"]:
            flattened_data.append({
                "hs_feature_name": feature_name,
                "hs_feature_id": feature_id,
                "primary_intent": metric["Primary_intent"],
                "primary_intent_detail": metric["Primary_intent_detail"],
                "cont_display_metric_name": metric["cont_display_metric_name"],
                "cont_metric_seq_num": metric["cont_metric_seq_num"]
            })

    # Create the DataFrame
    df = pd.DataFrame(flattened_data)

    # Add the 'Containment_metric_id' column as a sequence number
    df["containment_metric_id"] = range(1, len(df) + 1)
    df['create_dt'] = pd.to_datetime('today').normalize().date()

    # Reorder the columns if needed
    df = df[["containment_metric_id", "hs_feature_name", "hs_feature_id", "primary_intent", "primary_intent_detail", "cont_display_metric_name", "cont_metric_seq_num", "create_dt"]]

    return df

In [4]:
# Call the function with JSON data
df = create_dataframe_from_json(config_data)

# Print the DataFrame
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', 500)
print(df.head())

   containment_metric_id hs_feature_name hs_feature_id     primary_intent  \
0                      1      SMART HELP            15  Equipment Support   
1                      2      SMART HELP            15  Equipment Support   
2                      3        BIL LING             1            Billing   
3                      4        BIL LING             1            Billing   
4                      5        BIL LING             1            Billing   

  primary_intent_detail    cont_display_metric_name cont_metric_seq_num  \
0             SmartHelp  Smarthelp_containment_rate                   1   
1                   PnP        PnP_containment_rate                   2   
2       Billing Concern    Billing_containment_rate                   1   
3       Billing General    Billing_containment_rate                   2   
4   Billing Preferences    Billing_containment_rate                   3   

    create_dt  
0  2025-02-18  
1  2025-02-18  
2  2025-02-18  
3  2025-02-18  
4  202

In [5]:
df.shape

(14, 8)

In [5]:
# Get unique values from the columns
primary_intent_values = df['primary_intent'].unique()
primary_intent_detail_values = df['primary_intent_detail'].unique()

In [6]:
print(primary_intent_values)
print(primary_intent_detail_values)

['Equipment Support']
['SmartHelp' 'PnP']


In [7]:
# Convert arrays to strings formatted for SQL IN clauses
primary_intent_str = "', '".join(primary_intent_values)
primary_intent_detail_str = "', '".join(primary_intent_detail_values)

In [8]:
print(primary_intent_str)
print(primary_intent_detail_str)

Equipment Support
SmartHelp', 'PnP


In [9]:
# Build the dynamic query
second_table_query = f"""
SELECT 		
    primary_intent_detail,  		
    CAST(contact_dt AS DATE) AS contact_dt, 		
    COUNT(DISTINCT sub_contact_id) AS sub_contact_id, 		
    COUNT(DISTINCT CASE WHEN selfservice_containment = 1 THEN sub_contact_id END) AS contained, 		
    CASE 		
        WHEN COUNT(DISTINCT sub_contact_id) > 0 THEN		
            ROUND(CAST(SUM(CASE WHEN selfservice_containment = 1 THEN 1 ELSE 0 END) AS DOUBLE) 		
            / COUNT(DISTINCT sub_contact_id) * 100, 2)		
        ELSE		
0		
    END AS containment_rate		
FROM 		
    ota_data_assets_temp.omni_intent_cntct_fact 		
WHERE 		
    CAST(contact_dt AS DATE) BETWEEN date_add('day', -90, DATE '2024-08-27') AND DATE '2024-08-27' 		
    AND primary_intent IN ('{primary_intent_str}')
    AND initial_channel = 'CoxApp'		
    AND lob = 'R'		
    AND primary_intent_detail IN ('{primary_intent_detail_str}')
GROUP BY 		
    primary_intent_detail, contact_dt		
ORDER BY 		
    contact_dt DESC
"""


In [10]:
print(second_table_query)


SELECT 		
    primary_intent_detail,  		
    CAST(contact_dt AS DATE) AS contact_dt, 		
    COUNT(DISTINCT sub_contact_id) AS sub_contact_id, 		
    COUNT(DISTINCT CASE WHEN selfservice_containment = 1 THEN sub_contact_id END) AS contained, 		
    CASE 		
        WHEN COUNT(DISTINCT sub_contact_id) > 0 THEN		
            ROUND(CAST(SUM(CASE WHEN selfservice_containment = 1 THEN 1 ELSE 0 END) AS DOUBLE) 		
            / COUNT(DISTINCT sub_contact_id) * 100, 2)		
        ELSE		
0		
    END AS containment_rate		
FROM 		
    ota_data_assets_temp.omni_intent_cntct_fact 		
WHERE 		
    CAST(contact_dt AS DATE) BETWEEN date_add('day', -90, DATE '2024-08-27') AND DATE '2024-08-27' 		
    AND primary_intent IN ('Equipment Support')
    AND initial_channel = 'CoxApp'		
    AND lob = 'R'		
    AND primary_intent_detail IN ('SmartHelp', 'PnP')
GROUP BY 		
    primary_intent_detail, contact_dt		
ORDER BY 		
    contact_dt DESC



In [11]:
second_table_df = pd.read_csv(r"D:\Healthsore Data\hs_new_metric.csv")

In [12]:
second_table_df.head()

Unnamed: 0,primary_intent_detail,contact_dt,sub_contact_id,contained,containment_rate
0,PnP,2024-08-27,1839,1593,86.62
1,SmartHelp,2024-08-27,1453,1346,92.64
2,SmartHelp,2024-08-26,1500,1399,93.27
3,PnP,2024-08-26,1932,1658,85.82
4,PnP,2024-08-25,1900,1680,88.42


In [13]:
# Merge the two dataframes on 'primary_intent_detail' using a left join
combined_df = pd.merge(second_table_df, df, on='primary_intent_detail', how='left')


In [14]:
combined_df.head()

Unnamed: 0,primary_intent_detail,contact_dt,sub_contact_id,contained,containment_rate,containment_metric_id,hs_feature_name,hs_feature_id,primary_intent,cont_display_metric_name,cont_metric_seq_num,create_dt
0,PnP,2024-08-27,1839,1593,86.62,2,SMART HELP,15,Equipment Support,PnP_containment_rate,2,2025-02-17
1,SmartHelp,2024-08-27,1453,1346,92.64,1,SMART HELP,15,Equipment Support,Smarthelp_containment_rate,1,2025-02-17
2,SmartHelp,2024-08-26,1500,1399,93.27,1,SMART HELP,15,Equipment Support,Smarthelp_containment_rate,1,2025-02-17
3,PnP,2024-08-26,1932,1658,85.82,2,SMART HELP,15,Equipment Support,PnP_containment_rate,2,2025-02-17
4,PnP,2024-08-25,1900,1680,88.42,2,SMART HELP,15,Equipment Support,PnP_containment_rate,2,2025-02-17


In [15]:
second_table_columns = ['containment_metric_id', 'primary_intent', 'primary_intent_detail', 'sub_contact_id', 'contained', 'contact_dt', 'containment_rate', 'hs_feature_name', 'cont_display_metric_name']
second_tabledf = combined_df[second_table_columns]
second_tabledf.head()

Unnamed: 0,containment_metric_id,primary_intent,primary_intent_detail,sub_contact_id,contained,contact_dt,containment_rate,hs_feature_name,cont_display_metric_name
0,2,Equipment Support,PnP,1839,1593,2024-08-27,86.62,SMART HELP,PnP_containment_rate
1,1,Equipment Support,SmartHelp,1453,1346,2024-08-27,92.64,SMART HELP,Smarthelp_containment_rate
2,1,Equipment Support,SmartHelp,1500,1399,2024-08-26,93.27,SMART HELP,Smarthelp_containment_rate
3,2,Equipment Support,PnP,1932,1658,2024-08-26,85.82,SMART HELP,PnP_containment_rate
4,2,Equipment Support,PnP,1900,1680,2024-08-25,88.42,SMART HELP,PnP_containment_rate


In [17]:
third_table_columns = ['containment_metric_id', 'primary_intent', 'primary_intent_detail', 'contact_dt', 'containment_rate']
third_tabledf = combined_df[third_table_columns]
third_tabledf.head()

Unnamed: 0,containment_metric_id,primary_intent,primary_intent_detail,contact_dt,containment_rate
0,2,Equipment Support,PnP,2024-08-27,86.62
1,1,Equipment Support,SmartHelp,2024-08-27,92.64
2,1,Equipment Support,SmartHelp,2024-08-26,93.27
3,2,Equipment Support,PnP,2024-08-26,85.82
4,2,Equipment Support,PnP,2024-08-25,88.42


In [23]:
# Pivot the data
df_pivot = third_tabledf.pivot_table(index=['containment_metric_id', 'primary_intent', 'primary_intent_detail'],
                          columns='contact_dt',
                          values='containment_rate',
                          aggfunc='first').reset_index()

# Display the result
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', 200)      # Show 100 rows
df_pivot.head()

contact_dt,containment_metric_id,primary_intent,primary_intent_detail,2024-05-29,2024-05-30,2024-05-31,2024-06-01,2024-06-02,2024-06-03,2024-06-04,2024-06-05,2024-06-06,2024-06-07,2024-06-08,2024-06-09,2024-06-10,2024-06-11,2024-06-12,2024-06-13,2024-06-14,2024-06-15,2024-06-16,2024-06-17,2024-06-18,2024-06-19,2024-06-20,2024-06-21,2024-06-22,2024-06-23,2024-06-24,2024-06-25,2024-06-26,2024-06-27,2024-06-28,2024-06-29,2024-06-30,2024-07-01,2024-07-02,2024-07-03,2024-07-04,2024-07-05,2024-07-06,2024-07-07,2024-07-08,2024-07-09,2024-07-10,2024-07-11,2024-07-12,2024-07-13,2024-07-14,2024-07-15,2024-07-16,2024-07-17,2024-07-18,2024-07-19,2024-07-20,2024-07-21,2024-07-22,2024-07-23,2024-07-24,2024-07-25,2024-07-26,2024-07-27,2024-07-28,2024-07-29,2024-07-30,2024-07-31,2024-08-01,2024-08-02,2024-08-03,2024-08-04,2024-08-05,2024-08-06,2024-08-07,2024-08-08,2024-08-09,2024-08-10,2024-08-11,2024-08-12,2024-08-13,2024-08-14,2024-08-15,2024-08-16,2024-08-17,2024-08-18,2024-08-19,2024-08-20,2024-08-21,2024-08-22,2024-08-23,2024-08-24,2024-08-25,2024-08-26,2024-08-27
0,1,Equipment Support,SmartHelp,94.14,94.43,93.26,93.95,94.63,93.33,93.25,94.05,93.85,93.27,94.1,94.78,93.53,93.25,94.45,95.08,95.2,94.05,94.82,93.99,94.87,95.07,94.41,94.42,95.62,94.64,94.31,95.01,94.84,94.5,94.01,94.23,95.17,93.18,93.42,94.31,94.96,94.49,95.63,95.18,93.1,94.97,95.32,94.62,93.19,93.19,95.5,94.29,95.0,94.07,93.71,94.93,94.83,96.1,95.04,95.27,95.36,95.15,95.65,94.1,93.72,94.14,94.12,94.07,93.29,92.37,93.4,94.45,93.14,93.52,93.38,93.66,94.16,94.38,95.41,93.37,93.35,94.37,94.09,94.29,94.68,95.39,93.18,93.55,95.57,93.16,94.81,93.8,94.62,93.27,92.64
1,2,Equipment Support,PnP,87.16,88.69,86.08,85.33,88.9,84.89,86.31,85.3,85.15,85.52,88.92,87.71,86.08,86.83,88.61,87.68,88.73,87.35,88.62,84.69,86.77,87.76,87.07,87.07,89.18,88.29,85.22,86.94,87.12,87.62,88.54,87.79,89.16,84.34,85.57,87.49,90.11,85.82,87.45,88.89,84.34,88.06,89.18,90.29,89.3,89.55,90.15,88.55,87.37,88.6,89.39,89.15,90.6,90.45,87.88,88.78,87.86,88.7,88.98,89.53,90.25,86.52,87.74,87.5,86.97,87.02,89.82,89.51,85.21,87.46,86.45,86.79,88.25,88.42,89.45,85.68,87.3,87.95,87.54,86.65,88.36,89.33,84.66,87.49,88.72,87.58,87.32,87.09,88.42,85.82,86.62


In [21]:
print(len(df_pivot.columns))

94


In [33]:
df_pivot.dtypes

contact_dt
containment_metric_id      int64
primary_intent            object
primary_intent_detail     object
2024-05-29               float64
2024-05-30               float64
2024-05-31               float64
2024-06-01               float64
2024-06-02               float64
2024-06-03               float64
2024-06-04               float64
2024-06-05               float64
2024-06-06               float64
2024-06-07               float64
2024-06-08               float64
2024-06-09               float64
2024-06-10               float64
2024-06-11               float64
2024-06-12               float64
2024-06-13               float64
2024-06-14               float64
2024-06-15               float64
2024-06-16               float64
2024-06-17               float64
2024-06-18               float64
2024-06-19               float64
2024-06-20               float64
2024-06-21               float64
2024-06-22               float64
2024-06-23               float64
2024-06-24               float64

In [26]:
import numpy as np
import pandas as pd

def calculate_last7_and_30_days(df):
    # Extract the date columns (skip the non-date columns like 'display_names' and 'operating_system_type')
    datecolumn = [col for col in df.columns if '-' in col]
    
    # Exclude the latest date column (the first one)
    last_7_columns = datecolumn[-8:-1]  # Get the last 7 columns excluding the latest date
    last_30_columns = datecolumn[-31:-1]

    # Extract the date part and convert them to datetime objects
    date_objects = [pd.to_datetime(col.split()[0], format='%Y-%m-%d') for col in datecolumn]

    # Get the column corresponding to the latest date
    latest_date = max(date_objects)
    latest_date_column = datecolumn[date_objects.index(latest_date)]

    # Create a new column 'Yesterday' with the values from the latest date column
    df.loc[:, 'Yesterday'] = df[latest_date_column]
    
    # Ensure numeric columns before performing mean calculation
    df[last_7_columns] = df[last_7_columns].apply(pd.to_numeric, errors='coerce')
    df[last_30_columns] = df[last_30_columns].apply(pd.to_numeric, errors='coerce')

    # Calculate the mean for each row across the last 7 and 30 date columns
    df.loc[:, 'last_7_days'] = df[last_7_columns].mean(axis=1)
    df.loc[:, 'last_30_days'] = df[last_30_columns].mean(axis=1)

    # Replace 0 and NaN values in 'last_7_days' and 'last_30_days' with NaN to avoid division by zero
    df['last_7_days'] = df['last_7_days'].replace(0, np.nan)
    df['last_30_days'] = df['last_30_days'].replace(0, np.nan)

    # Calculate the percentage change for 'Last 7 Days' with a check for NaN
    df.loc[:, '% Change Last 7 Days'] = np.where(
        df['last_7_days'].isna(), 0, 
        (df['Yesterday'] - df['last_7_days']) / df['last_7_days'] * 100
    )

    # Calculate the percentage change for 'Last 30 Days' with a check for NaN
    df.loc[:, '% Change Last 30 Days'] = np.where(
        df['last_30_days'].isna(), 0, 
        (df['Yesterday'] - df['last_30_days']) / df['last_30_days'] * 100
    )

    # Round the percentage changes to 1 decimal place
    df.loc[:, '% Change Last 7 Days'] = df['% Change Last 7 Days'].round(1)
    df.loc[:, '% Change Last 30 Days'] = df['% Change Last 30 Days'].round(1)
    
    return df


In [30]:
last_7_and_30_days_df  = calculate_last7_and_30_days(df_pivot)
last_7_and_30_days_df = last_7_and_30_days_df[['containment_metric_id', 'primary_intent', 'primary_intent_detail', 'Yesterday', 'last_7_days', 'last_30_days', '% Change Last 7 Days', '% Change Last 30 Days']]
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', 100)
last_7_and_30_days_df.head()

contact_dt,containment_metric_id,primary_intent,primary_intent_detail,Yesterday,last_7_days,last_30_days,% Change Last 7 Days,% Change Last 30 Days
0,1,Equipment Support,SmartHelp,92.64,94.111429,93.957,-1.6,-1.4
1,2,Equipment Support,PnP,86.62,87.491429,87.575667,-1.0,-1.1


In [31]:
result_df = pd.merge(df, last_7_and_30_days_df, on="containment_metric_id", how="inner")
result_df.head()

Unnamed: 0,containment_metric_id,hs_feature_name,hs_feature_id,primary_intent_x,primary_intent_detail_x,cont_display_metric_name,cont_metric_seq_num,create_dt,primary_intent_y,primary_intent_detail_y,Yesterday,last_7_days,last_30_days,% Change Last 7 Days,% Change Last 30 Days
0,1,SMART HELP,15,Equipment Support,SmartHelp,Smarthelp_containment_rate,1,2025-02-17,Equipment Support,SmartHelp,92.64,94.111429,93.957,-1.6,-1.4
1,2,SMART HELP,15,Equipment Support,PnP,PnP_containment_rate,2,2025-02-17,Equipment Support,PnP,86.62,87.491429,87.575667,-1.0,-1.1


In [32]:
result_df = result_df[['containment_metric_id', 'hs_feature_name', 'cont_display_metric_name', 'Yesterday', 'last_7_days', 'last_30_days', '% Change Last 7 Days', '% Change Last 30 Days', 'create_dt']]
result_df.head()

Unnamed: 0,containment_metric_id,hs_feature_name,cont_display_metric_name,Yesterday,last_7_days,last_30_days,% Change Last 7 Days,% Change Last 30 Days,create_dt
0,1,SMART HELP,Smarthelp_containment_rate,92.64,94.111429,93.957,-1.6,-1.4,2025-02-17
1,2,SMART HELP,PnP_containment_rate,86.62,87.491429,87.575667,-1.0,-1.1,2025-02-17
