In [1]:

import os
import json
import functools
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
df = pd.read_csv(r"D:\shalini\HS_DataClean_Nov.csv")

In [3]:
# Step 1: Read the config.json file
with open("D:\\Healthscore Codes\\healthscore_dashboard_config.json", 'r') as file:
    "D:\\Healthscore Codes\\healthscore_dashboard_config.json"
    config_data = json.load(file)

# Step 2: Extract config1 and config2 from the file
display_name_mapping = config_data['display_name_mapping']
level_1_metrics = config_data['level_1_metrics']
level_2_metrics = config_data['level_2_metrics']
level_3_metrics = config_data['level_3_metrics']
level_4_metrics = config_data['level_4_metrics']
level_5_metrics = config_data['level_5_metrics']
feature_groups = config_data['feature_groups']


# Step 3: Verify the contents
print(display_name_mapping)
print(level_1_metrics)
print(level_2_metrics)
print(level_3_metrics)
print(level_4_metrics)
print(level_5_metrics)
print(feature_groups)


{'DR_Visits': 'Visits', 'Unique Visitors': 'Visitors', 'Page Views': 'Page Views', 'Average Time Spent on Page': 'Average Time Spent (mins)', 'DR_New Visitors': 'New Visitors', 'DR_Server Errors': 'Server Errors', 'DR_Overall Chat': 'Visits to Chat', 'DR_continuing chat from new chat page': 'Continuing chat from new chat page', 'DR_Message Us': 'Visits to Chat (Bot)', 'DR_Agent Chat': 'Visits to Agent Chat (Skip bot)', 'DR_Self Service Initiated': 'Visits to Self Service Features', 'DR_CoxApp | Visits with Chat and No Self Service Initiated': 'Visits with Chat but No Self Service', 'DR_CoxApp | Visits with No Self Service and No Chat': 'Visits with Neither Self Service nor Chat', 'DR_CoxApp | Visits not logged in': 'Not Logged-in Visits', 'DR_Login Page Visits': 'Login Page Visits', 'DR_Successful logins': 'Login Success Rate', 'DR_Auto Logins': 'Share of Auto Login (Remember/Biometrics)', 'DR_Clicks to MFA Verification': 'with TSV', 'DR_Clicks on Forgot User ID': 'User ID Recovery', '

In [4]:
df['display_names'] = df['Metrics'].map(display_name_mapping)

In [5]:
df['level_1'] = df['display_names'].apply(lambda x: x if x in level_1_metrics else None)
df['level_2'] = df['display_names'].apply(lambda x: x if x in level_2_metrics else None)
df['level_3'] = df['display_names'].apply(lambda x: x if x in level_3_metrics else None)
df['level_4'] = df['display_names'].apply(lambda x: x if x in level_4_metrics else None)
df['level_5'] = df['display_names'].apply(lambda x: x if x in level_5_metrics else None)

In [6]:
feature_column = 'Unknown'
df['Feature'] = feature_column
for group, metrics in feature_groups.items():
   df['Feature'] = np.where(df['Metrics'].isin(metrics), group, df['Feature'])

In [7]:
def move_row(df, from_idx, to_idx):
    row = df.iloc[from_idx]
    df = df.drop(from_idx).reset_index(drop=True)
    df = pd.concat([df.iloc[:to_idx], row.to_frame().T, df.iloc[to_idx:]]).reset_index(drop=True)
    return df

df = move_row(df, 50, 46)
df = move_row(df, 51, 47)
df = move_row(df, 52, 48)
df = move_row(df, 53, 49)
df = move_row(df, 164, 158)
df = move_row(df, 165, 159)
df = move_row(df, 180, 174)
df = move_row(df, 181, 175)
df = move_row(df, 218, 206)
df = move_row(df, 219, 207)
df = move_row(df, 220, 208)
df = move_row(df, 221, 209)
df = move_row(df, 280, 268)
df = move_row(df, 281, 269)
df = move_row(df, 282, 270)
df = move_row(df, 283, 271)


In [8]:
df['unique_identifier'] = df['Feature'].apply(lambda x: ''.join([word[0].upper() for word in x.split()]))

In [9]:
def generate_hierarchy_id_grouped_by_os(df):
    df['Hierarchy_ID'] = None

   
    df['Feature_Order'] = df.groupby('Operating System Type')['unique_identifier'].transform(lambda x: pd.Series(range(len(x)), index=x.index))

    grouped = df.groupby(['Operating System Type', 'unique_identifier'], group_keys=False)
    results = []

   
    for (os_name, unique_id), group in grouped:
        current_ids = {'level_1': 0, 'level_2': 0, 'level_3': 0, 'level_4': 0, 'level_5': 0}  

        group = group.sort_values('Feature_Order')  

        for index, row in group.iterrows():
            if pd.isna(row['display_names']):
                continue  

            if not pd.isna(row['level_1']):  
                current_ids['level_1'] += 1
                current_ids['level_2'] = 0  
                current_ids['level_3'] = 0  
                current_ids['level_4'] = 0  
                current_ids['level_5'] = 0  
                group.at[index, 'Hierarchy_ID'] = f"{os_name}_{unique_id}_{current_ids['level_1']}"

            elif not pd.isna(row['level_2']):  
                current_ids['level_2'] += 1
                current_ids['level_3'] = 0  
                current_ids['level_4'] = 0  
                current_ids['level_5'] = 0  
                group.at[index, 'Hierarchy_ID'] = f"{os_name}_{unique_id}_{current_ids['level_1']}.{current_ids['level_2']}"

            elif not pd.isna(row['level_3']):  
                current_ids['level_3'] += 1
                current_ids['level_4'] = 0  
                current_ids['level_5'] = 0  
                group.at[index, 'Hierarchy_ID'] = f"{os_name}_{unique_id}_{current_ids['level_1']}.{current_ids['level_2']}.{current_ids['level_3']}"

            elif not pd.isna(row['level_4']):  
                current_ids['level_4'] += 1
                current_ids['level_5'] = 0  
                group.at[index, 'Hierarchy_ID'] = f"{os_name}_{unique_id}_{current_ids['level_1']}.{current_ids['level_2']}.{current_ids['level_3']}.{current_ids['level_4']}"

            elif not pd.isna(row['level_5']):  
                current_ids['level_5'] += 1
                group.at[index, 'Hierarchy_ID'] = f"{os_name}_{unique_id}_{current_ids['level_1']}.{current_ids['level_2']}.{current_ids['level_3']}.{current_ids['level_4']}.{current_ids['level_5']}"

        results.append(group)

   
    df = pd.concat(results, ignore_index=True)

    
    #df = df[df['Display_Names'].notna()]

    
    cols = [col for col in df.columns if col != 'Hierarchy_ID' and col != 'Feature_Order'] + ['Hierarchy_ID']
    return df[cols]


df = generate_hierarchy_id_grouped_by_os(df)


In [10]:
df = generate_hierarchy_id_grouped_by_os(df)

def assign_parent_id(row):
    if not pd.isna(row['level_5']):  
        return f"{row['Hierarchy_ID'].rsplit('.', 1)[0]}"
    elif not pd.isna(row['level_4']):  
        return f"{row['Hierarchy_ID'].rsplit('.', 1)[0]}"
    elif not pd.isna(row['level_3']):  
        return f"{row['Hierarchy_ID'].rsplit('.', 1)[0]}"
    elif not pd.isna(row['level_2']):  
        return f"{row['Hierarchy_ID'].rsplit('.', 1)[0]}"
    elif not pd.isna(row['level_1']): 
        return None
    else:
        return None


df['Parent_ID'] = df.apply(assign_parent_id, axis=1)


In [11]:
df['metric_id'] = range(1, len(df) + 1)

In [12]:
df['metric_sequence_num'] = df.groupby('Operating System Type').cumcount() + 1

In [13]:
def calculate_level_no(row):
   
    if pd.isna(row['Hierarchy_ID']) or pd.isna(row['Parent_ID']):
        return 1
    
   
    return row['Hierarchy_ID'].count('.') + 1


df['level_no'] = df.apply(calculate_level_no, axis=1)


In [14]:
feature_names = df['Feature'].unique()
feature_mapping = {feature_name: idx + 1 for idx, feature_name in enumerate(feature_names)}

df['feature_id'] = df['Feature'].map(feature_mapping)


In [15]:
df['create_dt'] = pd.to_datetime('today').normalize().date()

In [16]:
df = df.rename(columns={'Parent_ID': 'parent_id_old'})

In [17]:
def calculate_parent_id(row, df):
    
    if pd.isna(row['Hierarchy_ID']):
        return None
   
    hierarchy_parts = row['Hierarchy_ID'].rsplit('.', 1)
    if len(hierarchy_parts) > 1:
       
        parent_hierarchy = hierarchy_parts[0]
        
        parent_row = df[df['Hierarchy_ID'] == parent_hierarchy]
        if not parent_row.empty:
            return parent_row['metric_id'].values[0]

    return None

df['parent_id'] = df.apply(calculate_parent_id, axis=1, df=df)


df.loc[df['Hierarchy_ID'].isna(), 'parent_id'] = None


In [18]:
df = df.rename(columns={
    'Metrics': 'metrics',
    'Operating System Type': 'operating_system_type',
    'Feature': 'feature_name',
    'Operating System Type': 'operating_system_type',
    'Feature': 'feature_name',
    'Operating System Type': 'operating_system_type',
    'Hierarchy_ID': 'hierarchy_id',
})

In [19]:
first_table_columns = ['metric_id', 'metrics','operating_system_type','display_names', 'feature_name', 'unique_identifier', 'level_1', 'level_2', 'level_3', 'level_4','level_5', 'hierarchy_id', 'parent_id_old', 'metric_sequence_num', 'level_no', 'feature_id', 'parent_id','create_dt' ]
first_table_df = df[first_table_columns]

In [20]:
#first_table_df.to_csv(r'D:\shalini\table1_allfeature_data.csv', index = False)

In [21]:
def is_date_column(col_name):
    date_formats = ['%d-%m-%Y']
    for fmt in date_formats:
        try:
            pd.to_datetime(col_name, format=fmt)
            return True
        except ValueError:
            continue  # Try the next format
    return False

# Filter columns with date format 'm/d/Y'
date_columns = [col for col in df.columns if is_date_column(col)]

In [22]:
second_table_columns1 = ['metric_id', 'create_dt', 'metrics', 'operating_system_type']
second_table_columns = second_table_columns1 + date_columns
second_table_df = df[second_table_columns]

In [23]:
filtered_date_columns = second_table_df.columns[4:].to_list()

In [24]:
id_vars = ['metric_id', 'create_dt']

In [25]:
second_table_df_melted = second_table_df.melt(id_vars = id_vars, value_vars = filtered_date_columns, var_name = 'Date', value_name = 'Value')

In [26]:
second_table_df_melted.shape

(41670, 4)

In [27]:
#second_table_df_melted.to_csv(r'D:\shalini\table2_allfeature_data.csv', index = False)

In [28]:
#sample_df = df[df['feature_name'].isin(['SMART HELP', 'RESET MODEM', 'RESET TV BOX'])]
#sample_df.shape

In [29]:
def calculate_percentage(df, datecolumns, metricid, parentid):
    # Ensure these columns exist in the DataFrame
    if metricid not in df.columns or parentid not in df.columns:
        raise ValueError(f"'{metricid}' or '{parentid}' columns not found in DataFrame.")
    
    # Create a copy of the DataFrame to avoid setting on a view
    df = df.copy()
    
    # Iterate through each row and calculate the percentage for each DATE column
    for idx, row in df.iterrows():
        # Iterate through all the DATE columns
        for date_col in datecolumns:
            # Get the numerator (DATE value)
            numerator = row[date_col]
            
            # Get the parent hierarchy ID
            parent_id = row[parentid]
            
            # Check if the parent is empty
            if pd.isna(parent_id) or parent_id == "":
                # If the PARENTID is empty, keep the original DATE value
                percentage = numerator  # Assuming 100% when no parent is found
            else:
                # Find the row where HIERARCHYID == parent_hierarchyid to get the parent's DATE value
                parent_row = df[df[metricid] == parent_id]
                
                # If the parent exists, get the parent's DATE value for the current DATE column
                if not parent_row.empty:
                    denominator = parent_row[date_col].values[0]
                else:
                    denominator = 0  # In case no parent is found, avoid division by zero
                
                # Calculate the percentage
                if denominator != 0:
                    percentage = (numerator / denominator) * 100
                else:
                    percentage = 0  # To handle division by zero if no parent found
                
            # Round to 3 decimal places
            percentage = round(percentage, 3)
            
            # Add the percentage to the DataFrame in a new column
            percentage_column_name = f"{date_col} PERCENTAGE"
            if percentage_column_name not in df.columns:
                df[percentage_column_name] = None  # Initialize the column safely
            
            # Set the percentage for the current row and current date column using .loc
            df.loc[idx, percentage_column_name] = percentage

    return df


In [30]:
percentage_df = calculate_percentage(df, date_columns, 'metric_id', 'parent_id')
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', 5)
percentage_df.head(5)

Unnamed: 0,metrics,operating_system_type,29-08-2024,30-08-2024,31-08-2024,01-09-2024,02-09-2024,03-09-2024,04-09-2024,05-09-2024,06-09-2024,07-09-2024,08-09-2024,09-09-2024,10-09-2024,11-09-2024,12-09-2024,13-09-2024,14-09-2024,15-09-2024,16-09-2024,17-09-2024,18-09-2024,19-09-2024,20-09-2024,21-09-2024,22-09-2024,23-09-2024,24-09-2024,25-09-2024,26-09-2024,27-09-2024,28-09-2024,29-09-2024,30-09-2024,01-10-2024,02-10-2024,03-10-2024,04-10-2024,05-10-2024,06-10-2024,07-10-2024,08-10-2024,09-10-2024,10-10-2024,11-10-2024,12-10-2024,13-10-2024,14-10-2024,15-10-2024,16-10-2024,17-10-2024,18-10-2024,19-10-2024,20-10-2024,21-10-2024,22-10-2024,23-10-2024,24-10-2024,25-10-2024,26-10-2024,27-10-2024,28-10-2024,29-10-2024,30-10-2024,31-10-2024,01-11-2024,02-11-2024,03-11-2024,04-11-2024,05-11-2024,06-11-2024,07-11-2024,08-11-2024,09-11-2024,10-11-2024,11-11-2024,12-11-2024,13-11-2024,14-11-2024,15-11-2024,16-11-2024,17-11-2024,18-11-2024,19-11-2024,20-11-2024,21-11-2024,22-11-2024,23-11-2024,24-11-2024,25-11-2024,26-11-2024,display_names,level_1,level_2,level_3,level_4,level_5,feature_name,unique_identifier,hierarchy_id,parent_id_old,metric_id,metric_sequence_num,level_no,feature_id,create_dt,parent_id,29-08-2024 PERCENTAGE,30-08-2024 PERCENTAGE,31-08-2024 PERCENTAGE,01-09-2024 PERCENTAGE,02-09-2024 PERCENTAGE,03-09-2024 PERCENTAGE,04-09-2024 PERCENTAGE,05-09-2024 PERCENTAGE,06-09-2024 PERCENTAGE,07-09-2024 PERCENTAGE,08-09-2024 PERCENTAGE,09-09-2024 PERCENTAGE,10-09-2024 PERCENTAGE,11-09-2024 PERCENTAGE,12-09-2024 PERCENTAGE,13-09-2024 PERCENTAGE,14-09-2024 PERCENTAGE,15-09-2024 PERCENTAGE,16-09-2024 PERCENTAGE,17-09-2024 PERCENTAGE,18-09-2024 PERCENTAGE,19-09-2024 PERCENTAGE,20-09-2024 PERCENTAGE,21-09-2024 PERCENTAGE,22-09-2024 PERCENTAGE,23-09-2024 PERCENTAGE,24-09-2024 PERCENTAGE,25-09-2024 PERCENTAGE,26-09-2024 PERCENTAGE,27-09-2024 PERCENTAGE,28-09-2024 PERCENTAGE,29-09-2024 PERCENTAGE,30-09-2024 PERCENTAGE,01-10-2024 PERCENTAGE,02-10-2024 PERCENTAGE,03-10-2024 PERCENTAGE,04-10-2024 PERCENTAGE,05-10-2024 PERCENTAGE,06-10-2024 PERCENTAGE,07-10-2024 PERCENTAGE,08-10-2024 PERCENTAGE,09-10-2024 PERCENTAGE,10-10-2024 PERCENTAGE,11-10-2024 PERCENTAGE,12-10-2024 PERCENTAGE,13-10-2024 PERCENTAGE,14-10-2024 PERCENTAGE,15-10-2024 PERCENTAGE,16-10-2024 PERCENTAGE,17-10-2024 PERCENTAGE,18-10-2024 PERCENTAGE,19-10-2024 PERCENTAGE,20-10-2024 PERCENTAGE,21-10-2024 PERCENTAGE,22-10-2024 PERCENTAGE,23-10-2024 PERCENTAGE,24-10-2024 PERCENTAGE,25-10-2024 PERCENTAGE,26-10-2024 PERCENTAGE,27-10-2024 PERCENTAGE,28-10-2024 PERCENTAGE,29-10-2024 PERCENTAGE,30-10-2024 PERCENTAGE,31-10-2024 PERCENTAGE,01-11-2024 PERCENTAGE,02-11-2024 PERCENTAGE,03-11-2024 PERCENTAGE,04-11-2024 PERCENTAGE,05-11-2024 PERCENTAGE,06-11-2024 PERCENTAGE,07-11-2024 PERCENTAGE,08-11-2024 PERCENTAGE,09-11-2024 PERCENTAGE,10-11-2024 PERCENTAGE,11-11-2024 PERCENTAGE,12-11-2024 PERCENTAGE,13-11-2024 PERCENTAGE,14-11-2024 PERCENTAGE,15-11-2024 PERCENTAGE,16-11-2024 PERCENTAGE,17-11-2024 PERCENTAGE,18-11-2024 PERCENTAGE,19-11-2024 PERCENTAGE,20-11-2024 PERCENTAGE,21-11-2024 PERCENTAGE,22-11-2024 PERCENTAGE,23-11-2024 PERCENTAGE,24-11-2024 PERCENTAGE,25-11-2024 PERCENTAGE,26-11-2024 PERCENTAGE
0,DR_Visits to Billing Home,Apple iOS,71742.0,76236.0,54132.0,52596.0,54447.0,68414.0,70355.0,79002.0,78610.0,50904.0,47500.0,59885.0,60599.0,66866.0,75881.0,78958.0,52352.0,48231.0,60364.0,57000.0,63905.0,64500.0,71753.0,49102.0,46906.0,58086.0,61623.0,65367.0,64902.0,74267.0,56022.0,49393.0,69232.0,77165.0,66744.0,73629.0,73180.0,53129.0,47358.0,57406.0,58604.0,65363.0,70718.0,73814.0,50310.0,42931.0,59605.0,62231.0,65649.0,67679.0,73278.0,49194.0,46554.0,58498.0,60820.0,61559.0,64139.0,66553.0,45236.0,40903.0,53844.0,55602.0,62178.0,65295.0,80962.0,55882.0,53873.0,60230.0,61201.0,61253.0,69508.0,73999.0,50243.0,43774.0,54835.0,56204.0,64712.0,75234.0,81910.0,50202.0,45884.0,57871.0,58212.0,65289.0,60150.0,69650.0,48374.0,43920.0,58120.0,59662.0,Billing Section Visits,Billing Section Visits,,,,,BIL LING,BL,Apple iOS_BL_1,,1,1,1,1,2025-01-21,,71742.0,76236.0,54132.0,52596.0,54447.0,68414.0,70355.0,79002.0,78610.0,50904.0,47500.0,59885.0,60599.0,66866.0,75881.0,78958.0,52352.0,48231.0,60364.0,57000.0,63905.0,64500.0,71753.0,49102.0,46906.0,58086.0,61623.0,65367.0,64902.0,74267.0,56022.0,49393.0,69232.0,77165.0,66744.0,73629.0,73180.0,53129.0,47358.0,57406.0,58604.0,65363.0,70718.0,73814.0,50310.0,42931.0,59605.0,62231.0,65649.0,67679.0,73278.0,49194.0,46554.0,58498.0,60820.0,61559.0,64139.0,66553.0,45236.0,40903.0,53844.0,55602.0,62178.0,65295.0,80962.0,55882.0,53873.0,60230.0,61201.0,61253.0,69508.0,73999.0,50243.0,43774.0,54835.0,56204.0,64712.0,75234.0,81910.0,50202.0,45884.0,57871.0,58212.0,65289.0,60150.0,69650.0,48374.0,43920.0,58120.0,59662.0
1,DR_Billing Make a Payment,Apple iOS,28280.0,33162.0,20184.0,19342.0,18744.0,23074.0,25597.0,31689.0,34706.0,17713.0,15849.0,20131.0,21119.0,25574.0,30764.0,36552.0,19503.0,16992.0,20648.0,19263.0,23134.0,25557.0,31429.0,17411.0,16219.0,19266.0,20398.0,23602.0,25123.0,31089.0,21215.0,17343.0,24285.0,29331.0,25040.0,29782.0,32396.0,20023.0,17063.0,20040.0,20800.0,25264.0,30306.0,35522.0,20276.0,16983.0,21302.0,22778.0,25942.0,28664.0,33399.0,18857.0,17125.0,20712.0,21245.0,23520.0,26071.0,29029.0,16364.0,14129.0,17573.0,18863.0,24225.0,29091.0,38172.0,21867.0,19587.0,21349.0,22435.0,24008.0,29916.0,34305.0,19246.0,16278.0,18593.0,19058.0,25251.0,32159.0,38041.0,18992.0,16510.0,20010.0,20630.0,25402.0,23977.0,30860.0,18463.0,15813.0,19604.0,21972.0,Make Payments,,Make Payments,,,,BIL LING,BL,Apple iOS_BL_1.1,Apple iOS_BL_1,2,2,2,1,2025-01-21,1.0,39.419,43.499,37.287,36.775,34.426,33.727,36.383,40.112,44.15,34.797,33.366,33.616,34.85,38.247,40.542,46.293,37.254,35.23,34.206,33.795,36.201,39.623,43.802,35.459,34.578,33.168,33.101,36.107,38.709,41.861,37.869,35.112,35.078,38.011,37.516,40.449,44.269,37.688,36.03,34.909,35.492,38.652,42.855,48.124,40.302,39.559,35.739,36.602,39.516,42.353,45.578,38.332,36.785,35.406,34.931,38.207,40.648,43.618,36.175,34.543,32.637,33.925,38.961,44.553,47.148,39.131,36.358,35.446,36.658,39.195,43.04,46.359,38.306,37.186,33.907,33.909,39.021,42.745,46.442,37.831,35.982,34.577,35.439,38.907,39.862,44.307,38.167,36.004,33.73,36.827
2,DR_Client Errors on Make Payment,Apple iOS,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,Client Errors on Payment Page,,,Client Errors on Payment Page,,,BIL LING,BL,Apple iOS_BL_1.1.1,Apple iOS_BL_1.1,3,3,3,1,2025-01-21,2.0,0.004,0.0,0.005,0.005,0.0,0.0,0.0,0.0,0.0,0.006,0.0,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.004,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.0,0.003,0.0,0.0,0.0,0.004,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.0,0.004,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.003,0.0,0.005,0.0,0.0,0.0,0.004,0.004,0.0,0.0,0.006,0.0,0.0
3,DR_Server Errors on Make Payment,Apple iOS,1.0,3.0,3.0,2.0,0.0,1.0,3.0,2.0,4.0,0.0,0.0,2.0,1.0,0.0,1.0,1.0,2.0,0.0,0.0,1.0,1.0,4.0,5.0,1.0,0.0,1.0,5.0,0.0,2.0,0.0,2.0,2.0,0.0,3.0,1.0,2.0,4.0,3.0,1.0,0.0,2.0,0.0,0.0,1.0,2.0,3.0,1.0,5.0,2.0,4.0,3.0,3.0,4.0,5.0,0.0,1.0,2.0,5.0,2.0,0.0,0.0,1.0,2.0,0.0,1.0,3.0,1.0,3.0,1.0,0.0,0.0,1.0,1.0,1.0,2.0,4.0,0.0,2.0,1.0,2.0,2.0,0.0,1.0,3.0,4.0,3.0,1.0,0.0,0.0,4.0,Server Errors on Payment Page,,,Server Errors on Payment Page,,,BIL LING,BL,Apple iOS_BL_1.1.2,Apple iOS_BL_1.1,4,4,3,1,2025-01-21,2.0,0.004,0.009,0.015,0.01,0.0,0.004,0.012,0.006,0.012,0.0,0.0,0.01,0.005,0.0,0.003,0.003,0.01,0.0,0.0,0.005,0.004,0.016,0.016,0.006,0.0,0.005,0.025,0.0,0.008,0.0,0.009,0.012,0.0,0.01,0.004,0.007,0.012,0.015,0.006,0.0,0.01,0.0,0.0,0.003,0.01,0.018,0.005,0.022,0.008,0.014,0.009,0.016,0.023,0.024,0.0,0.004,0.008,0.017,0.012,0.0,0.0,0.005,0.008,0.0,0.003,0.014,0.005,0.014,0.004,0.0,0.0,0.003,0.005,0.006,0.011,0.021,0.0,0.006,0.003,0.011,0.012,0.0,0.005,0.012,0.017,0.01,0.005,0.0,0.0,0.018
4,DR_Payment Submit,Apple iOS,26212.0,31110.0,19019.0,18204.0,17326.0,20983.0,23423.0,29701.0,33071.0,16852.0,14778.0,18200.0,19220.0,23587.0,28774.0,34617.0,18211.0,15928.0,18855.0,17903.0,21523.0,23888.0,29788.0,16412.0,15165.0,17586.0,18563.0,22058.0,23397.0,29558.0,20092.0,16298.0,22406.0,27424.0,23156.0,27909.0,30548.0,18812.0,15905.0,18233.0,19096.0,23552.0,28419.0,33966.0,19237.0,16081.0,19971.0,21266.0,24401.0,26950.0,31916.0,17943.0,16117.0,19129.0,19610.0,22043.0,24472.0,27782.0,15600.0,13653.0,16740.0,17454.0,22722.0,27921.0,36824.0,20833.0,18596.0,19835.0,21075.0,22430.0,28403.0,32874.0,18367.0,15596.0,17297.0,17747.0,23847.0,30710.0,36348.0,18021.0,15709.0,18541.0,19209.0,23821.0,23340.0,29620.0,17465.0,14847.0,18261.0,20443.0,Payment Submit,,Payment Submit,,,,BIL LING,BL,Apple iOS_BL_1.2,Apple iOS_BL_1,5,5,2,1,2025-01-21,1.0,36.536,40.807,35.134,34.611,31.822,30.671,33.293,37.595,42.07,33.105,31.112,30.392,31.717,35.275,37.92,43.842,34.786,33.024,31.236,31.409,33.68,37.036,41.515,33.424,32.331,30.276,30.123,33.745,36.05,39.8,35.864,32.997,32.364,35.539,34.694,37.905,41.744,35.408,33.585,31.761,32.585,36.033,40.186,46.016,38.237,37.458,33.506,34.173,37.169,39.82,43.555,36.474,34.62,32.7,32.243,35.808,38.155,41.744,34.486,33.379,31.09,31.391,36.543,42.761,45.483,37.28,34.518,32.932,34.436,36.619,40.863,44.425,36.556,35.628,31.544,31.576,36.851,40.819,44.376,35.897,34.236,32.038,32.998,36.485,38.803,42.527,36.104,33.805,31.419,34.265


In [31]:
percentage_df.shape

(463, 198)

In [32]:
id_columns = ['create_dt','metric_id', 'feature_id', 'metric_sequence_num', 'level_no','parent_id', 'unique_identifier', 'feature_name', 'metrics', 'display_names', 'operating_system_type' ]

In [33]:
# List of columns that contain "PERCENTAGE"
percentage_columns = [col for col in percentage_df.columns if 'PERCENTAGE' in col]

In [34]:
percent_df = percentage_df[id_columns + percentage_columns]

In [35]:
# Rename columns to remove the word "PERCENTAGE"
percent_df.columns = [col.replace(" PERCENTAGE", "") if "PERCENTAGE" in col else col for col in percent_df.columns]

In [36]:
percent_df.shape

(463, 101)

In [37]:
percent_df.head()

Unnamed: 0,create_dt,metric_id,feature_id,metric_sequence_num,level_no,parent_id,unique_identifier,feature_name,metrics,display_names,operating_system_type,29-08-2024,30-08-2024,31-08-2024,01-09-2024,02-09-2024,03-09-2024,04-09-2024,05-09-2024,06-09-2024,07-09-2024,08-09-2024,09-09-2024,10-09-2024,11-09-2024,12-09-2024,13-09-2024,14-09-2024,15-09-2024,16-09-2024,17-09-2024,18-09-2024,19-09-2024,20-09-2024,21-09-2024,22-09-2024,23-09-2024,24-09-2024,25-09-2024,26-09-2024,27-09-2024,28-09-2024,29-09-2024,30-09-2024,01-10-2024,02-10-2024,03-10-2024,04-10-2024,05-10-2024,06-10-2024,07-10-2024,08-10-2024,09-10-2024,10-10-2024,11-10-2024,12-10-2024,13-10-2024,14-10-2024,15-10-2024,16-10-2024,17-10-2024,18-10-2024,19-10-2024,20-10-2024,21-10-2024,22-10-2024,23-10-2024,24-10-2024,25-10-2024,26-10-2024,27-10-2024,28-10-2024,29-10-2024,30-10-2024,31-10-2024,01-11-2024,02-11-2024,03-11-2024,04-11-2024,05-11-2024,06-11-2024,07-11-2024,08-11-2024,09-11-2024,10-11-2024,11-11-2024,12-11-2024,13-11-2024,14-11-2024,15-11-2024,16-11-2024,17-11-2024,18-11-2024,19-11-2024,20-11-2024,21-11-2024,22-11-2024,23-11-2024,24-11-2024,25-11-2024,26-11-2024
0,2025-01-21,1,1,1,1,,BL,BIL LING,DR_Visits to Billing Home,Billing Section Visits,Apple iOS,71742.0,76236.0,54132.0,52596.0,54447.0,68414.0,70355.0,79002.0,78610.0,50904.0,47500.0,59885.0,60599.0,66866.0,75881.0,78958.0,52352.0,48231.0,60364.0,57000.0,63905.0,64500.0,71753.0,49102.0,46906.0,58086.0,61623.0,65367.0,64902.0,74267.0,56022.0,49393.0,69232.0,77165.0,66744.0,73629.0,73180.0,53129.0,47358.0,57406.0,58604.0,65363.0,70718.0,73814.0,50310.0,42931.0,59605.0,62231.0,65649.0,67679.0,73278.0,49194.0,46554.0,58498.0,60820.0,61559.0,64139.0,66553.0,45236.0,40903.0,53844.0,55602.0,62178.0,65295.0,80962.0,55882.0,53873.0,60230.0,61201.0,61253.0,69508.0,73999.0,50243.0,43774.0,54835.0,56204.0,64712.0,75234.0,81910.0,50202.0,45884.0,57871.0,58212.0,65289.0,60150.0,69650.0,48374.0,43920.0,58120.0,59662.0
1,2025-01-21,2,1,2,2,1.0,BL,BIL LING,DR_Billing Make a Payment,Make Payments,Apple iOS,39.419,43.499,37.287,36.775,34.426,33.727,36.383,40.112,44.15,34.797,33.366,33.616,34.85,38.247,40.542,46.293,37.254,35.23,34.206,33.795,36.201,39.623,43.802,35.459,34.578,33.168,33.101,36.107,38.709,41.861,37.869,35.112,35.078,38.011,37.516,40.449,44.269,37.688,36.03,34.909,35.492,38.652,42.855,48.124,40.302,39.559,35.739,36.602,39.516,42.353,45.578,38.332,36.785,35.406,34.931,38.207,40.648,43.618,36.175,34.543,32.637,33.925,38.961,44.553,47.148,39.131,36.358,35.446,36.658,39.195,43.04,46.359,38.306,37.186,33.907,33.909,39.021,42.745,46.442,37.831,35.982,34.577,35.439,38.907,39.862,44.307,38.167,36.004,33.73,36.827
2,2025-01-21,3,1,3,3,2.0,BL,BIL LING,DR_Client Errors on Make Payment,Client Errors on Payment Page,Apple iOS,0.004,0.0,0.005,0.005,0.0,0.0,0.0,0.0,0.0,0.006,0.0,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.004,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.0,0.003,0.0,0.0,0.0,0.004,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.0,0.004,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.003,0.0,0.005,0.0,0.0,0.0,0.004,0.004,0.0,0.0,0.006,0.0,0.0
3,2025-01-21,4,1,4,3,2.0,BL,BIL LING,DR_Server Errors on Make Payment,Server Errors on Payment Page,Apple iOS,0.004,0.009,0.015,0.01,0.0,0.004,0.012,0.006,0.012,0.0,0.0,0.01,0.005,0.0,0.003,0.003,0.01,0.0,0.0,0.005,0.004,0.016,0.016,0.006,0.0,0.005,0.025,0.0,0.008,0.0,0.009,0.012,0.0,0.01,0.004,0.007,0.012,0.015,0.006,0.0,0.01,0.0,0.0,0.003,0.01,0.018,0.005,0.022,0.008,0.014,0.009,0.016,0.023,0.024,0.0,0.004,0.008,0.017,0.012,0.0,0.0,0.005,0.008,0.0,0.003,0.014,0.005,0.014,0.004,0.0,0.0,0.003,0.005,0.006,0.011,0.021,0.0,0.006,0.003,0.011,0.012,0.0,0.005,0.012,0.017,0.01,0.005,0.0,0.0,0.018
4,2025-01-21,5,1,5,2,1.0,BL,BIL LING,DR_Payment Submit,Payment Submit,Apple iOS,36.536,40.807,35.134,34.611,31.822,30.671,33.293,37.595,42.07,33.105,31.112,30.392,31.717,35.275,37.92,43.842,34.786,33.024,31.236,31.409,33.68,37.036,41.515,33.424,32.331,30.276,30.123,33.745,36.05,39.8,35.864,32.997,32.364,35.539,34.694,37.905,41.744,35.408,33.585,31.761,32.585,36.033,40.186,46.016,38.237,37.458,33.506,34.173,37.169,39.82,43.555,36.474,34.62,32.7,32.243,35.808,38.155,41.744,34.486,33.379,31.09,31.391,36.543,42.761,45.483,37.28,34.518,32.932,34.436,36.619,40.863,44.425,36.556,35.628,31.544,31.576,36.851,40.819,44.376,35.897,34.236,32.038,32.998,36.485,38.803,42.527,36.104,33.805,31.419,34.265


In [38]:
def calculate_both(df, metricid, parentid):
    """
    Generate new rows for each unique display_name based on given conditions.

    Args:
    - df (DataFrame): Input DataFrame containing the original data.

    Returns:
    - DataFrame: Updated DataFrame with newly calculated rows.
    """
    new_rows = []  # Store new rows
    seq_num = 1  # Sequential ID for metric_id

    for display_name in df['display_names'].unique():
        # Filter rows for the current display_name
        display_df = df[df['display_names'] == display_name]
            
        # Case 1: If `parent_id_old` has null or empty values
        if display_df[parentid].isnull().any() or display_df[parentid].eq('').any():
            # Sum of `Apple iOS` and `Google Android` for `parent_id_old` being null
            ios_rows = display_df[display_df["operating_system_type"] == "Apple iOS"]
            android_rows = display_df[display_df["operating_system_type"] == "Google Android"]
            
            # Calculate numerator using `where` to avoid downcasting issues
            ios_sums = ios_rows[date_columns].apply(pd.to_numeric, errors='coerce').reset_index(drop=True)
            android_sums = android_rows[date_columns].apply(pd.to_numeric, errors='coerce').reset_index(drop=True)
            numerator = ios_sums + android_sums
            
            # Store numerator in new row, rounded to 3 decimal places
            new_row_values = numerator.sum(axis=0).fillna(0).apply(lambda x: round(x, 3)).to_dict()
        else:
            # Case 2: If `parent_id_old` is not null
            ios_rows = display_df[(display_df["operating_system_type"] == "Apple iOS") & 
                                  (display_df[parentid].notna()) & 
                                  (display_df[parentid] != '')]
            android_rows = display_df[(display_df["operating_system_type"] == "Google Android") & 
                                      (display_df[parentid].notna()) & 
                                      (display_df[parentid] != '')]
            
            # Calculate numerator using `where` to avoid downcasting issues
            ios_sums = ios_rows[date_columns].apply(pd.to_numeric, errors='coerce').reset_index(drop=True)
            android_sums = android_rows[date_columns].apply(pd.to_numeric, errors='coerce').reset_index(drop=True)
            numerator = ios_sums + android_sums
            
            # Filter rows where `parent_id_old` matches `hierarchy_id`
            filtered_data = df[df[metricid].isin(display_df[parentid])]
            ios_data = filtered_data[filtered_data['operating_system_type'] == "Apple iOS"]
            android_data = filtered_data[filtered_data['operating_system_type'] == "Google Android"]

            # Calculate denominator
            ios_sums_denom = ios_data[date_columns].apply(pd.to_numeric, errors='coerce').reset_index(drop=True)
            android_sums_denom = android_data[date_columns].apply(pd.to_numeric, errors='coerce').reset_index(drop=True)
            denominator = ios_sums_denom + android_sums_denom

            # Calculate result as percentage
            result = (numerator / denominator) * 100 if not denominator.empty else pd.DataFrame(0, columns=date_columns, index=numerator.index)
            result = result.round(3)  # Round result to 3 decimal places
            new_row_values = result.sum(axis=0).fillna(0).apply(lambda x: round(x, 3)).to_dict()

        # Create the new row for the current display_name
        new_row = {
            'display_names': display_name,
            'operating_system_type': 'Both',
            'metric_id': f'Both_ID_{seq_num}'
        }
        # Add calculated values to the new row
        new_row.update(new_row_values)
        new_rows.append(new_row)
        seq_num += 1

    # Append new rows to the original DataFrame
    new_rows_df = pd.DataFrame(new_rows)
    result_df = pd.concat([df, new_rows_df], ignore_index=True)

    # Reset index
    result_df.reset_index(drop=True, inplace=True)
    return new_rows_df


In [39]:
both_calc_df = calculate_both(df,'metric_id', 'parent_id')
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', 300)      # Show 100 rows
both_calc_df.head(500)

Unnamed: 0,display_names,operating_system_type,metric_id,29-08-2024,30-08-2024,31-08-2024,01-09-2024,02-09-2024,03-09-2024,04-09-2024,05-09-2024,06-09-2024,07-09-2024,08-09-2024,09-09-2024,10-09-2024,11-09-2024,12-09-2024,13-09-2024,14-09-2024,15-09-2024,16-09-2024,17-09-2024,18-09-2024,19-09-2024,20-09-2024,21-09-2024,22-09-2024,23-09-2024,24-09-2024,25-09-2024,26-09-2024,27-09-2024,28-09-2024,29-09-2024,30-09-2024,01-10-2024,02-10-2024,03-10-2024,04-10-2024,05-10-2024,06-10-2024,07-10-2024,08-10-2024,09-10-2024,10-10-2024,11-10-2024,12-10-2024,13-10-2024,14-10-2024,15-10-2024,16-10-2024,17-10-2024,18-10-2024,19-10-2024,20-10-2024,21-10-2024,22-10-2024,23-10-2024,24-10-2024,25-10-2024,26-10-2024,27-10-2024,28-10-2024,29-10-2024,30-10-2024,31-10-2024,01-11-2024,02-11-2024,03-11-2024,04-11-2024,05-11-2024,06-11-2024,07-11-2024,08-11-2024,09-11-2024,10-11-2024,11-11-2024,12-11-2024,13-11-2024,14-11-2024,15-11-2024,16-11-2024,17-11-2024,18-11-2024,19-11-2024,20-11-2024,21-11-2024,22-11-2024,23-11-2024,24-11-2024,25-11-2024,26-11-2024
0,Billing Section Visits,Both,Both_ID_1,93064.0,99690.0,70846.0,68476.0,70777.0,89457.0,91825.0,102981.0,102789.0,66804.0,61675.0,77547.0,78493.0,86857.0,97900.0,102689.0,68247.0,63041.0,77821.0,73506.0,82820.0,83588.0,93325.0,63873.0,60709.0,74423.0,79316.0,85112.0,84457.0,96598.0,72554.0,63698.0,89212.0,101101.0,87238.0,97925.0,96309.0,69725.0,61658.0,74383.0,76292.0,85505.0,92385.0,96333.0,65765.0,55555.0,76985.0,80619.0,85310.0,88308.0,95437.0,64272.0,60384.0,76148.0,79110.0,79923.0,83441.0,86559.0,59192.0,53204.0,69949.0,72277.0,80683.0,85194.0,107366.0,73527.0,70262.0,78308.0,79580.0,80010.0,90858.0,96496.0,65867.0,57008.0,70899.0,72458.0,84140.0,97421.0,106566.0,65666.0,59653.0,74638.0,75190.0,84863.0,79320.0,91369.0,63344.0,56984.0,74961.0,77062.0
1,Make Payments,Both,Both_ID_2,39.606,44.064,37.685,36.988,34.547,34.328,36.616,40.437,44.384,34.913,33.561,33.578,34.942,38.425,40.869,46.502,37.508,35.141,34.211,33.784,36.433,39.903,44.02,35.538,34.632,33.025,33.01,36.374,38.726,42.087,37.558,34.992,35.308,38.815,37.707,41.385,44.555,37.698,35.867,35.04,35.479,38.856,43.07,48.501,40.19,39.424,35.586,36.35,39.715,42.45,45.719,38.139,36.596,35.199,34.814,38.253,40.838,43.697,36.285,34.64,33.157,34.059,39.326,44.588,48.098,39.479,36.464,35.653,36.678,39.403,43.29,46.531,38.359,37.126,33.855,33.854,39.324,42.852,46.429,37.971,35.968,34.436,35.372,38.999,40.205,44.483,38.076,35.8,33.743,36.827
2,Client Errors on Payment Page,Both,Both_ID_3,0.003,0.0,0.004,0.004,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.0,0.0,0.0,0.0,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.003,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.0,0.002,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.003,0.002,0.0,0.004,0.0,0.0,0.0,0.003,0.003,0.0,0.0,0.005,0.0,0.0
3,Server Errors on Payment Page,Both,Both_ID_4,0.263,0.355,0.36,0.276,0.344,0.374,0.404,0.358,0.379,0.412,0.367,0.338,0.368,0.318,0.315,0.285,0.359,0.316,0.316,0.455,0.335,0.315,0.277,0.326,0.366,0.309,0.439,0.355,0.321,0.275,0.349,0.319,0.333,0.385,0.365,0.358,0.329,0.38,0.348,0.315,0.351,0.331,0.279,0.274,0.299,0.356,0.318,0.358,0.342,0.269,0.252,0.396,0.385,0.351,0.305,0.383,0.302,0.336,0.396,0.336,0.375,0.284,0.334,0.329,0.354,0.365,0.324,0.38,0.291,0.333,0.353,0.314,0.336,0.378,0.308,0.314,0.287,0.275,0.249,0.361,0.354,0.315,0.323,0.299,0.376,0.335,0.328,0.284,0.265,0.296
4,Payment Submit,Both,Both_ID_5,36.462,41.049,34.924,34.27,31.465,30.872,33.069,37.242,41.623,32.492,30.623,29.973,31.421,35.098,37.783,43.567,34.5,32.344,30.771,30.814,33.442,36.806,41.259,32.956,31.839,29.644,29.648,33.469,35.623,39.535,35.018,32.337,32.155,35.779,34.416,38.292,41.526,34.8,32.912,31.405,32.103,35.698,39.924,45.811,37.511,36.647,32.779,33.418,36.799,39.445,43.142,35.722,33.83,32.013,31.626,35.349,37.835,41.235,33.891,32.657,30.898,30.977,36.346,42.056,45.845,36.951,33.91,32.62,33.834,36.352,40.48,44.016,35.872,34.767,30.91,31.043,36.587,40.331,43.891,35.379,33.524,31.437,32.337,36.045,38.28,42.035,35.435,33.023,30.82,33.805
5,Payment Success Rate,Both,Both_ID_6,98.108,98.458,98.165,97.797,97.62,97.777,97.533,97.679,97.983,97.609,97.496,97.483,97.697,97.943,98.332,98.549,98.051,97.705,97.524,96.397,97.805,98.173,98.151,97.796,97.791,97.72,97.325,97.616,98.006,98.288,98.008,97.83,97.874,98.2,98.002,98.171,98.18,97.82,97.388,97.598,97.505,97.828,98.224,98.502,96.165,92.185,97.579,97.513,98.003,98.151,98.375,97.792,97.557,97.772,97.826,97.802,98.239,98.218,97.812,97.784,97.474,97.396,97.893,98.27,98.501,98.167,97.612,97.588,97.883,98.009,98.178,98.472,97.926,97.76,97.655,96.848,97.879,98.341,98.499,97.998,97.79,97.656,97.721,98.003,98.199,98.443,97.808,97.253,97.728,97.635
6,Promise to Pay - Linkout,Both,Both_ID_7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001,0.0,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Promise to Pay - Native,Both,Both_ID_8,4.885,4.631,5.316,5.361,6.025,6.0,6.094,5.83,5.601,5.461,6.186,6.449,6.022,5.507,4.733,4.528,5.099,5.968,5.811,5.899,5.302,5.045,4.85,5.229,5.997,5.944,5.858,5.428,5.037,4.736,5.123,5.738,5.582,4.866,5.186,5.257,5.192,5.856,6.074,6.432,5.999,5.581,5.152,4.767,5.469,6.149,6.297,5.681,5.392,5.075,4.738,5.695,6.267,6.152,5.643,5.653,5.096,4.873,5.803,5.584,5.374,5.949,5.56,5.118,4.413,5.414,5.605,5.985,5.514,5.529,5.304,4.853,5.716,6.455,6.306,5.823,5.044,4.736,4.245,5.188,5.98,6.028,5.893,5.468,4.452,4.716,5.647,6.412,5.864,5.636
8,Additional time to pay - Linkout,Both,Both_ID_9,63.858,62.421,67.127,67.066,68.105,66.052,67.012,65.49,65.694,65.543,68.65,68.206,67.273,64.583,63.703,63.075,64.626,68.448,67.625,68.589,66.181,64.643,65.753,66.946,68.827,68.061,67.198,64.892,64.245,65.071,67.097,69.658,65.783,65.142,65.805,64.083,65.8,67.72,69.105,67.914,68.342,65.256,64.034,63.807,66.444,69.731,67.966,67.271,66.913,65.73,64.485,66.093,68.869,68.068,67.115,67.53,66.533,66.927,67.191,69.135,66.933,67.744,65.671,65.528,65.238,66.918,68.918,67.485,67.525,66.207,66.155,65.001,66.826,71.005,68.978,67.86,64.515,64.781,64.125,67.068,68.181,67.348,67.163,65.819,63.466,66.582,68.912,70.088,68.744,67.764
9,Future date payment - Linkout,Both,Both_ID_10,7.347,7.018,7.807,8.662,8.114,8.385,8.381,7.145,8.129,7.209,9.515,9.778,9.351,9.826,8.416,8.215,9.569,9.038,9.553,9.064,8.472,8.086,8.772,8.323,9.063,9.584,9.858,8.701,9.262,8.284,8.232,7.989,9.378,8.516,8.378,9.421,9.24,10.115,9.987,10.619,9.591,8.194,9.202,9.059,9.452,10.597,9.695,10.786,9.065,9.17,8.359,9.153,9.619,10.395,8.58,8.92,9.478,7.515,9.287,7.304,7.236,9.442,8.56,8.784,7.345,8.088,8.304,9.452,9.093,9.358,8.799,8.542,9.19,10.163,10.825,9.955,9.001,8.019,7.847,8.6,8.663,9.58,10.02,8.944,6.372,7.612,9.309,10.673,9.645,9.44


In [40]:
both_calc_df.shape

(181, 93)

In [41]:
unique_display_names_df = df.drop_duplicates(subset='display_names')
unique_display_names_df.shape

(181, 108)

In [42]:
id_columns_both = ['create_dt', 'feature_id', 'unique_identifier', 'feature_name', 'metrics', 'display_names' ]
id_column_both_df = unique_display_names_df[id_columns_both]
#id_column_both_df = id_column_both_df[id_column_both_df['feature_name'].isin(['SMART HELP'])]
id_column_both_df.shape

(181, 6)

In [43]:
# Perform a left merge on 'display_names'
both_df =  pd.merge(both_calc_df, id_column_both_df, on='display_names', how='inner')
both_df.shape

(181, 98)

In [44]:
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', 300)      # Show 100 rows
both_df.head()


Unnamed: 0,display_names,operating_system_type,metric_id,29-08-2024,30-08-2024,31-08-2024,01-09-2024,02-09-2024,03-09-2024,04-09-2024,05-09-2024,06-09-2024,07-09-2024,08-09-2024,09-09-2024,10-09-2024,11-09-2024,12-09-2024,13-09-2024,14-09-2024,15-09-2024,16-09-2024,17-09-2024,18-09-2024,19-09-2024,20-09-2024,21-09-2024,22-09-2024,23-09-2024,24-09-2024,25-09-2024,26-09-2024,27-09-2024,28-09-2024,29-09-2024,30-09-2024,01-10-2024,02-10-2024,03-10-2024,04-10-2024,05-10-2024,06-10-2024,07-10-2024,08-10-2024,09-10-2024,10-10-2024,11-10-2024,12-10-2024,13-10-2024,14-10-2024,15-10-2024,16-10-2024,17-10-2024,18-10-2024,19-10-2024,20-10-2024,21-10-2024,22-10-2024,23-10-2024,24-10-2024,25-10-2024,26-10-2024,27-10-2024,28-10-2024,29-10-2024,30-10-2024,31-10-2024,01-11-2024,02-11-2024,03-11-2024,04-11-2024,05-11-2024,06-11-2024,07-11-2024,08-11-2024,09-11-2024,10-11-2024,11-11-2024,12-11-2024,13-11-2024,14-11-2024,15-11-2024,16-11-2024,17-11-2024,18-11-2024,19-11-2024,20-11-2024,21-11-2024,22-11-2024,23-11-2024,24-11-2024,25-11-2024,26-11-2024,create_dt,feature_id,unique_identifier,feature_name,metrics
0,Billing Section Visits,Both,Both_ID_1,93064.0,99690.0,70846.0,68476.0,70777.0,89457.0,91825.0,102981.0,102789.0,66804.0,61675.0,77547.0,78493.0,86857.0,97900.0,102689.0,68247.0,63041.0,77821.0,73506.0,82820.0,83588.0,93325.0,63873.0,60709.0,74423.0,79316.0,85112.0,84457.0,96598.0,72554.0,63698.0,89212.0,101101.0,87238.0,97925.0,96309.0,69725.0,61658.0,74383.0,76292.0,85505.0,92385.0,96333.0,65765.0,55555.0,76985.0,80619.0,85310.0,88308.0,95437.0,64272.0,60384.0,76148.0,79110.0,79923.0,83441.0,86559.0,59192.0,53204.0,69949.0,72277.0,80683.0,85194.0,107366.0,73527.0,70262.0,78308.0,79580.0,80010.0,90858.0,96496.0,65867.0,57008.0,70899.0,72458.0,84140.0,97421.0,106566.0,65666.0,59653.0,74638.0,75190.0,84863.0,79320.0,91369.0,63344.0,56984.0,74961.0,77062.0,2025-01-21,1,BL,BIL LING,DR_Visits to Billing Home
1,Make Payments,Both,Both_ID_2,39.606,44.064,37.685,36.988,34.547,34.328,36.616,40.437,44.384,34.913,33.561,33.578,34.942,38.425,40.869,46.502,37.508,35.141,34.211,33.784,36.433,39.903,44.02,35.538,34.632,33.025,33.01,36.374,38.726,42.087,37.558,34.992,35.308,38.815,37.707,41.385,44.555,37.698,35.867,35.04,35.479,38.856,43.07,48.501,40.19,39.424,35.586,36.35,39.715,42.45,45.719,38.139,36.596,35.199,34.814,38.253,40.838,43.697,36.285,34.64,33.157,34.059,39.326,44.588,48.098,39.479,36.464,35.653,36.678,39.403,43.29,46.531,38.359,37.126,33.855,33.854,39.324,42.852,46.429,37.971,35.968,34.436,35.372,38.999,40.205,44.483,38.076,35.8,33.743,36.827,2025-01-21,1,BL,BIL LING,DR_Billing Make a Payment
2,Client Errors on Payment Page,Both,Both_ID_3,0.003,0.0,0.004,0.004,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.0,0.0,0.0,0.0,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.003,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.0,0.002,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.003,0.002,0.0,0.004,0.0,0.0,0.0,0.003,0.003,0.0,0.0,0.005,0.0,0.0,2025-01-21,1,BL,BIL LING,DR_Client Errors on Make Payment
3,Server Errors on Payment Page,Both,Both_ID_4,0.263,0.355,0.36,0.276,0.344,0.374,0.404,0.358,0.379,0.412,0.367,0.338,0.368,0.318,0.315,0.285,0.359,0.316,0.316,0.455,0.335,0.315,0.277,0.326,0.366,0.309,0.439,0.355,0.321,0.275,0.349,0.319,0.333,0.385,0.365,0.358,0.329,0.38,0.348,0.315,0.351,0.331,0.279,0.274,0.299,0.356,0.318,0.358,0.342,0.269,0.252,0.396,0.385,0.351,0.305,0.383,0.302,0.336,0.396,0.336,0.375,0.284,0.334,0.329,0.354,0.365,0.324,0.38,0.291,0.333,0.353,0.314,0.336,0.378,0.308,0.314,0.287,0.275,0.249,0.361,0.354,0.315,0.323,0.299,0.376,0.335,0.328,0.284,0.265,0.296,2025-01-21,1,BL,BIL LING,DR_Server Errors on Make Payment
4,Payment Submit,Both,Both_ID_5,36.462,41.049,34.924,34.27,31.465,30.872,33.069,37.242,41.623,32.492,30.623,29.973,31.421,35.098,37.783,43.567,34.5,32.344,30.771,30.814,33.442,36.806,41.259,32.956,31.839,29.644,29.648,33.469,35.623,39.535,35.018,32.337,32.155,35.779,34.416,38.292,41.526,34.8,32.912,31.405,32.103,35.698,39.924,45.811,37.511,36.647,32.779,33.418,36.799,39.445,43.142,35.722,33.83,32.013,31.626,35.349,37.835,41.235,33.891,32.657,30.898,30.977,36.346,42.056,45.845,36.951,33.91,32.62,33.834,36.352,40.48,44.016,35.872,34.767,30.91,31.043,36.587,40.331,43.891,35.379,33.524,31.437,32.337,36.045,38.28,42.035,35.435,33.023,30.82,33.805,2025-01-21,1,BL,BIL LING,DR_Payment Submit


In [45]:
third_table_df = pd.concat([percent_df, both_df], ignore_index=True)

In [46]:
third_table_df.shape

(644, 101)

In [47]:
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', 200)      # Show 100 rows
third_table_df.head(5)

Unnamed: 0,create_dt,metric_id,feature_id,metric_sequence_num,level_no,parent_id,unique_identifier,feature_name,metrics,display_names,operating_system_type,29-08-2024,30-08-2024,31-08-2024,01-09-2024,02-09-2024,03-09-2024,04-09-2024,05-09-2024,06-09-2024,07-09-2024,08-09-2024,09-09-2024,10-09-2024,11-09-2024,12-09-2024,13-09-2024,14-09-2024,15-09-2024,16-09-2024,17-09-2024,18-09-2024,19-09-2024,20-09-2024,21-09-2024,22-09-2024,23-09-2024,24-09-2024,25-09-2024,26-09-2024,27-09-2024,28-09-2024,29-09-2024,30-09-2024,01-10-2024,02-10-2024,03-10-2024,04-10-2024,05-10-2024,06-10-2024,07-10-2024,08-10-2024,09-10-2024,10-10-2024,11-10-2024,12-10-2024,13-10-2024,14-10-2024,15-10-2024,16-10-2024,17-10-2024,18-10-2024,19-10-2024,20-10-2024,21-10-2024,22-10-2024,23-10-2024,24-10-2024,25-10-2024,26-10-2024,27-10-2024,28-10-2024,29-10-2024,30-10-2024,31-10-2024,01-11-2024,02-11-2024,03-11-2024,04-11-2024,05-11-2024,06-11-2024,07-11-2024,08-11-2024,09-11-2024,10-11-2024,11-11-2024,12-11-2024,13-11-2024,14-11-2024,15-11-2024,16-11-2024,17-11-2024,18-11-2024,19-11-2024,20-11-2024,21-11-2024,22-11-2024,23-11-2024,24-11-2024,25-11-2024,26-11-2024
0,2025-01-21,1,1,1.0,1.0,,BL,BIL LING,DR_Visits to Billing Home,Billing Section Visits,Apple iOS,71742.0,76236.0,54132.0,52596.0,54447.0,68414.0,70355.0,79002.0,78610.0,50904.0,47500.0,59885.0,60599.0,66866.0,75881.0,78958.0,52352.0,48231.0,60364.0,57000.0,63905.0,64500.0,71753.0,49102.0,46906.0,58086.0,61623.0,65367.0,64902.0,74267.0,56022.0,49393.0,69232.0,77165.0,66744.0,73629.0,73180.0,53129.0,47358.0,57406.0,58604.0,65363.0,70718.0,73814.0,50310.0,42931.0,59605.0,62231.0,65649.0,67679.0,73278.0,49194.0,46554.0,58498.0,60820.0,61559.0,64139.0,66553.0,45236.0,40903.0,53844.0,55602.0,62178.0,65295.0,80962.0,55882.0,53873.0,60230.0,61201.0,61253.0,69508.0,73999.0,50243.0,43774.0,54835.0,56204.0,64712.0,75234.0,81910.0,50202.0,45884.0,57871.0,58212.0,65289.0,60150.0,69650.0,48374.0,43920.0,58120.0,59662.0
1,2025-01-21,2,1,2.0,2.0,1.0,BL,BIL LING,DR_Billing Make a Payment,Make Payments,Apple iOS,39.419,43.499,37.287,36.775,34.426,33.727,36.383,40.112,44.15,34.797,33.366,33.616,34.85,38.247,40.542,46.293,37.254,35.23,34.206,33.795,36.201,39.623,43.802,35.459,34.578,33.168,33.101,36.107,38.709,41.861,37.869,35.112,35.078,38.011,37.516,40.449,44.269,37.688,36.03,34.909,35.492,38.652,42.855,48.124,40.302,39.559,35.739,36.602,39.516,42.353,45.578,38.332,36.785,35.406,34.931,38.207,40.648,43.618,36.175,34.543,32.637,33.925,38.961,44.553,47.148,39.131,36.358,35.446,36.658,39.195,43.04,46.359,38.306,37.186,33.907,33.909,39.021,42.745,46.442,37.831,35.982,34.577,35.439,38.907,39.862,44.307,38.167,36.004,33.73,36.827
2,2025-01-21,3,1,3.0,3.0,2.0,BL,BIL LING,DR_Client Errors on Make Payment,Client Errors on Payment Page,Apple iOS,0.004,0.0,0.005,0.005,0.0,0.0,0.0,0.0,0.0,0.006,0.0,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.004,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.0,0.003,0.0,0.0,0.0,0.004,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.0,0.0,0.004,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.003,0.0,0.005,0.0,0.0,0.0,0.004,0.004,0.0,0.0,0.006,0.0,0.0
3,2025-01-21,4,1,4.0,3.0,2.0,BL,BIL LING,DR_Server Errors on Make Payment,Server Errors on Payment Page,Apple iOS,0.004,0.009,0.015,0.01,0.0,0.004,0.012,0.006,0.012,0.0,0.0,0.01,0.005,0.0,0.003,0.003,0.01,0.0,0.0,0.005,0.004,0.016,0.016,0.006,0.0,0.005,0.025,0.0,0.008,0.0,0.009,0.012,0.0,0.01,0.004,0.007,0.012,0.015,0.006,0.0,0.01,0.0,0.0,0.003,0.01,0.018,0.005,0.022,0.008,0.014,0.009,0.016,0.023,0.024,0.0,0.004,0.008,0.017,0.012,0.0,0.0,0.005,0.008,0.0,0.003,0.014,0.005,0.014,0.004,0.0,0.0,0.003,0.005,0.006,0.011,0.021,0.0,0.006,0.003,0.011,0.012,0.0,0.005,0.012,0.017,0.01,0.005,0.0,0.0,0.018
4,2025-01-21,5,1,5.0,2.0,1.0,BL,BIL LING,DR_Payment Submit,Payment Submit,Apple iOS,36.536,40.807,35.134,34.611,31.822,30.671,33.293,37.595,42.07,33.105,31.112,30.392,31.717,35.275,37.92,43.842,34.786,33.024,31.236,31.409,33.68,37.036,41.515,33.424,32.331,30.276,30.123,33.745,36.05,39.8,35.864,32.997,32.364,35.539,34.694,37.905,41.744,35.408,33.585,31.761,32.585,36.033,40.186,46.016,38.237,37.458,33.506,34.173,37.169,39.82,43.555,36.474,34.62,32.7,32.243,35.808,38.155,41.744,34.486,33.379,31.09,31.391,36.543,42.761,45.483,37.28,34.518,32.932,34.436,36.619,40.863,44.425,36.556,35.628,31.544,31.576,36.851,40.819,44.376,35.897,34.236,32.038,32.998,36.485,38.803,42.527,36.104,33.805,31.419,34.265


In [48]:
#third_table_df.to_csv(r'D:\Healthscore Codes\table data\table3_allfeatures_data.csv', index = False)

In [49]:
third_table_melted_df = pd.melt(third_table_df, id_vars=id_columns, 
                     value_vars=date_columns, 
                     var_name='Date', 
                     value_name='Value')

In [50]:
third_table_melted_df.shape

(57960, 13)

In [51]:
third_table_final_df = third_table_melted_df[['create_dt', 'metric_id', 'feature_id', 'unique_identifier', 'feature_name', 'metrics', 'display_names', 'operating_system_type', 'Date', 'Value']]

In [52]:
third_table_final_df.head()

Unnamed: 0,create_dt,metric_id,feature_id,unique_identifier,feature_name,metrics,display_names,operating_system_type,Date,Value
0,2025-01-21,1,1,BL,BIL LING,DR_Visits to Billing Home,Billing Section Visits,Apple iOS,29-08-2024,71742.0
1,2025-01-21,2,1,BL,BIL LING,DR_Billing Make a Payment,Make Payments,Apple iOS,29-08-2024,39.419
2,2025-01-21,3,1,BL,BIL LING,DR_Client Errors on Make Payment,Client Errors on Payment Page,Apple iOS,29-08-2024,0.004
3,2025-01-21,4,1,BL,BIL LING,DR_Server Errors on Make Payment,Server Errors on Payment Page,Apple iOS,29-08-2024,0.004
4,2025-01-21,5,1,BL,BIL LING,DR_Payment Submit,Payment Submit,Apple iOS,29-08-2024,36.536


In [53]:
third_table_final_df.shape

(57960, 10)

In [54]:
#third_table_final_df.to_csv(r'D:\Healthscore Codes\table data\table3_all_features_data_athena.csv', index = False)

In [55]:
third_table_melted_df.dtypes

create_dt                 object
metric_id                 object
feature_id                 int64
metric_sequence_num      float64
level_no                 float64
parent_id                float64
unique_identifier         object
feature_name              object
metrics                   object
display_names             object
operating_system_type     object
Date                      object
Value                     object
dtype: object

In [56]:
# Convert 'create_dt' and 'Date' to datetime
third_table_melted_df['create_dt'] = pd.to_datetime(third_table_melted_df['create_dt'], errors='coerce')
third_table_melted_df['Date'] = pd.to_datetime(third_table_melted_df['Date'], errors='coerce')

# Convert 'feature_id', 'metric_sequence_num', 'level_no', 'parent_id' to integers
third_table_melted_df['feature_id'] = third_table_melted_df['feature_id'].astype('Int64')  # 'Int64' to handle missing values (NaN)
third_table_melted_df['metric_sequence_num'] = third_table_melted_df['metric_sequence_num'].astype('Int64')
third_table_melted_df['level_no'] = third_table_melted_df['level_no'].astype('Int64')
third_table_melted_df['parent_id'] = third_table_melted_df['parent_id'].astype('Int64')

# Convert 'metric_id', 'unique_identifier', 'feature_name', 'metrics', 'display_names', 'operating_system_type' to strings
third_table_melted_df['metric_id'] = third_table_melted_df['metric_id'].astype(str)
third_table_melted_df['unique_identifier'] = third_table_melted_df['unique_identifier'].astype(str)
third_table_melted_df['feature_name'] = third_table_melted_df['feature_name'].astype(str)
third_table_melted_df['metrics'] = third_table_melted_df['metrics'].astype(str)
third_table_melted_df['display_names'] = third_table_melted_df['display_names'].astype(str)
third_table_melted_df['operating_system_type'] = third_table_melted_df['operating_system_type'].astype(str)

# Convert 'Value' to float
third_table_melted_df['Value'] = pd.to_numeric(third_table_melted_df['Value'], errors='coerce')

  third_table_melted_df['Date'] = pd.to_datetime(third_table_melted_df['Date'], errors='coerce')


In [57]:
third_table_melted_df.dtypes

create_dt                datetime64[ns]
metric_id                        object
feature_id                        Int64
metric_sequence_num               Int64
level_no                          Int64
parent_id                         Int64
unique_identifier                object
feature_name                     object
metrics                          object
display_names                    object
operating_system_type            object
Date                     datetime64[ns]
Value                           float64
dtype: object

In [58]:
#Calculate 1st and 3rd quartiles
quartiles = third_table_melted_df.groupby(['operating_system_type', 'display_names'])['Value'].quantile([0.25, 0.75]).unstack()
quartiles.reset_index(inplace=True)
quartiles.columns.name = None  # Remove the index name
quartiles.columns = ['operating_system_type', 'display_names', '1st Quartile', '3rd Quartile']
#Calculate IQR
quartiles['IQR'] = quartiles['3rd Quartile'] - quartiles['1st Quartile']

In [59]:
quartiles.shape

(543, 5)

In [60]:
quartiles_iqr_df = pd.merge(third_table_df, quartiles, on=['display_names', 'operating_system_type'], how='outer')

In [61]:
quartiles_iqr_df.shape

(647, 104)

In [62]:
quartiles_iqr_df.head()

Unnamed: 0,create_dt,metric_id,feature_id,metric_sequence_num,level_no,parent_id,unique_identifier,feature_name,metrics,display_names,operating_system_type,29-08-2024,30-08-2024,31-08-2024,01-09-2024,02-09-2024,03-09-2024,04-09-2024,05-09-2024,06-09-2024,07-09-2024,08-09-2024,09-09-2024,10-09-2024,11-09-2024,12-09-2024,13-09-2024,14-09-2024,15-09-2024,16-09-2024,17-09-2024,18-09-2024,19-09-2024,20-09-2024,21-09-2024,22-09-2024,23-09-2024,24-09-2024,25-09-2024,26-09-2024,27-09-2024,28-09-2024,29-09-2024,30-09-2024,01-10-2024,02-10-2024,03-10-2024,04-10-2024,05-10-2024,06-10-2024,07-10-2024,08-10-2024,09-10-2024,10-10-2024,11-10-2024,12-10-2024,13-10-2024,14-10-2024,15-10-2024,16-10-2024,17-10-2024,18-10-2024,19-10-2024,20-10-2024,21-10-2024,22-10-2024,23-10-2024,24-10-2024,25-10-2024,26-10-2024,27-10-2024,28-10-2024,29-10-2024,30-10-2024,31-10-2024,01-11-2024,02-11-2024,03-11-2024,04-11-2024,05-11-2024,06-11-2024,07-11-2024,08-11-2024,09-11-2024,10-11-2024,11-11-2024,12-11-2024,13-11-2024,14-11-2024,15-11-2024,16-11-2024,17-11-2024,18-11-2024,19-11-2024,20-11-2024,21-11-2024,22-11-2024,23-11-2024,24-11-2024,25-11-2024,26-11-2024,1st Quartile,3rd Quartile,IQR
0,2025-01-21,186,15.0,186.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Apple iOS,78.947,84.0,80.909,68.235,78.049,74.118,73.786,77.622,81.053,77.228,75.455,83.784,88.095,72.727,85.321,82.653,80.435,89.046,60.099,8.108,8.333,7.955,6.667,4.615,7.595,10.938,16.667,9.091,11.628,11.765,10.145,7.921,10.753,8.974,10.667,63.636,75.862,75.789,73.958,75.309,69.663,68.085,74.667,75.714,78.824,60.317,66.667,63.725,57.955,57.647,58.889,54.455,62.195,47.561,61.765,58.095,63.014,54.545,59.211,56.79,65.116,51.25,55.102,54.545,71.429,67.442,65.854,63.095,54.412,66.667,60.0,53.846,57.895,46.479,53.086,65.556,56.164,63.529,70.0,60.0,54.545,55.357,51.351,59.459,55.0,56.962,56.818,62.295,66.142,66.216,54.42275,73.52125,19.0985
1,2025-01-21,Both_ID_143,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Both,78.689,80.556,80.392,68.462,76.923,70.29,74.497,76.682,79.221,71.014,74.405,75.214,83.871,76.271,84.516,83.099,77.419,90.612,60.912,11.0,10.345,7.965,6.542,7.407,8.13,9.195,16.176,9.804,11.29,14.286,11.828,8.955,12.5,8.85,8.612,65.254,77.344,69.014,73.485,75.0,66.942,65.854,72.727,70.707,74.167,61.053,66.154,65.909,57.895,58.586,62.295,55.072,61.947,55.085,66.412,58.451,64.356,54.762,57.983,58.182,64.516,49.495,57.246,56.25,71.329,66.387,67.816,64.8,59.574,68.687,54.701,55.455,57.522,47.423,55.446,67.391,57.009,61.789,65.254,57.895,53.153,52.941,49.541,55.446,56.911,57.895,48.701,65.318,61.053,64.151,54.71625,70.60275,15.8865
2,2025-01-21,420,15.0,184.0,4.0,419.0,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Google Android,78.261,72.727,79.07,68.889,74.286,64.151,76.087,75.0,76.271,54.054,72.414,60.465,75.0,82.927,82.609,84.091,68.75,92.754,62.5,19.231,14.815,8.0,6.25,11.628,9.091,4.348,14.706,11.111,10.526,18.919,16.667,12.121,18.519,8.571,3.39,68.293,80.488,55.319,72.222,74.074,59.375,58.621,66.667,58.621,62.857,62.5,64.706,73.333,57.692,64.286,71.875,56.757,61.29,72.222,82.759,59.459,67.857,55.263,55.814,62.069,63.158,42.105,62.5,60.0,71.053,63.636,72.549,68.293,73.077,73.333,40.625,59.375,56.757,50.0,65.0,70.833,58.824,57.895,55.263,53.488,50.0,48.276,45.714,44.444,60.465,60.0,37.879,72.549,50.794,59.375,50.0,72.13525,22.13525
3,2025-01-21,187,15.0,187.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Apple iOS,14.474,10.0,11.818,18.824,15.854,15.294,16.505,11.888,10.526,12.871,16.364,8.108,7.143,19.481,11.009,14.286,9.783,9.541,21.182,1.351,0.0,2.273,1.333,0.0,0.0,0.0,0.0,1.515,1.163,0.0,0.0,0.0,0.0,0.0,0.0,10.39,9.195,9.474,16.667,18.519,15.73,13.83,13.333,11.429,11.765,22.222,17.708,22.549,28.409,22.353,22.222,37.624,23.171,26.829,26.471,27.619,26.027,26.136,22.368,24.691,29.07,25.0,22.449,25.974,17.143,24.419,20.325,27.381,27.941,21.739,23.529,17.949,25.0,33.803,28.395,20.0,27.397,17.647,20.0,23.333,23.377,26.786,22.973,25.676,17.5,20.253,22.727,14.754,16.535,21.622,10.0975,23.2925,13.195
4,2025-01-21,Both_ID_144,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Both,13.115,11.111,13.725,18.462,17.094,21.739,16.779,13.901,11.688,16.667,19.048,14.53,12.903,14.407,12.258,11.972,12.903,7.755,23.127,1.0,0.0,1.77,0.935,0.0,0.0,1.149,0.0,0.98,0.806,0.0,0.0,0.0,0.0,0.0,0.0,8.475,10.156,15.493,18.182,18.519,16.529,16.26,15.152,15.152,17.5,22.105,15.385,22.727,26.316,25.253,21.311,34.058,24.779,21.186,25.191,26.761,27.723,30.159,24.37,24.545,27.419,27.273,22.464,23.214,19.58,24.37,21.264,22.4,23.404,19.192,24.786,20.0,25.664,34.021,27.723,18.841,24.299,17.886,18.644,21.805,27.928,29.412,26.606,26.733,21.951,21.053,25.974,13.295,18.947,23.585,12.41925,24.1205,11.70125


In [63]:
quartiles_iqr_df_sample = quartiles_iqr_df[quartiles_iqr_df['feature_name'].isin(['SMART HELP'])]

In [65]:
quartiles_iqr_df_sample.shape

(54, 104)

In [66]:
import pandas as pd

def calculate_upper_lower_test(df, parent_column='parent_id'):
    # Ensure parent_column is present in the dataframe
    if parent_column not in df.columns:
        raise ValueError(f"{parent_column} not found in the dataframe")
    
    # Define the function to apply to each row
    def calculate_row(row):
        # First, check if the parent_id is null, empty, or None for the given display_names
        display_name = row['display_names']
        is_parent_null = pd.isna(row[parent_column]) or row[parent_column] == ''
        
        # Find if this unique display_name has any null parent_id rows
        if is_parent_null:
            # If parent_id is null for this display_name, apply the 1.25 logic for IOS, Android, or Both
            if row['operating_system_type'] in ['Apple IOS', 'Google Android', 'Both']:
                upper = row['3rd Quartile'] + (1.25 * row['IQR'])
                lower = row['1st Quartile'] - (1.25 * row['IQR'])
            else:
                # Apply the default logic for other operating_system_types (if applicable)
                upper = row['3rd Quartile'] + (0.75 * row['IQR'])
                lower = row['1st Quartile'] - (0.75 * row['IQR'])
        else:
            # For rows where parent_id is not null, apply the 0.75 logic
            upper = row['3rd Quartile'] + (0.75 * row['IQR'])
            lower = row['1st Quartile'] - (0.75 * row['IQR'])
        
        return pd.Series({'Upper': upper, 'Lower': lower})
    
    # Apply the function row by row
    df[['Upper', 'Lower']] = df.apply(calculate_row, axis=1)

    # Round the Upper column to 1 decimal place
    df['Upper'] = df['Upper'].round(1)

    return df


In [69]:
# Example usage
upper_lower_test_df = calculate_upper_lower_test(quartiles_iqr_df_sample)
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', 100)      # Show 100 rows
upper_lower_test_df.head(100)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['Upper', 'Lower']] = df.apply(calculate_row, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Upper'] = df['Upper'].round(1)


Unnamed: 0,create_dt,metric_id,feature_id,metric_sequence_num,level_no,parent_id,unique_identifier,feature_name,metrics,display_names,operating_system_type,29-08-2024,30-08-2024,31-08-2024,01-09-2024,02-09-2024,03-09-2024,04-09-2024,05-09-2024,06-09-2024,07-09-2024,08-09-2024,09-09-2024,10-09-2024,11-09-2024,12-09-2024,13-09-2024,14-09-2024,15-09-2024,16-09-2024,17-09-2024,18-09-2024,19-09-2024,20-09-2024,21-09-2024,22-09-2024,23-09-2024,24-09-2024,25-09-2024,26-09-2024,27-09-2024,28-09-2024,29-09-2024,30-09-2024,01-10-2024,02-10-2024,03-10-2024,04-10-2024,05-10-2024,06-10-2024,07-10-2024,08-10-2024,09-10-2024,10-10-2024,11-10-2024,12-10-2024,13-10-2024,14-10-2024,15-10-2024,16-10-2024,17-10-2024,18-10-2024,19-10-2024,20-10-2024,21-10-2024,22-10-2024,23-10-2024,24-10-2024,25-10-2024,26-10-2024,27-10-2024,28-10-2024,29-10-2024,30-10-2024,31-10-2024,01-11-2024,02-11-2024,03-11-2024,04-11-2024,05-11-2024,06-11-2024,07-11-2024,08-11-2024,09-11-2024,10-11-2024,11-11-2024,12-11-2024,13-11-2024,14-11-2024,15-11-2024,16-11-2024,17-11-2024,18-11-2024,19-11-2024,20-11-2024,21-11-2024,22-11-2024,23-11-2024,24-11-2024,25-11-2024,26-11-2024,1st Quartile,3rd Quartile,IQR,Upper,Lower
0,2025-01-21,186,15.0,186.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Apple iOS,78.947,84.0,80.909,68.235,78.049,74.118,73.786,77.622,81.053,77.228,75.455,83.784,88.095,72.727,85.321,82.653,80.435,89.046,60.099,8.108,8.333,7.955,6.667,4.615,7.595,10.938,16.667,9.091,11.628,11.765,10.145,7.921,10.753,8.974,10.667,63.636,75.862,75.789,73.958,75.309,69.663,68.085,74.667,75.714,78.824,60.317,66.667,63.725,57.955,57.647,58.889,54.455,62.195,47.561,61.765,58.095,63.014,54.545,59.211,56.79,65.116,51.25,55.102,54.545,71.429,67.442,65.854,63.095,54.412,66.667,60.0,53.846,57.895,46.479,53.086,65.556,56.164,63.529,70.0,60.0,54.545,55.357,51.351,59.459,55.0,56.962,56.818,62.295,66.142,66.216,54.42275,73.52125,19.0985,87.8,40.098875
1,2025-01-21,Both_ID_143,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Both,78.689,80.556,80.392,68.462,76.923,70.29,74.497,76.682,79.221,71.014,74.405,75.214,83.871,76.271,84.516,83.099,77.419,90.612,60.912,11.0,10.345,7.965,6.542,7.407,8.13,9.195,16.176,9.804,11.29,14.286,11.828,8.955,12.5,8.85,8.612,65.254,77.344,69.014,73.485,75.0,66.942,65.854,72.727,70.707,74.167,61.053,66.154,65.909,57.895,58.586,62.295,55.072,61.947,55.085,66.412,58.451,64.356,54.762,57.983,58.182,64.516,49.495,57.246,56.25,71.329,66.387,67.816,64.8,59.574,68.687,54.701,55.455,57.522,47.423,55.446,67.391,57.009,61.789,65.254,57.895,53.153,52.941,49.541,55.446,56.911,57.895,48.701,65.318,61.053,64.151,54.71625,70.60275,15.8865,90.5,34.858125
2,2025-01-21,420,15.0,184.0,4.0,419.0,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Google Android,78.261,72.727,79.07,68.889,74.286,64.151,76.087,75.0,76.271,54.054,72.414,60.465,75.0,82.927,82.609,84.091,68.75,92.754,62.5,19.231,14.815,8.0,6.25,11.628,9.091,4.348,14.706,11.111,10.526,18.919,16.667,12.121,18.519,8.571,3.39,68.293,80.488,55.319,72.222,74.074,59.375,58.621,66.667,58.621,62.857,62.5,64.706,73.333,57.692,64.286,71.875,56.757,61.29,72.222,82.759,59.459,67.857,55.263,55.814,62.069,63.158,42.105,62.5,60.0,71.053,63.636,72.549,68.293,73.077,73.333,40.625,59.375,56.757,50.0,65.0,70.833,58.824,57.895,55.263,53.488,50.0,48.276,45.714,44.444,60.465,60.0,37.879,72.549,50.794,59.375,50.0,72.13525,22.13525,88.7,33.398562
3,2025-01-21,187,15.0,187.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Apple iOS,14.474,10.0,11.818,18.824,15.854,15.294,16.505,11.888,10.526,12.871,16.364,8.108,7.143,19.481,11.009,14.286,9.783,9.541,21.182,1.351,0.0,2.273,1.333,0.0,0.0,0.0,0.0,1.515,1.163,0.0,0.0,0.0,0.0,0.0,0.0,10.39,9.195,9.474,16.667,18.519,15.73,13.83,13.333,11.429,11.765,22.222,17.708,22.549,28.409,22.353,22.222,37.624,23.171,26.829,26.471,27.619,26.027,26.136,22.368,24.691,29.07,25.0,22.449,25.974,17.143,24.419,20.325,27.381,27.941,21.739,23.529,17.949,25.0,33.803,28.395,20.0,27.397,17.647,20.0,23.333,23.377,26.786,22.973,25.676,17.5,20.253,22.727,14.754,16.535,21.622,10.0975,23.2925,13.195,33.2,0.20125
4,2025-01-21,Both_ID_144,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Both,13.115,11.111,13.725,18.462,17.094,21.739,16.779,13.901,11.688,16.667,19.048,14.53,12.903,14.407,12.258,11.972,12.903,7.755,23.127,1.0,0.0,1.77,0.935,0.0,0.0,1.149,0.0,0.98,0.806,0.0,0.0,0.0,0.0,0.0,0.0,8.475,10.156,15.493,18.182,18.519,16.529,16.26,15.152,15.152,17.5,22.105,15.385,22.727,26.316,25.253,21.311,34.058,24.779,21.186,25.191,26.761,27.723,30.159,24.37,24.545,27.419,27.273,22.464,23.214,19.58,24.37,21.264,22.4,23.404,19.192,24.786,20.0,25.664,34.021,27.723,18.841,24.299,17.886,18.644,21.805,27.928,29.412,26.606,26.733,21.951,21.053,25.974,13.295,18.947,23.585,12.41925,24.1205,11.70125,38.7,-2.207313
5,2025-01-21,421,15.0,185.0,4.0,419.0,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Google Android,10.87,13.636,18.605,17.778,20.0,32.075,17.391,17.5,13.559,27.027,24.138,25.581,25.0,4.878,15.217,6.818,21.875,5.314,26.923,0.0,0.0,0.0,0.0,0.0,0.0,4.348,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.878,12.195,27.66,22.222,18.519,18.75,24.138,20.833,24.138,31.429,21.875,8.824,23.333,19.231,42.857,18.75,24.324,29.032,8.333,20.69,24.324,32.143,39.474,27.907,24.138,23.684,36.842,22.5,17.143,26.316,24.242,23.529,12.195,11.538,13.333,28.125,25.0,27.027,34.615,25.0,16.667,17.647,18.421,15.789,18.605,38.235,34.483,34.286,29.63,30.233,22.857,30.303,9.804,23.81,28.125,10.0705,25.43575,15.36525,37.0,-1.453437
6,2025-01-21,188,15.0,188.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Incomplete Install,3rd common reason (Incomplete Install),Apple iOS,5.263,4.0,7.273,8.235,1.22,5.882,4.854,5.594,3.158,6.931,3.636,5.405,3.571,6.494,3.67,1.02,6.522,1.06,10.345,0.0,3.333,3.409,0.0,0.0,2.532,1.562,2.941,0.0,2.326,1.471,0.0,0.0,0.0,0.0,0.0,6.494,6.897,3.158,0.0,1.235,4.494,7.447,1.333,2.857,2.353,3.175,7.292,4.902,5.682,9.412,5.556,3.96,4.878,6.098,2.941,0.952,1.37,4.545,1.316,2.469,1.163,5.0,9.184,9.091,2.857,1.163,0.813,2.381,5.882,4.348,2.353,7.692,6.579,4.225,12.346,5.556,6.849,5.882,5.0,6.667,12.987,1.786,8.108,8.108,13.75,8.861,9.091,9.016,7.874,6.757,1.618,6.645,5.027,10.4,-2.15225
7,2025-01-21,Both_ID_145,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Incomplete Install,3rd common reason (Incomplete Install),Both,6.557,5.556,6.536,7.692,2.564,5.072,4.027,4.933,3.896,9.42,3.571,5.983,3.226,6.78,3.226,1.408,7.258,1.224,9.121,0.0,2.299,2.655,0.0,0.0,3.252,1.149,2.941,0.0,1.613,0.952,0.0,0.746,0.0,1.77,0.957,5.932,4.688,3.521,0.758,0.926,4.959,6.504,2.02,6.061,3.333,4.211,10.0,4.545,7.018,8.081,6.557,5.797,3.54,7.627,2.29,3.521,0.99,3.968,3.361,5.455,2.419,4.04,7.246,8.036,3.497,2.521,0.575,4.8,6.383,5.051,5.128,5.455,9.735,4.124,11.881,3.623,11.215,8.943,10.169,9.774,10.811,5.882,10.092,11.881,9.756,9.649,12.338,10.983,10.0,6.604,2.53175,7.189,4.65725,13.0,-3.289813
8,2025-01-21,422,15.0,186.0,4.0,419.0,SH,SMART HELP,DR_SmartHelp Result - Incomplete Install,3rd common reason (Incomplete Install),Google Android,8.696,9.091,4.651,6.667,5.714,3.774,2.174,3.75,5.085,16.216,3.448,6.977,2.5,7.317,2.174,2.273,9.375,1.449,6.731,0.0,0.0,0.0,0.0,0.0,4.545,0.0,2.941,0.0,0.0,0.0,0.0,3.03,0.0,5.714,3.39,4.878,0.0,4.255,2.778,0.0,6.25,3.448,4.167,13.793,5.714,6.25,17.647,3.333,11.538,0.0,9.375,10.811,0.0,11.111,0.0,10.811,0.0,2.632,6.977,13.793,5.263,0.0,2.5,5.714,5.263,6.061,0.0,9.756,7.692,6.667,12.5,0.0,16.216,3.846,10.0,0.0,20.588,15.789,21.053,16.279,5.882,13.793,14.286,22.222,2.326,11.429,16.667,15.686,14.286,6.25,2.174,9.66075,7.48675,15.3,-3.441063
54,2025-01-21,196,15.0,196.0,2.0,183.0,SH,SMART HELP,DR_SmartHelp - Billing & Account Support,Clicked Billing & Account Support,Apple iOS,17.393,19.575,18.435,17.918,17.397,19.285,18.201,19.36,20.379,17.599,15.179,18.8,16.493,17.202,16.672,18.344,18.157,14.035,19.002,19.014,18.425,18.158,18.944,16.361,14.618,19.413,16.877,19.594,18.029,18.611,17.41,15.812,19.513,19.219,19.153,19.22,18.463,18.084,16.29,18.53,19.139,17.155,18.188,16.83,16.191,17.39,18.723,17.167,17.448,16.667,15.134,14.562,15.631,18.462,18.912,19.514,16.48,17.116,17.533,16.434,18.329,19.264,18.943,17.39,19.637,18.325,15.309,18.852,19.04,18.928,19.075,16.242,16.098,15.235,17.126,18.788,18.067,17.077,14.046,17.571,14.476,18.139,19.067,19.045,18.752,16.915,17.274,14.371,18.754,19.132,16.84175,18.93925,2.0975,20.5,15.268625


In [68]:
upper_lower_test_df.shape

(54, 106)

In [63]:

def calculate_upper_lower(df, parent_column='parent_id'):
    # Ensure parent_column is present in the dataframe
    if parent_column not in df.columns:
        raise ValueError(f"{parent_column} not found in the dataframe")
    
    # Define the function to apply to each row
    def calculate_row(row):
        if pd.isna(row[parent_column]) or row[parent_column] == '':
            # Apply 1.25 logic for rows where parent_id is null, empty, or None
            upper = row['3rd Quartile'] + (1.25 * row['IQR'])
            lower = row['1st Quartile'] - (1.25 * row['IQR'])
        else:
            # Apply 0.75 logic for other rows
            upper = row['3rd Quartile'] + (0.75 * row['IQR'])
            lower = row['1st Quartile'] - (0.75 * row['IQR'])
        
        return pd.Series({'Upper': upper, 'Lower': lower})
    
    # Apply the function row by row
    df[['Upper', 'Lower']] = df.apply(calculate_row, axis=1)

    # Round the Upper column to 1 decimal place
    df['Upper'] = df['Upper'].round(1)

    return df


In [64]:
upper_lower_df = calculate_upper_lower(quartiles_iqr_df)
#upper_lower_df =upper_lower_df[['display_names','operating_system_type','1st Quartile','3rd Quartile', 'IQR', 'Upper', 'Lower']]
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', 100)      # Show 100 rows
upper_lower_df.head()

Unnamed: 0,create_dt,metric_id,feature_id,metric_sequence_num,level_no,parent_id,unique_identifier,feature_name,metrics,display_names,operating_system_type,29-08-2024,30-08-2024,31-08-2024,01-09-2024,02-09-2024,03-09-2024,04-09-2024,05-09-2024,06-09-2024,07-09-2024,08-09-2024,09-09-2024,10-09-2024,11-09-2024,12-09-2024,13-09-2024,14-09-2024,15-09-2024,16-09-2024,17-09-2024,18-09-2024,19-09-2024,20-09-2024,21-09-2024,22-09-2024,23-09-2024,24-09-2024,25-09-2024,26-09-2024,27-09-2024,28-09-2024,29-09-2024,30-09-2024,01-10-2024,02-10-2024,03-10-2024,04-10-2024,05-10-2024,06-10-2024,07-10-2024,08-10-2024,09-10-2024,10-10-2024,11-10-2024,12-10-2024,13-10-2024,14-10-2024,15-10-2024,16-10-2024,17-10-2024,18-10-2024,19-10-2024,20-10-2024,21-10-2024,22-10-2024,23-10-2024,24-10-2024,25-10-2024,26-10-2024,27-10-2024,28-10-2024,29-10-2024,30-10-2024,31-10-2024,01-11-2024,02-11-2024,03-11-2024,04-11-2024,05-11-2024,06-11-2024,07-11-2024,08-11-2024,09-11-2024,10-11-2024,11-11-2024,12-11-2024,13-11-2024,14-11-2024,15-11-2024,16-11-2024,17-11-2024,18-11-2024,19-11-2024,20-11-2024,21-11-2024,22-11-2024,23-11-2024,24-11-2024,25-11-2024,26-11-2024,1st Quartile,3rd Quartile,IQR,Upper,Lower
0,2025-01-21,186,15.0,186.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Apple iOS,78.947,84.0,80.909,68.235,78.049,74.118,73.786,77.622,81.053,77.228,75.455,83.784,88.095,72.727,85.321,82.653,80.435,89.046,60.099,8.108,8.333,7.955,6.667,4.615,7.595,10.938,16.667,9.091,11.628,11.765,10.145,7.921,10.753,8.974,10.667,63.636,75.862,75.789,73.958,75.309,69.663,68.085,74.667,75.714,78.824,60.317,66.667,63.725,57.955,57.647,58.889,54.455,62.195,47.561,61.765,58.095,63.014,54.545,59.211,56.79,65.116,51.25,55.102,54.545,71.429,67.442,65.854,63.095,54.412,66.667,60.0,53.846,57.895,46.479,53.086,65.556,56.164,63.529,70.0,60.0,54.545,55.357,51.351,59.459,55.0,56.962,56.818,62.295,66.142,66.216,54.42275,73.52125,19.0985,87.8,40.098875
1,2025-01-21,Both_ID_143,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Both,78.689,80.556,80.392,68.462,76.923,70.29,74.497,76.682,79.221,71.014,74.405,75.214,83.871,76.271,84.516,83.099,77.419,90.612,60.912,11.0,10.345,7.965,6.542,7.407,8.13,9.195,16.176,9.804,11.29,14.286,11.828,8.955,12.5,8.85,8.612,65.254,77.344,69.014,73.485,75.0,66.942,65.854,72.727,70.707,74.167,61.053,66.154,65.909,57.895,58.586,62.295,55.072,61.947,55.085,66.412,58.451,64.356,54.762,57.983,58.182,64.516,49.495,57.246,56.25,71.329,66.387,67.816,64.8,59.574,68.687,54.701,55.455,57.522,47.423,55.446,67.391,57.009,61.789,65.254,57.895,53.153,52.941,49.541,55.446,56.911,57.895,48.701,65.318,61.053,64.151,54.71625,70.60275,15.8865,90.5,34.858125
2,2025-01-21,420,15.0,184.0,4.0,419.0,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Google Android,78.261,72.727,79.07,68.889,74.286,64.151,76.087,75.0,76.271,54.054,72.414,60.465,75.0,82.927,82.609,84.091,68.75,92.754,62.5,19.231,14.815,8.0,6.25,11.628,9.091,4.348,14.706,11.111,10.526,18.919,16.667,12.121,18.519,8.571,3.39,68.293,80.488,55.319,72.222,74.074,59.375,58.621,66.667,58.621,62.857,62.5,64.706,73.333,57.692,64.286,71.875,56.757,61.29,72.222,82.759,59.459,67.857,55.263,55.814,62.069,63.158,42.105,62.5,60.0,71.053,63.636,72.549,68.293,73.077,73.333,40.625,59.375,56.757,50.0,65.0,70.833,58.824,57.895,55.263,53.488,50.0,48.276,45.714,44.444,60.465,60.0,37.879,72.549,50.794,59.375,50.0,72.13525,22.13525,88.7,33.398562
3,2025-01-21,187,15.0,187.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Apple iOS,14.474,10.0,11.818,18.824,15.854,15.294,16.505,11.888,10.526,12.871,16.364,8.108,7.143,19.481,11.009,14.286,9.783,9.541,21.182,1.351,0.0,2.273,1.333,0.0,0.0,0.0,0.0,1.515,1.163,0.0,0.0,0.0,0.0,0.0,0.0,10.39,9.195,9.474,16.667,18.519,15.73,13.83,13.333,11.429,11.765,22.222,17.708,22.549,28.409,22.353,22.222,37.624,23.171,26.829,26.471,27.619,26.027,26.136,22.368,24.691,29.07,25.0,22.449,25.974,17.143,24.419,20.325,27.381,27.941,21.739,23.529,17.949,25.0,33.803,28.395,20.0,27.397,17.647,20.0,23.333,23.377,26.786,22.973,25.676,17.5,20.253,22.727,14.754,16.535,21.622,10.0975,23.2925,13.195,33.2,0.20125
4,2025-01-21,Both_ID_144,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Both,13.115,11.111,13.725,18.462,17.094,21.739,16.779,13.901,11.688,16.667,19.048,14.53,12.903,14.407,12.258,11.972,12.903,7.755,23.127,1.0,0.0,1.77,0.935,0.0,0.0,1.149,0.0,0.98,0.806,0.0,0.0,0.0,0.0,0.0,0.0,8.475,10.156,15.493,18.182,18.519,16.529,16.26,15.152,15.152,17.5,22.105,15.385,22.727,26.316,25.253,21.311,34.058,24.779,21.186,25.191,26.761,27.723,30.159,24.37,24.545,27.419,27.273,22.464,23.214,19.58,24.37,21.264,22.4,23.404,19.192,24.786,20.0,25.664,34.021,27.723,18.841,24.299,17.886,18.644,21.805,27.928,29.412,26.606,26.733,21.951,21.053,25.974,13.295,18.947,23.585,12.41925,24.1205,11.70125,38.7,-2.207313


In [65]:
upper_lower_df.shape

(647, 106)

In [66]:
import numpy as np
import pandas as pd

def calculate_last7_and_30_days(df):
    # Extract the date columns (skip the non-date columns like 'display_names' and 'operating_system_type')
    datecolumn = [col for col in df.columns if '-' in col]
    
    # Exclude the latest date column (the first one)
    last_7_columns = datecolumn[-8:-1]  # Get the last 7 columns excluding the latest date
    last_30_columns = datecolumn[-31:-1]

    # Extract the date part and convert them to datetime objects
    date_objects = [pd.to_datetime(col.split()[0], format='%d-%m-%Y') for col in datecolumn]

    # Get the column corresponding to the latest date
    latest_date = max(date_objects)
    latest_date_column = datecolumn[date_objects.index(latest_date)]

    # Create a new column 'Yesterday' with the values from the latest date column
    df.loc[:, 'Yesterday'] = df[latest_date_column]
    
    # Ensure numeric columns before performing mean calculation
    df[last_7_columns] = df[last_7_columns].apply(pd.to_numeric, errors='coerce')
    df[last_30_columns] = df[last_30_columns].apply(pd.to_numeric, errors='coerce')

    # Calculate the mean for each row across the last 7 and 30 date columns
    df.loc[:, 'last_7_days'] = df[last_7_columns].mean(axis=1)
    df.loc[:, 'last_30_days'] = df[last_30_columns].mean(axis=1)

    # Replace 0 and NaN values in 'last_7_days' and 'last_30_days' with NaN to avoid division by zero
    df['last_7_days'] = df['last_7_days'].replace(0, np.nan)
    df['last_30_days'] = df['last_30_days'].replace(0, np.nan)

    # Calculate the percentage change for 'Last 7 Days' with a check for NaN
    df.loc[:, '% Change Last 7 Days'] = np.where(
        df['last_7_days'].isna(), 0, 
        (df['Yesterday'] - df['last_7_days']) / df['last_7_days'] * 100
    )

    # Calculate the percentage change for 'Last 30 Days' with a check for NaN
    df.loc[:, '% Change Last 30 Days'] = np.where(
        df['last_30_days'].isna(), 0, 
        (df['Yesterday'] - df['last_30_days']) / df['last_30_days'] * 100
    )

    # Round the percentage changes to 1 decimal place
    df.loc[:, '% Change Last 7 Days'] = df['% Change Last 7 Days'].round(1)
    df.loc[:, '% Change Last 30 Days'] = df['% Change Last 30 Days'].round(1)
    
    return df


In [67]:
last_7_and_30_days_df  = calculate_last7_and_30_days(upper_lower_df)
#last_7_and_30_days_df = last_7_and_30_days_df[['metric_id', 'metrics', 'display_names', 'operating_system_type', 'feature_name', 'parent_id','Yesterday', 'last_7_days', 'last_30_days', '% Change Last 7 Days', '% Change Last 30 Days']]
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', 100)
last_7_and_30_days_df.head(5)

Unnamed: 0,create_dt,metric_id,feature_id,metric_sequence_num,level_no,parent_id,unique_identifier,feature_name,metrics,display_names,operating_system_type,29-08-2024,30-08-2024,31-08-2024,01-09-2024,02-09-2024,03-09-2024,04-09-2024,05-09-2024,06-09-2024,07-09-2024,08-09-2024,09-09-2024,10-09-2024,11-09-2024,12-09-2024,13-09-2024,14-09-2024,15-09-2024,16-09-2024,17-09-2024,18-09-2024,19-09-2024,20-09-2024,21-09-2024,22-09-2024,23-09-2024,24-09-2024,25-09-2024,26-09-2024,27-09-2024,28-09-2024,29-09-2024,30-09-2024,01-10-2024,02-10-2024,03-10-2024,04-10-2024,05-10-2024,06-10-2024,07-10-2024,08-10-2024,09-10-2024,10-10-2024,11-10-2024,12-10-2024,13-10-2024,14-10-2024,15-10-2024,16-10-2024,17-10-2024,18-10-2024,19-10-2024,20-10-2024,21-10-2024,22-10-2024,23-10-2024,24-10-2024,25-10-2024,26-10-2024,27-10-2024,28-10-2024,29-10-2024,30-10-2024,31-10-2024,01-11-2024,02-11-2024,03-11-2024,04-11-2024,05-11-2024,06-11-2024,07-11-2024,08-11-2024,09-11-2024,10-11-2024,11-11-2024,12-11-2024,13-11-2024,14-11-2024,15-11-2024,16-11-2024,17-11-2024,18-11-2024,19-11-2024,20-11-2024,21-11-2024,22-11-2024,23-11-2024,24-11-2024,25-11-2024,26-11-2024,1st Quartile,3rd Quartile,IQR,Upper,Lower,Yesterday,last_7_days,last_30_days,% Change Last 7 Days,% Change Last 30 Days
0,2025-01-21,186,15.0,186.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Apple iOS,78.947,84.0,80.909,68.235,78.049,74.118,73.786,77.622,81.053,77.228,75.455,83.784,88.095,72.727,85.321,82.653,80.435,89.046,60.099,8.108,8.333,7.955,6.667,4.615,7.595,10.938,16.667,9.091,11.628,11.765,10.145,7.921,10.753,8.974,10.667,63.636,75.862,75.789,73.958,75.309,69.663,68.085,74.667,75.714,78.824,60.317,66.667,63.725,57.955,57.647,58.889,54.455,62.195,47.561,61.765,58.095,63.014,54.545,59.211,56.79,65.116,51.25,55.102,54.545,71.429,67.442,65.854,63.095,54.412,66.667,60.0,53.846,57.895,46.479,53.086,65.556,56.164,63.529,70.0,60.0,54.545,55.357,51.351,59.459,55.0,56.962,56.818,62.295,66.142,66.216,54.42275,73.52125,19.0985,87.8,40.098875,66.216,58.289571,59.2062,13.598365,11.839638
1,2025-01-21,Both_ID_143,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Both,78.689,80.556,80.392,68.462,76.923,70.29,74.497,76.682,79.221,71.014,74.405,75.214,83.871,76.271,84.516,83.099,77.419,90.612,60.912,11.0,10.345,7.965,6.542,7.407,8.13,9.195,16.176,9.804,11.29,14.286,11.828,8.955,12.5,8.85,8.612,65.254,77.344,69.014,73.485,75.0,66.942,65.854,72.727,70.707,74.167,61.053,66.154,65.909,57.895,58.586,62.295,55.072,61.947,55.085,66.412,58.451,64.356,54.762,57.983,58.182,64.516,49.495,57.246,56.25,71.329,66.387,67.816,64.8,59.574,68.687,54.701,55.455,57.522,47.423,55.446,67.391,57.009,61.789,65.254,57.895,53.153,52.941,49.541,55.446,56.911,57.895,48.701,65.318,61.053,64.151,54.71625,70.60275,15.8865,90.5,34.858125,64.151,56.409286,58.837533,13.724184,9.030743
2,2025-01-21,420,15.0,184.0,4.0,419.0,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Google Android,78.261,72.727,79.07,68.889,74.286,64.151,76.087,75.0,76.271,54.054,72.414,60.465,75.0,82.927,82.609,84.091,68.75,92.754,62.5,19.231,14.815,8.0,6.25,11.628,9.091,4.348,14.706,11.111,10.526,18.919,16.667,12.121,18.519,8.571,3.39,68.293,80.488,55.319,72.222,74.074,59.375,58.621,66.667,58.621,62.857,62.5,64.706,73.333,57.692,64.286,71.875,56.757,61.29,72.222,82.759,59.459,67.857,55.263,55.814,62.069,63.158,42.105,62.5,60.0,71.053,63.636,72.549,68.293,73.077,73.333,40.625,59.375,56.757,50.0,65.0,70.833,58.824,57.895,55.263,53.488,50.0,48.276,45.714,44.444,60.465,60.0,37.879,72.549,50.794,59.375,50.0,72.13525,22.13525,88.7,33.398562,59.375,53.120714,58.3318,11.773723,1.78839
3,2025-01-21,187,15.0,187.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Apple iOS,14.474,10.0,11.818,18.824,15.854,15.294,16.505,11.888,10.526,12.871,16.364,8.108,7.143,19.481,11.009,14.286,9.783,9.541,21.182,1.351,0.0,2.273,1.333,0.0,0.0,0.0,0.0,1.515,1.163,0.0,0.0,0.0,0.0,0.0,0.0,10.39,9.195,9.474,16.667,18.519,15.73,13.83,13.333,11.429,11.765,22.222,17.708,22.549,28.409,22.353,22.222,37.624,23.171,26.829,26.471,27.619,26.027,26.136,22.368,24.691,29.07,25.0,22.449,25.974,17.143,24.419,20.325,27.381,27.941,21.739,23.529,17.949,25.0,33.803,28.395,20.0,27.397,17.647,20.0,23.333,23.377,26.786,22.973,25.676,17.5,20.253,22.727,14.754,16.535,21.622,10.0975,23.2925,13.195,33.2,0.20125,21.622,20.059714,23.125533,7.788175,-6.501616
4,2025-01-21,Both_ID_144,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Both,13.115,11.111,13.725,18.462,17.094,21.739,16.779,13.901,11.688,16.667,19.048,14.53,12.903,14.407,12.258,11.972,12.903,7.755,23.127,1.0,0.0,1.77,0.935,0.0,0.0,1.149,0.0,0.98,0.806,0.0,0.0,0.0,0.0,0.0,0.0,8.475,10.156,15.493,18.182,18.519,16.529,16.26,15.152,15.152,17.5,22.105,15.385,22.727,26.316,25.253,21.311,34.058,24.779,21.186,25.191,26.761,27.723,30.159,24.37,24.545,27.419,27.273,22.464,23.214,19.58,24.37,21.264,22.4,23.404,19.192,24.786,20.0,25.664,34.021,27.723,18.841,24.299,17.886,18.644,21.805,27.928,29.412,26.606,26.733,21.951,21.053,25.974,13.295,18.947,23.585,12.41925,24.1205,11.70125,38.7,-2.207313,23.585,22.079857,23.356433,6.816814,0.978603


In [68]:
last_7_and_30_days_df.shape

(647, 111)

In [69]:
last_7_and_30_days_df[['1st Quartile', '3rd Quartile', 'IQR','Upper', 'Lower', 'Yesterday', 'last_7_days', 'last_30_days', '% Change Last 7 Days', '% Change Last 30 Days']] = last_7_and_30_days_df[['1st Quartile', '3rd Quartile', 'IQR','Upper', 'Lower', 'Yesterday', 'last_7_days', 'last_30_days', '% Change Last 7 Days', '% Change Last 30 Days']].round(3)

In [70]:
last_7_and_30_days_df.head()

Unnamed: 0,create_dt,metric_id,feature_id,metric_sequence_num,level_no,parent_id,unique_identifier,feature_name,metrics,display_names,operating_system_type,29-08-2024,30-08-2024,31-08-2024,01-09-2024,02-09-2024,03-09-2024,04-09-2024,05-09-2024,06-09-2024,07-09-2024,08-09-2024,09-09-2024,10-09-2024,11-09-2024,12-09-2024,13-09-2024,14-09-2024,15-09-2024,16-09-2024,17-09-2024,18-09-2024,19-09-2024,20-09-2024,21-09-2024,22-09-2024,23-09-2024,24-09-2024,25-09-2024,26-09-2024,27-09-2024,28-09-2024,29-09-2024,30-09-2024,01-10-2024,02-10-2024,03-10-2024,04-10-2024,05-10-2024,06-10-2024,07-10-2024,08-10-2024,09-10-2024,10-10-2024,11-10-2024,12-10-2024,13-10-2024,14-10-2024,15-10-2024,16-10-2024,17-10-2024,18-10-2024,19-10-2024,20-10-2024,21-10-2024,22-10-2024,23-10-2024,24-10-2024,25-10-2024,26-10-2024,27-10-2024,28-10-2024,29-10-2024,30-10-2024,31-10-2024,01-11-2024,02-11-2024,03-11-2024,04-11-2024,05-11-2024,06-11-2024,07-11-2024,08-11-2024,09-11-2024,10-11-2024,11-11-2024,12-11-2024,13-11-2024,14-11-2024,15-11-2024,16-11-2024,17-11-2024,18-11-2024,19-11-2024,20-11-2024,21-11-2024,22-11-2024,23-11-2024,24-11-2024,25-11-2024,26-11-2024,1st Quartile,3rd Quartile,IQR,Upper,Lower,Yesterday,last_7_days,last_30_days,% Change Last 7 Days,% Change Last 30 Days
0,2025-01-21,186,15.0,186.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Apple iOS,78.947,84.0,80.909,68.235,78.049,74.118,73.786,77.622,81.053,77.228,75.455,83.784,88.095,72.727,85.321,82.653,80.435,89.046,60.099,8.108,8.333,7.955,6.667,4.615,7.595,10.938,16.667,9.091,11.628,11.765,10.145,7.921,10.753,8.974,10.667,63.636,75.862,75.789,73.958,75.309,69.663,68.085,74.667,75.714,78.824,60.317,66.667,63.725,57.955,57.647,58.889,54.455,62.195,47.561,61.765,58.095,63.014,54.545,59.211,56.79,65.116,51.25,55.102,54.545,71.429,67.442,65.854,63.095,54.412,66.667,60.0,53.846,57.895,46.479,53.086,65.556,56.164,63.529,70.0,60.0,54.545,55.357,51.351,59.459,55.0,56.962,56.818,62.295,66.142,66.216,54.423,73.521,19.099,87.8,40.099,66.216,58.29,59.206,13.598365,11.839638
1,2025-01-21,Both_ID_143,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Both,78.689,80.556,80.392,68.462,76.923,70.29,74.497,76.682,79.221,71.014,74.405,75.214,83.871,76.271,84.516,83.099,77.419,90.612,60.912,11.0,10.345,7.965,6.542,7.407,8.13,9.195,16.176,9.804,11.29,14.286,11.828,8.955,12.5,8.85,8.612,65.254,77.344,69.014,73.485,75.0,66.942,65.854,72.727,70.707,74.167,61.053,66.154,65.909,57.895,58.586,62.295,55.072,61.947,55.085,66.412,58.451,64.356,54.762,57.983,58.182,64.516,49.495,57.246,56.25,71.329,66.387,67.816,64.8,59.574,68.687,54.701,55.455,57.522,47.423,55.446,67.391,57.009,61.789,65.254,57.895,53.153,52.941,49.541,55.446,56.911,57.895,48.701,65.318,61.053,64.151,54.716,70.603,15.886,90.5,34.858,64.151,56.409,58.838,13.724184,9.030743
2,2025-01-21,420,15.0,184.0,4.0,419.0,SH,SMART HELP,DR_SmartHelp Result - Unknown,1st common reason (Unknown),Google Android,78.261,72.727,79.07,68.889,74.286,64.151,76.087,75.0,76.271,54.054,72.414,60.465,75.0,82.927,82.609,84.091,68.75,92.754,62.5,19.231,14.815,8.0,6.25,11.628,9.091,4.348,14.706,11.111,10.526,18.919,16.667,12.121,18.519,8.571,3.39,68.293,80.488,55.319,72.222,74.074,59.375,58.621,66.667,58.621,62.857,62.5,64.706,73.333,57.692,64.286,71.875,56.757,61.29,72.222,82.759,59.459,67.857,55.263,55.814,62.069,63.158,42.105,62.5,60.0,71.053,63.636,72.549,68.293,73.077,73.333,40.625,59.375,56.757,50.0,65.0,70.833,58.824,57.895,55.263,53.488,50.0,48.276,45.714,44.444,60.465,60.0,37.879,72.549,50.794,59.375,50.0,72.135,22.135,88.7,33.399,59.375,53.121,58.332,11.773723,1.78839
3,2025-01-21,187,15.0,187.0,4.0,185.0,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Apple iOS,14.474,10.0,11.818,18.824,15.854,15.294,16.505,11.888,10.526,12.871,16.364,8.108,7.143,19.481,11.009,14.286,9.783,9.541,21.182,1.351,0.0,2.273,1.333,0.0,0.0,0.0,0.0,1.515,1.163,0.0,0.0,0.0,0.0,0.0,0.0,10.39,9.195,9.474,16.667,18.519,15.73,13.83,13.333,11.429,11.765,22.222,17.708,22.549,28.409,22.353,22.222,37.624,23.171,26.829,26.471,27.619,26.027,26.136,22.368,24.691,29.07,25.0,22.449,25.974,17.143,24.419,20.325,27.381,27.941,21.739,23.529,17.949,25.0,33.803,28.395,20.0,27.397,17.647,20.0,23.333,23.377,26.786,22.973,25.676,17.5,20.253,22.727,14.754,16.535,21.622,10.098,23.292,13.195,33.2,0.201,21.622,20.06,23.126,7.788175,-6.501616
4,2025-01-21,Both_ID_144,15.0,,,,SH,SMART HELP,DR_SmartHelp Result - Disconnected,2nd common reason (Disconnected),Both,13.115,11.111,13.725,18.462,17.094,21.739,16.779,13.901,11.688,16.667,19.048,14.53,12.903,14.407,12.258,11.972,12.903,7.755,23.127,1.0,0.0,1.77,0.935,0.0,0.0,1.149,0.0,0.98,0.806,0.0,0.0,0.0,0.0,0.0,0.0,8.475,10.156,15.493,18.182,18.519,16.529,16.26,15.152,15.152,17.5,22.105,15.385,22.727,26.316,25.253,21.311,34.058,24.779,21.186,25.191,26.761,27.723,30.159,24.37,24.545,27.419,27.273,22.464,23.214,19.58,24.37,21.264,22.4,23.404,19.192,24.786,20.0,25.664,34.021,27.723,18.841,24.299,17.886,18.644,21.805,27.928,29.412,26.606,26.733,21.951,21.053,25.974,13.295,18.947,23.585,12.419,24.12,11.701,38.7,-2.207,23.585,22.08,23.356,6.816814,0.978603


In [72]:
#last_7_and_30_days_df.to_csv(r'D:\Healthscore Codes\table data\table4_allfeatures_data.csv', index = False)

In [71]:
fouth_table_columns = ['create_dt','metric_id', 'feature_id', 'unique_identifier', 'feature_name', 'metrics', 'display_names', 'operating_system_type', '1st Quartile', '3rd Quartile', 'IQR', 'Upper', 'Lower', 'Yesterday', 'last_7_days', 'last_30_days', '% Change Last 7 Days', '% Change Last 30 Days']

In [73]:
fourth_table_melted_df = pd.melt(last_7_and_30_days_df, id_vars=fouth_table_columns, 
                     value_vars=date_columns, 
                     var_name='Date', 
                     value_name='Value')

In [74]:
fourth_table_melted_df.shape

(58230, 20)

In [75]:
fourth_table_melted_df.to_csv(r'D:\Healthscore Codes\table data\table4_allfeatures_data_athena.csv', index = False)