In [1]:
import pandas as pd

df = pd.read_csv('dataset/Complaint Category.csv')

In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19853 entries, 0 to 19852
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Code            19853 non-null  int64  
 1   Description     19853 non-null  object 
 2   OrgCode         19853 non-null  object 
 3   Parent          19762 non-null  float64
 4   Stage           19853 non-null  int64  
 5   MonitoringCode  16382 non-null  float64
dtypes: float64(2), int64(2), object(2)
memory usage: 930.7+ KB


In [3]:
new_df = df[df['Parent'].isnull()]
new_df = new_df.drop(['Parent', 'Stage', 'MonitoringCode', 'OrgCode'], axis=1)
new_df.head()

Unnamed: 0,Code,Description
0,1,Telecommunications
60,61,Posts
250,251,Banking
352,353,Insurance
397,398,School Education


In [4]:
# All root nodes
for index, row in new_df.iterrows():
    print(f"Code: {row['Code']} | Description: {row['Description']}")

Code: 1 | Description: Telecommunications
Code: 61 | Description: Posts
Code: 251 | Description: Banking
Code: 353 | Description: Insurance
Code: 398 | Description: School Education
Code: 616 | Description: Road Transport and Highways
Code: 656 | Description: Health and Family Welfare
Code: 1221 | Description: External Affairs
Code: 1341 | Description: Petroleum and Natural Gas
Code: 1442 | Description: Civil Aviation
Code: 2113 | Description: Higher Education
Code: 2173 | Description: Labour and Employment
Code: 2426 | Description: Housing and Urban Affairs
Code: 2565 | Description: Railway
Code: 2570 | Description: Personnel and Training
Code: 4414 | Description: Central Board of Direct Taxes (Income Tax)
Code: 4465 | Description: Central Board of Indirect Taxes and Customs
Code: 4622 | Description: Department of Revenue
Code: 4742 | Description: Home Affairs
Code: 4976 | Description: Rural Development
Code: 5256 | Description: Department of Justice
Code: 5293 | Description: Ministry

In [5]:
def find_children_of_root(data, root_code):
    # Check if the given root_code is actually a root
    if pd.isna(data[data['Code'] == root_code]['Parent'].iloc[0]):
        # Find all rows where the 'Parent' column matches the root_code
        children = data[data['Parent'] == root_code]

        if not children.empty:
            print(f"Children of root node {root_code}:")
            for _, row in children.iterrows():
                print(f"Code: {row['Code']} | Description: {row['Description']}")
        else:
            print(f"No children found for root node {root_code}.")
    else:
        print(f"The given code {root_code} is not a root node.")


# Example usage
root_code = 22801
find_children_of_root(df, root_code)

Children of root node 22801:
Code: 22802 | Description: Hospital Related
Code: 22843 | Description: Ayush Schemes
Code: 22873 | Description: College Permission & Admission
Code: 23047 | Description: Ayush Education in National Institutes
Code: 23114 | Description: Ayush Research
Code: 23168 | Description: Ayush Drugs Policy
Code: 23174 | Description: Covid- 19
Code: 23178 | Description: Matters relating to the Employees of Ministry of Ayush and its Organizations
Code: 23190 | Description: Lab & Pharmocopeia
Code: 23194 | Description: Miscellaneous


In [6]:
def find_all_roots_and_their_children(data):
    root_nodes = data[pd.isna(data['Parent'])]['Code']
    for root_code in root_nodes:
        children = data[data['Parent'] == root_code]
        print(
            f"Root Node: {root_code} | Number of Children: {len(children)} | Description: {data[data['Code'] == root_code]['Description'].iloc[0]}")


find_all_roots_and_their_children(df)

Root Node: 1 | Number of Children: 9 | Description: Telecommunications
Root Node: 61 | Number of Children: 13 | Description: Posts
Root Node: 251 | Number of Children: 25 | Description: Banking
Root Node: 353 | Number of Children: 12 | Description: Insurance
Root Node: 398 | Number of Children: 18 | Description: School Education
Root Node: 616 | Number of Children: 8 | Description: Road Transport and Highways
Root Node: 656 | Number of Children: 12 | Description: Health and Family Welfare
Root Node: 1221 | Number of Children: 18 | Description: External Affairs
Root Node: 1341 | Number of Children: 16 | Description: Petroleum and Natural Gas
Root Node: 1442 | Number of Children: 13 | Description: Civil Aviation
Root Node: 2113 | Number of Children: 16 | Description: Higher Education
Root Node: 2173 | Number of Children: 34 | Description: Labour and Employment
Root Node: 2426 | Number of Children: 25 | Description: Housing and Urban Affairs
Root Node: 2565 | Number of Children: 13 | Desc

In [8]:
from bigtree import list_to_tree, tree_to_dot, print_tree, hprint_tree

# Create a code-description mapping
code_to_desc = pd.Series(df.Description.values, index=df.Code).to_dict()

paths = []


def build_path(code):
    path = code_to_desc.get(code, "Unknown Code")  # Fallback to "Unknown Code" if code is not found
    path = "Root/" + path
    while True:
        parent_code = df.loc[df['Code'] == code, 'Parent'].values
        if len(parent_code) == 0 or pd.isnull(parent_code[0]):
            break  # Exit the loop if no parent or parent is null (reached an original root node)
        parent_code = parent_code[0]
        if parent_code not in code_to_desc:
            # print(f"Warning: Parent code {parent_code} not found for child {code}.")
            break
        path = "Root/" + code_to_desc[parent_code] + '/' + path.split('/', 1)[1]  # Rebuild the path with the new parent
        code = parent_code
    return path


for code in df['Code']:
    path = build_path(code)
    paths.append(path)

root = list_to_tree(paths)

print_tree(root)

Root
├── Telecommunications
│   ├── Mobile Related
│   │   ├── Call Drop
│   │   ├── Improper Network Coverage
│   │   ├── Data Speed lower than commited
│   │   ├── Mobile Number Portability (MNP)
│   │   ├── UCC related complaints
│   │   ├── Activation
│   │   │   ├── Deactivation of Value Added Services without explicit consent
│   │   │   └── Deactivation
│   │   │       └── Fault of Sim Card
│   │   ├── AADHAR Linking
│   │   │   └── Documents verification
│   │   ├── Tariff 
│   │   │   └──  Recharge issue 
│   │   │       └──  Billing issue of Postpaid
│   │   ├── No Network Coverage
│   │   └── Others
│   ├── Pension Related
│   │   ├── Pension not started 
│   │   │   └──  Pension stopped
│   │   ├── Revision of Pension
│   │   ├── Miscalculation in Pension
│   │   ├── Adding beneficiary in PPO
│   │   └── Others
│   ├── Broadband Related
│   │   ├── Delay in New BB Connection
│   │   ├── Landline working but Internet not working
│   │   │   └── Frequent disconnection
│   │  

In [9]:
print_tree(root, node_name_or_path="Telecommunications")

Telecommunications
├── Mobile Related
│   ├── Call Drop
│   ├── Improper Network Coverage
│   ├── Data Speed lower than commited
│   ├── Mobile Number Portability (MNP)
│   ├── UCC related complaints
│   ├── Activation
│   │   ├── Deactivation of Value Added Services without explicit consent
│   │   └── Deactivation
│   │       └── Fault of Sim Card
│   ├── AADHAR Linking
│   │   └── Documents verification
│   ├── Tariff 
│   │   └──  Recharge issue 
│   │       └──  Billing issue of Postpaid
│   ├── No Network Coverage
│   └── Others
├── Pension Related
│   ├── Pension not started 
│   │   └──  Pension stopped
│   ├── Revision of Pension
│   ├── Miscalculation in Pension
│   ├── Adding beneficiary in PPO
│   └── Others
├── Broadband Related
│   ├── Delay in New BB Connection
│   ├── Landline working but Internet not working
│   │   └── Frequent disconnection
│   ├── Billing issue
│   ├── Speed lower than committed
│   └── Others
├── Landline Related
│   ├── Delay or issue in providing

In [14]:
print_tree(root, node_name_or_path="Root/Banking", max_depth=2)

Banking
├── Bank Locker Related
├── Deficiency in Customer Service Related
├── Education Loans Related
├── Housing Loan Related
├── Non Banking Finance Company (NBFC) other than Housing Finance Companies Related
├── Pradhan Mantri Jan Dhan Yojna (PMJDY) Related
├── Pradhan Mantri Mudra Yojana (PMMY) Related
├── Agricultural Matters Related
├── Credit
├── Service Charges Related
├── Government sponsored Schemes Related
├── Fraud
├── Mobile Banking
├── Misappropriation
├── Misbehaviour
├── One Time Settlement of Loan
├── Opening of New Branches
├── Pradhan Mantri Jeevan Jyoti Yojna
├── Pradhan Mantri Jeevan Suraksha Yojana
├── Recruitment Related
├── Service Matter Related
├── Pension
├── Stand Up India Related
├── Vacation of Premises
└── Miscellaneous


In [20]:
print_tree(root, style="custom", custom_style=("│  ", "├→ ", "╰→ "), node_name_or_path="Root/Railway")

Railway
├→ Train and Station related grievances (redirect to RailMadad)
├→ Land Acquisition
├→ Construction of FOB
│  ╰→ RUB
│     ╰→ ROB
├→ Construction 
│  ╰→ Renovation of Railway lines or Bridges
├→ Pension
├→ Pre Appointment
├→ Service matter
├→ Encroachment of FOB, ROB, RUB, LC Gate, Railway Premises
├→ Bribery and Corruption other than Train and station
├→ Complaint against staff other than Train and station
├→ Tender
│  ╰→ Contract Matters
├→ Issues related to Introduction
│  ╰→ Extension
│     ╰→ Stoppage of trains
╰→ Miscellaneous
