# NSW Data Case Study Notebook
This notebook provides step-by-step examples and ingestion outlines covering:
- ASIC directors/officers data
- ABR trading names data
- Complaints data from Fair Trading
- Proactive compliance inspections data
- Investigations data
- Merging data and building relationship graphs

Code is commented and structured following best practices.

In [1]:
# Imports
import pandas as pd
import networkx as nx


## 1. ASIC Data: Directors/Officers Example

In [2]:
# Sample ASIC director/officer records used throughout the notebook.
# A CessationDate of None means no cessation date is recorded for that row.
asic_directors_example = pd.DataFrame([
    dict(
        ACN='123456789',
        CompanyName='ABC Pty Ltd',
        DirectorName='John Smith',
        AppointmentDate='2020-05-15',
        CessationDate=None,
    ),
    dict(
        ACN='987654321',
        CompanyName='XYZ Holdings Pty Ltd',
        DirectorName='Jane Doe',
        AppointmentDate='2018-03-10',
        CessationDate='2023-11-30',
    ),
])
# Last expression of the cell: renders the frame as the cell output
asic_directors_example

Unnamed: 0,ACN,CompanyName,DirectorName,AppointmentDate,CessationDate
0,123456789,ABC Pty Ltd,John Smith,2020-05-15,
1,987654321,XYZ Holdings Pty Ltd,Jane Doe,2018-03-10,2023-11-30


## 2. ABR Data: Trading and Business Names Example

In [3]:
# Sample ABR records: one row per ABN, with business/trading names held as lists.
# ABNs are stored here with embedded spaces, exactly as written in the literals.
abr_example = pd.DataFrame([
    dict(
        ABN='12 345 678 901',
        LegalName='Alpha Innovations Pty Ltd',
        BusinessNames=['Alpha Innovations'],
        TradingNames=['Alpha Tech', 'Alpha Solutions'],
        State='NSW',
        Postcode='2000',
        ACN='123456789',
        GSTStatus='Active',
        RegistrationDate='2019-07-01',
    ),
    dict(
        ABN='98 765 432 109',
        LegalName='Beta Services Pty Ltd',
        BusinessNames=['Beta Services'],
        TradingNames=['Beta Consulting'],
        State='NSW',
        Postcode='2140',
        ACN='987654321',
        GSTStatus='Active',
        RegistrationDate='2021-02-15',
    ),
])
# Last expression of the cell: renders the frame as the cell output
abr_example

Unnamed: 0,ABN,LegalName,BusinessNames,TradingNames,State,Postcode,ACN,GSTStatus,RegistrationDate
0,12 345 678 901,Alpha Innovations Pty Ltd,[Alpha Innovations],"[Alpha Tech, Alpha Solutions]",NSW,2000,123456789,Active,2019-07-01
1,98 765 432 109,Beta Services Pty Ltd,[Beta Services],[Beta Consulting],NSW,2140,987654321,Active,2021-02-15


## 3. Complaints Data Example

In [4]:
# Example DataFrame for complaints
complaints_example = pd.DataFrame([
    {
        'ComplaintID': 'C202401001',
        'BusinessName': 'ABC Pty Ltd',
        'ABN': '12 345 678 901',
        'DateReceived': '2024-06-01',
        'IssueCategory': 'Product quality',
        'Status': 'Resolved'
    },
    {
        'ComplaintID': 'C202402015',
        'BusinessName': 'XYZ Holdings Pty Ltd',
        'ABN': '98 765 432 109',
        'DateReceived': '2024-05-20',
        'IssueCategory': 'Misleading advertising',
        'Status': 'Under investigation'
    }
])
complaints_example

Unnamed: 0,ComplaintID,BusinessName,ABN,DateReceived,IssueCategory,Status
0,C202401001,ABC Pty Ltd,12 345 678 901,2024-06-01,Product quality,Resolved
1,C202402015,XYZ Holdings Pty Ltd,98 765 432 109,2024-05-20,Misleading advertising,Under investigation


## 4. Proactive Compliance Inspections Data Example

In [5]:
# Example DataFrame for inspections
inspections_example = pd.DataFrame([
    {
        'InspectionID': 'I20230101',
        'BusinessName': 'ABC Pty Ltd',
        'ABN': '12 345 678 901',
        'InspectionDate': '2023-10-15',
        'Location': 'Sydney',
        'Outcome': 'Compliant',
        'InspectorName': 'Alice Brown',
        'Notes': 'No issues found'
    },
    {
        'InspectionID': 'I20230202',
        'BusinessName': 'Beta Services Pty Ltd',
        'ABN': '98 765 432 109',
        'InspectionDate': '2024-03-22',
        'Location': 'Parramatta',
        'Outcome': 'Non-compliant',
        'InspectorName': 'Bob Green',
        'Notes': 'Labeling issues; follow-up required'
    }
])
inspections_example

Unnamed: 0,InspectionID,BusinessName,ABN,InspectionDate,Location,Outcome,InspectorName,Notes
0,I20230101,ABC Pty Ltd,12 345 678 901,2023-10-15,Sydney,Compliant,Alice Brown,No issues found
1,I20230202,Beta Services Pty Ltd,98 765 432 109,2024-03-22,Parramatta,Non-compliant,Bob Green,Labeling issues; follow-up required


## 5. Investigations Data Example

In [6]:
# Example DataFrame for investigations
investigations_example = pd.DataFrame([
    {
        'InvestigationID': 'INV2024001',
        'BusinessName': 'XYZ Holdings Pty Ltd',
        'ABN': '98 765 432 109',
        'StartDate': '2024-05-25',
        'EndDate': None,
        'Outcome': 'Ongoing',
        'RelatedEntities': ['Alpha Innovations Pty Ltd']
    },
    {
        'InvestigationID': 'INV2023005',
        'BusinessName': 'Gamma Trading Pty Ltd',
        'ABN': '11 222 333 444',
        'StartDate': '2023-01-10',
        'EndDate': '2023-12-05',
        'Outcome': 'Enforcement action taken',
        'RelatedEntities': []
    }
])
investigations_example

Unnamed: 0,InvestigationID,BusinessName,ABN,StartDate,EndDate,Outcome,RelatedEntities
0,INV2024001,XYZ Holdings Pty Ltd,98 765 432 109,2024-05-25,,Ongoing,[Alpha Innovations Pty Ltd]
1,INV2023005,Gamma Trading Pty Ltd,11 222 333 444,2023-01-10,2023-12-05,Enforcement action taken,[]


## 6. Merging Data and Building Relationships

In [7]:
# Example: merge datasets on ABN/ACN and build a master table
# First, normalize ABN/ACN formats: remove all whitespace
def normalize_abn(acn_abn):
    """Return an ABN/ACN identifier with every whitespace character removed.

    Used for both ABN and ACN columns so that differently formatted values
    (e.g. '12 345 678 901' vs '12345678901') compare equal as join keys.

    Parameters
    ----------
    acn_abn : object
        The identifier to normalize. Strings are cleaned; any other value
        (None, NaN, numbers) is returned unchanged.

    Returns
    -------
    object
        The string without whitespace, or the original non-string value.
    """
    if not isinstance(acn_abn, str):
        return acn_abn
    # str.split() with no argument splits on any Unicode whitespace run
    # (spaces, tabs, newlines, non-breaking spaces), so re-joining the
    # pieces drops all of it rather than only the plain ' ' character.
    return ''.join(acn_abn.split())

# Normalize every identifier column so the join keys share one format.
# The example frames are overwritten in place (as before), so later cells
# see the normalized values.
for frame, id_column in (
    (asic_directors_example, 'ACN'),
    (abr_example, 'ACN'),
    (abr_example, 'ABN'),
    (complaints_example, 'ABN'),
    (inspections_example, 'ABN'),
    (investigations_example, 'ABN'),
):
    frame[id_column] = frame[id_column].apply(normalize_abn)

# Merge ASIC and ABR on ACN: one row per distinct (ACN, CompanyName) pair,
# left-joined so companies without an ABR record are kept (ABN/LegalName NaN).
# NOTE(review): in the sample data the ASIC CompanyName and the ABR LegalName
# differ for the same ACN (e.g. 'ABC Pty Ltd' vs 'Alpha Innovations Pty Ltd');
# the sample values look inconsistent across sources - confirm.
df_master = pd.merge(
    asic_directors_example[['ACN', 'CompanyName']].drop_duplicates(),
    abr_example[['ACN', 'ABN', 'LegalName']],
    on='ACN', how='left'
)

# Join complaints count (number of complaint rows per ABN)
complaints_count = complaints_example.groupby('ABN').size().reset_index(name='ComplaintCount')
df_master = (
    df_master
    .merge(complaints_count, on='ABN', how='left')
    # Companies with no complaints come back as NaN from the left join ...
    .fillna({'ComplaintCount': 0})
    # ... which upcasts the column to float; restore an integer count.
    .astype({'ComplaintCount': 'int64'})
)
# Display master
df_master

Unnamed: 0,ACN,CompanyName,ABN,LegalName,ComplaintCount
0,123456789,ABC Pty Ltd,12345678901,Alpha Innovations Pty Ltd,1
1,987654321,XYZ Holdings Pty Ltd,98765432109,Beta Services Pty Ltd,1


In [8]:
# Build an undirected company-director graph from the ASIC example rows.
# Company nodes are keyed by normalized ACN; person nodes are keyed by the
# director name prefixed with "Person: ".
G = nx.Graph()
director_pairs = zip(
    asic_directors_example['ACN'],
    asic_directors_example['DirectorName'],
)
for acn, director_name in director_pairs:
    company_node = normalize_abn(acn)
    person_node = f"Person: {director_name}"
    G.add_node(company_node, type='company')
    G.add_node(person_node, type='person')
    # One edge per ASIC row, tagged with the relationship kind
    G.add_edge(company_node, person_node, relation='director')

# Summarize the graph size
print(f"Nodes: {G.number_of_nodes()}, Edges: {G.number_of_edges()}")

Nodes: 4, Edges: 2


## Next Steps
1. Adapt ingestion code for real API endpoints and bulk data files.
2. Apply data cleaning, validation, and storage (e.g., database).
3. Extend graph analysis: shareholdings, complaint triggers, cluster detection.
4. Build visualizations or dashboards.
