In [38]:
# Create fake data records for the ssd_api_data_staging_anon staging table.

# Number of duplicate records that main() generates from the sample payload.
num_records = 3000  # Adjust as needed.



import json
import pyodbc
import uuid
import datetime
import random
import string
import copy
import re

import hashlib  # for SHA-256 hashing


def generate_random_id(original):
    """
    Return a random alphanumeric string with the same length as `original`.

    Candidates are drawn from ASCII letters and digits and are redrawn until
    one contains no '0x' (compared case-insensitively), because that sequence
    can trip the OWASP ruleset check 'SQL Hex Encoding Identified'.
    """
    alphabet = string.ascii_letters + string.digits
    length = len(original)

    candidate = ''.join(random.choices(alphabet, k=length))
    while '0x' in candidate.lower():
        # Reject the whole candidate and draw again rather than patching it.
        candidate = ''.join(random.choices(alphabet, k=length))
    return candidate


# This version does no sanitising of its own: it relies on generate_random_id()
# regenerating until the id contains no '0x'. The commented-out variant below
# sanitises the generated id here instead.
def update_ids(obj):
    """
    Walk a nested dict/list structure in place and replace id-like values.

    A string value is replaced with a fresh random id of the same length when
    its key ends with '_id' or is exactly 'unique_pupil_number'. Nested dicts
    and lists are visited recursively; anything else is left untouched.
    Nothing is returned.
    """
    if isinstance(obj, list):
        for element in obj:
            update_ids(element)
        return
    if not isinstance(obj, dict):
        return

    # Only existing keys are reassigned, so iterating the dict here is safe.
    for name in obj:
        current = obj[name]
        if isinstance(current, str):
            if name.endswith("_id") or name == "unique_pupil_number":
                obj[name] = generate_random_id(current)
        elif isinstance(current, (dict, list)):
            update_ids(current)

# # use this only if unable to obtain valid non-0x ids from generate_random_id()
# # fixed to remove potential 0x combinations within _id fields
# def update_ids(obj):
#     """
#     Recursively update any dictionary key that ends with '_id'
#     or is exactly 'unique_pupil_number' with a new random id,
#     and sanitise the generated id by replacing any occurrence of '0x'
#     (or '0X') with a random combination of alphabetic characters of the same length.

#     For example, if the new id contains "0x", this substring will be replaced
#     by two random letters (e.g. "Ab") instead of simply being removed.
#     """
#     if isinstance(obj, dict):
#         for key, value in obj.items():
#             if isinstance(value, str) and (key.endswith("_id") or key == "unique_pupil_number"):
#                 new_id = generate_random_id(value)
#                 # Replace any occurrence of "0x" (case-insensitive) with random string of letters of equal length
#                 new_id = re.sub(
#                     r'0x', 
#                     lambda m: ''.join(random.choice(string.ascii_letters) for _ in range(len(m.group(0)))),
#                     new_id,
#                     flags=re.IGNORECASE
#                 )
#                 obj[key] = new_id
#             elif isinstance(value, (dict, list)):
#                 update_ids(value)
#     elif isinstance(obj, list):
#         for item in obj:
#             update_ids(item)



def generate_records(sample_record, num_records):
    """
    Build `num_records` independent variations of `sample_record`.

    Every entry is a deep copy of the sample, so the sample itself is never
    modified; each copy then has its id fields regenerated by update_ids().
    Returns the list of copies.
    """
    clones = [copy.deepcopy(sample_record) for _ in range(num_records)]
    for clone in clones:
        update_ids(clone)
    return clones



def sanitise_id_fields(record):
    """
    Recursively traverses a JSON object (dict or list) and sanitises any fields
    whose key ends with '_id' by removing every occurrence of '0x' (or '0X')
    from their string values. This helps avoid potential false positives for
    SQL hex encoding detected in API security scanning.

    Removal is repeated until the value stops changing, because deleting one
    match can splice the surrounding characters into a new '0x'
    (e.g. '00xx' -> '0x' after a single pass).

    Parameters:
        record (dict or list): The JSON-like structure to sanitise. It is
            modified in place.

    Returns:
        The same `record` object, sanitised.
    """
    if isinstance(record, dict):
        for key, value in record.items():
            if isinstance(value, (dict, list)):
                # Descend into nested containers.
                sanitise_id_fields(value)
            elif isinstance(value, str) and key.endswith("_id"):
                cleaned = value
                while True:
                    stripped = re.sub(r'0x', '', cleaned, flags=re.IGNORECASE)
                    if stripped == cleaned:
                        break
                    cleaned = stripped
                # Reassigning an existing key does not disturb the iteration.
                record[key] = cleaned
    elif isinstance(record, list):
        for item in record:
            sanitise_id_fields(item)
    return record

def insert_into_new_table(records, conn):
    """
    Reset the ssd_api_data_staging_anon table and load `records` into it.

    The table is truncated when it already exists and created otherwise (it is
    not dropped). Each record is then inserted with:
      - a sequential id starting at 1,
      - a freshly generated person_id (UUID4 hex, 32 characters), which is
        also written into the record's "la_child_id" field before the record
        is serialised,
      - json_payload and previous_json_payload both set to the serialised
        record,
      - current_hash and previous_hash both set to the SHA-256 digest of that
        serialised payload,
      - row_state 'new', submission_status 'pending', and NULL for
        partial_json_payload and api_response.
    last_updated and submission_timestamp share one timestamp taken before the
    first insert. All inserts are committed together at the end.

    Parameters:
        records (list of dict): Records to insert. Each dict is modified in
            place ("la_child_id" is overwritten).
        conn: Open database connection providing cursor() and commit(). It is
            left open for the caller to close.
    """
    # Check if table exists. If yes, truncate; otherwise, create table
    create_or_truncate_table_sql = """
    IF OBJECT_ID('ssd_api_data_staging_anon', 'U') IS NOT NULL
    BEGIN
        TRUNCATE TABLE ssd_api_data_staging_anon;
    END
    ELSE
    BEGIN
        CREATE TABLE ssd_api_data_staging_anon (
            id                      INT PRIMARY KEY,          -- Unique sequential identifier          
            person_id               NVARCHAR(48) NULL,         -- Link value (_person_id or equivalent)
            previous_json_payload   NVARCHAR(MAX) NULL,        -- Enable sub-attribute purge tracking
            json_payload            NVARCHAR(MAX) NULL,        -- JSON data payload
            partial_json_payload    NVARCHAR(MAX) NULL,        -- Reductive JSON data payload
            previous_hash           BINARY(32) NULL,           -- Previous hash of JSON payload
            current_hash            BINARY(32) NULL,           -- Current hash of JSON payload
            row_state               NVARCHAR(10) NULL,         -- Record state: New, Updated, Deleted, Unchanged
            last_updated            DATETIME NULL,             -- Last update timestamp
            submission_status       NVARCHAR(50) NULL,         -- Status: pending, sent, error
            api_response            NVARCHAR(MAX) NULL,        -- API response or error messages
            submission_timestamp    DATETIME                   -- Timestamp on API submission
        );
    END
    """

    # Prepare the insert statement.
    insert_sql = """
        INSERT INTO ssd_api_data_staging_anon (
            id,
            person_id,
            previous_json_payload,
            json_payload,
            partial_json_payload,
            previous_hash,
            current_hash,
            row_state,
            last_updated,
            submission_status,
            api_response,
            submission_timestamp
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """

    cursor = conn.cursor()
    try:
        cursor.execute(create_or_truncate_table_sql)
        conn.commit()

        now = datetime.datetime.now()
        for record_id, record in enumerate(records, start=1):
            # Generate a UUID without hyphens (32 characters) and use it as
            # person_id, mirrored into the payload's "la_child_id".
            person_id = uuid.uuid4().hex
            record["la_child_id"] = person_id

            # Serialise and hash once; the "previous" columns start out as
            # exact duplicates of the current payload and hash.
            json_payload = json.dumps(record)
            payload_hash = hashlib.sha256(json_payload.encode('utf-8')).digest()

            cursor.execute(insert_sql,
                           record_id,
                           person_id,
                           json_payload,   # previous_json_payload
                           json_payload,   # json_payload
                           None,           # partial_json_payload
                           payload_hash,   # previous_hash
                           payload_hash,   # current_hash
                           'new',          # row_state
                           now,            # last_updated
                           'pending',      # submission_status
                           None,           # api_response
                           now)            # submission_timestamp
        conn.commit()
    finally:
        # Release the cursor even when an insert fails part-way through.
        cursor.close()
    print(f"{len(records)} records inserted into ssd_api_data_staging_anon.")



def query_staging_table(conn_str):
    """
    Print a sample (TOP 5 rows) of the ssd_api_data_staging_anon table.

    Opens its own connection from `conn_str`, prints the status columns of
    each fetched row with the binary hashes rendered as hex, followed by the
    decoded JSON payload. If the connection cannot be opened the error is
    printed and the function returns without querying.

    Parameters:
        conn_str (str): ODBC connection string passed to pyodbc.connect().
    """
    try:
        conn = pyodbc.connect(conn_str)
        print("Database connection successful.")
    except Exception as e:
        print("Error connecting to database:", e)
        return

    query_sql = "SELECT top 5 person_id, json_payload, previous_hash, current_hash, row_state, last_updated, submission_status, api_response, submission_timestamp FROM ssd_api_data_staging_anon"

    try:
        cursor = conn.cursor()
        cursor.execute(query_sql)

        for row in cursor.fetchall():
            person_id, json_payload, previous_hash, current_hash, row_state, last_updated, submission_status, api_response, submission_timestamp = row

            # Convert binary hash values to hex strings so they can be reviewed.
            previous_hash_hex = previous_hash.hex() if previous_hash is not None else None
            current_hash_hex = current_hash.hex() if current_hash is not None else None

            record = json.loads(json_payload)
            print(f"PersonID: {person_id}, PrevHash: {previous_hash_hex}, CurrHash: {current_hash_hex}, "
                  f"RowState: {row_state}, LastUpdated: {last_updated}, SubmissionStatus: {submission_status}, "
                  f"API: {api_response}, SubmissionTimestamp: {submission_timestamp}")
            print("JSON Payload: (payload output commented for now)")
            print(record)
            print("-" * 60)
    finally:
        # Always release the connection, even if the query or JSON decoding fails.
        conn.close()

    
def main():
    """
    Generate fake records from the sample payload and load them into the
    ssd_api_data_staging_anon table, then print a sample of the stored rows.

    Steps:
      1. Parse the embedded sample JSON record.
      2. Create `num_records` (module-level setting) copies with regenerated
         id values via generate_records().
      3. Connect to the hard-coded SQL Server database using a trusted
         connection; on failure, print the error and return.
      4. Insert the records and query the table back.
    The connection opened here is closed even if the insert or query raises.
    """
    # Define sample JSON record - taken 1:1 from DfE spec. 
    # Further spec revisions can be dropped straight in to replicate. 
    
    sample_json_str = """
    {
        "la_child_id" : "Child1234",
        "mis_child_id" : "Supplier-Child-1234",
        "child_details" : {
            "unique_pupil_number" : "ABC0123456789",
            "former_unique_pupil_number" : "DEF0123456789",
            "unique_pupil_number_unknown_reason" : "UN1",
            "first_name" : "John",
            "surname" : "Doe",
            "date_of_birth" : "2022-06-14",
            "expected_date_of_birth" : "2022-06-14",
            "sex" : "M",
            "ethnicity" : "WBRI",
            "disabilities" : [
                "HAND",
                "VIS"
            ],
            "postcode" : "AB12 3DE",
            "uasc_flag" : true,
            "uasc_end_date" : "2022-06-14",
            "purge" : false
        },
        "health_and_wellbeing" : {
            "sdq_assessments" : [{
                "date" : "2022-06-14",
                "score" : 20
            }],
            "purge" : false
        },
        "social_care_episodes" : [{
            "social_care_episode_id" : "ABC123456",
            "referral_date" : "2022-06-14",
            "referral_source" : "1C",
            "referral_no_further_action_flag" : false,
            "care_worker_details" : [{
                "worker_id" : "ABC123",
                "start_date" : "2022-06-14",
                "end_date" : "2022-06-14"
            }],
            "child_and_family_assessments" : [{
                "child_and_family_assessment_id" : "ABC123456",
                "start_date" : "2022-06-14",
                "authorisation_date" : "2022-06-14",
                "factors" : [
                    "1C",
                    "4A"
                ],
                "purge" : false
            }],
            "child_in_need_plans": [{
                "child_in_need_plan_id": "ABC123456",
                "start_date": "2022-06-14",
                "end_date": "2022-06-14",
                "purge" : false
            }],
            "section_47_assessments": [{
                "section_47_assessment_id": "ABC123456",
                "start_date": "2022-06-14",
                "icpc_required_flag": true,
                "icpc_date": "2022-06-14",
                "end_date": "2022-06-14",
                "purge" : false
            }],
            "child_protection_plans": [{
                "child_protection_plan_id": "ABC123456",
                "start_date": "2022-06-14",
                "end_date": "2022-06-14",
                "purge" : false
            }],
            "child_looked_after_placements": [{
                "child_looked_after_placement_id": "ABC123456",
                "start_date": "2022-06-14",
                "start_reason": "S",
                "placement_type": "K1",
                "postcode": "AB12 3DE",
                "end_date": "2022-06-14",
                "end_reason": "E3",
                "change_reason": "CHILD",
                "purge" : false
            }],
            "adoption" : {
                "initial_decision_date" : "2022-06-14",
                "matched_date" : "2022-06-14",
                "placed_date" : "2022-06-14",
                "purge" : false
            },
            "care_leavers" : {
                "contact_date" : "2022-06-14",
                "activity" : "F2",
                "accommodation" : "D",
                "purge" : false
            },
            "closure_date": "2022-06-14",
            "closure_reason": "RC7",
            "purge" : false
        }],
        "purge" : false
    }
    """
    sample_record = json.loads(sample_json_str)

    # Generate duplicated records (with unique id values updated within the JSON payloads).
    records = generate_records(sample_record, num_records)

    server = "ESLLREPORTS04V"
    database = "HDM_Local"
    trusted_connection = "yes"
    driver = '{ODBC Driver 17 for SQL Server}' # or {SQL Server}

    conn_str = f"DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection={trusted_connection}"
    
    try:
        conn = pyodbc.connect(conn_str)
        print("Database connection successful.")
    except Exception as e:
        print("Error connecting to database:", e)
        return

    try:
        insert_into_new_table(records, conn)
        # query_staging_table() opens and closes its own connection.
        query_staging_table(conn_str)
    finally:
        # Release the connection even when the insert or query fails.
        conn.close()



# Run the generate/insert/query workflow when this code is the entry point
# (i.e. not when it is imported as a module).
if __name__ == '__main__':
    main()


Database connection successful.
3000 records inserted into ssd_api_data_staging_anon.
Database connection successful.
PersonID: f38261d32d504f74b7a86600bfaa101f, PrevHash: 097dc7185f10cbaec0312638c96e35dd33df390f3fba174c1e688156b6dbb22e, CurrHash: 097dc7185f10cbaec0312638c96e35dd33df390f3fba174c1e688156b6dbb22e, RowState: new, LastUpdated: 2025-04-10 13:12:15.633000, SubmissionStatus: pending, API: None, SubmissionTimestamp: 2025-04-10 13:12:15.633000
JSON Payload: (payload output commented for now)
{'la_child_id': 'f38261d32d504f74b7a86600bfaa101f', 'mis_child_id': 'pSWBQmfWRUepp8lJX0I', 'child_details': {'unique_pupil_number': 'zs6FjwzTZa0kc', 'former_unique_pupil_number': 'DEF0123456789', 'unique_pupil_number_unknown_reason': 'UN1', 'first_name': 'John', 'surname': 'Doe', 'date_of_birth': '2022-06-14', 'expected_date_of_birth': '2022-06-14', 'sex': 'M', 'ethnicity': 'WBRI', 'disabilities': ['HAND', 'VIS'], 'postcode': 'AB12 3DE', 'uasc_flag': True, 'uasc_end_date': '2022-06-14', 'pu

In [39]:
# ## reset flag values after a run of internal/external API testing

# import pyodbc


# def reset_table_fields(percent):
#     """
#     Resets the row_state, submission_status, last_updated, and api_response fields
#     for a percentage (1-100) of records in the ssd_api_data_staging_anon table.
    
#     Parameters:
#         percent (int): The percentage of records to update (must be between 1 and 100).
#     """
#     if not (1 <= percent <= 100):
#         raise ValueError("The percentage must be an integer between 1 and 100.")
    
#     server = "ESLLREPORTS04V"
#     database = "HDM_Local"
#     trusted_connection = "yes"
#     driver = '{ODBC Driver 17 for SQL Server}'  # or '{SQL Server}'
    
#     conn_str = f"DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection={trusted_connection}"
    
#     try:
#         conn = pyodbc.connect(conn_str)
#         print("Database connection successful.")
#     except Exception as e:
#         print("Error connecting to database:", e)
#         return

#     cursor = conn.cursor()
    
#     # Build the update SQL using the TOP (percent) PERCENT clause.
#     update_sql = f"""
#     UPDATE TOP ({percent}) PERCENT ssd_api_data_staging_anon
#     SET row_state = 'new',
#         submission_status = 'pending',
#         last_updated = GETDATE(),
#         api_response = NULL
#     """
#     cursor.execute(update_sql)
#     conn.commit()
#     print(f"Successfully reset fields for {percent}% of the records.")
    
#     conn.close()



# if __name__ == '__main__':
#     # Example: Reset 25% of the records.
#     reset_table_fields(25)



In [40]:
# import pyodbc
# import json

# def query_staging_table(conn_str):
#     """Query and print all records from the ssd_api_data_staging_anon table."""
#     conn_str = conn_str
    
#     try:
#         conn = pyodbc.connect(conn_str)
#         print("Database connection successful.")
#     except Exception as e:
#         print("Error connecting to database:", e)
#         return

#     cursor = conn.cursor()
#     query_sql = "SELECT person_id, json_payload, previous_hash, current_hash, row_state, last_updated, submission_status, api_response, submission_timestamp FROM ssd_api_data_staging_anon"
#     cursor.execute(query_sql)
    
#     rows = cursor.fetchall()
#     for row in rows:
#         person_id, json_payload, previous_hash, current_hash, row_state, last_updated, submission_status, api_response, submission_timestamp = row
        
#         # Convert binary hash vals to hex string so i can review them
#         previous_hash_hex = previous_hash.hex() if previous_hash is not None else None
#         current_hash_hex = current_hash.hex() if current_hash is not None else None

#         record = json.loads(json_payload)
#         print(f"PersonID: {person_id}, PrevHash: {previous_hash_hex}, CurrHash: {current_hash_hex}, "
#               f"RowState: {row_state}, LastUpdated: {last_updated}, SubmissionStatus: {submission_status}, "
#               f"API: {api_response}, SubmissionTimestamp: {submission_timestamp}")
#         print("JSON Payload:")
#         print(record)
#         print("-" * 60)
    
#     conn.close()



# server = "ESLLREPORTS04V"
# database = "HDM_Local"
# trusted_connection = "yes"
# driver = '{ODBC Driver 17 for SQL Server}' # or {SQL Server}
# conn_str = f"DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection={trusted_connection}"

# query_staging_table(conn_str)

In [41]:
# ## destructive payload ops

# num_records_to_damage_int = 1 # how many records to break/damage in stored _anon data



# import pyodbc
# import json
# import datetime

# def set_future_dates(record, future_date="2099-12-31"):
#     """
#     Recursively update any key whose name contains 'date' (case-insensitive)
#     to the specified future date string.
#     """
#     if isinstance(record, dict):
#         for key, value in record.items():
#             if "date" in key.lower() and isinstance(value, str):
#                 record[key] = future_date
#             else:
#                 set_future_dates(value, future_date)
#     elif isinstance(record, list):
#         for item in record:
#             set_future_dates(item, future_date)
#     return record

# def update_factors(record):
#     """
#     Recursively find any key named 'factors' and replace its value with an array
#     of 42 items (exceeding the max size of 41). The items are generated as:
#       "1A", "4A", "1B", "4B", ..., "1U", "4U" (for 21 letter pairs).
#     """
#     def generate_factors():
#         factors = []
#         for j in range(21):  # 21 pairs => 42 elements
#             letter = chr(65 + j)  # 65 is 'A'
#             factors.append("1" + letter)
#             factors.append("4" + letter)
#         return factors

#     if isinstance(record, dict):
#         for key, value in record.items():
#             if key == "factors":
#                 record[key] = generate_factors()
#             else:
#                 update_factors(value)
#     elif isinstance(record, list):
#         for item in record:
#             update_factors(item)
#     return record

# def update_disabilities(record):
#     """
#     Recursively find the key 'disabilities' and if its value is an array,
#     replace it with a single string value.
#     """
#     if isinstance(record, dict):
#         for key, value in record.items():
#             if key == "disabilities" and isinstance(value, list):
#                 record[key] = "HAND"
#             else:
#                 update_disabilities(value)
#     elif isinstance(record, list):
#         for item in record:
#             update_disabilities(item)
#     return record

# def update_id_keys(record):
#     """
#     Recursively update dictionary keys that end with '_id' by appending '_failtest' to the key.
    
#     For example, a key "la_child_id" becomes "la_child_id_failtest".
    
#     Parameters:
#         record (dict or list): The JSON-like structure (nested dictionaries/lists).
        
#     Returns:
#         The updated record with modified keys.
#     """
#     if isinstance(record, dict):
#         # Iterate over a copy of keys to safely modify the dict.
#         for key in list(record.keys()):
#             value = record[key]
#             # First, recursively update nested structures.
#             if isinstance(value, (dict, list)):
#                 update_id_keys(value)
#             # If the key ends with '_id', rename it.
#             if key.endswith("_id"):
#                 new_key = key + "_failtest"
#                 record[new_key] = record.pop(key)
#     elif isinstance(record, list):
#         for item in record:
#             update_id_keys(item)
#     return record

# # Example usage:
# sample_json = {
#     "la_child_id": "Child1234",
#     "mis_child_id": "Supplier-Child-1234",
#     "child_details": {
#         "unique_pupil_number": "ABC0123456789",
#         "first_name": "John",
#         "surname": "Doe"
#     }
# }



# def remove_purge_keys(record):
#     """
#     Recursively remove all keys named 'purge' from the record.
#     """
#     if isinstance(record, dict):
#         keys_to_remove = [k for k in record if k == "purge"]
#         for k in keys_to_remove:
#             del record[k]
#         for key, value in record.items():
#             remove_purge_keys(value)
#     elif isinstance(record, list):
#         for item in record:
#             remove_purge_keys(item)
#     return record

# def break_records(conn_str, num_to_modify, operations=None):
#     """
#     Connects to the database table and applies destructive modifications
#     to a defined number of records (based on the primary key order).

#     Parameters:
#        num_to_modify (int): The number of records to modify.
#        operations (list of functions): Destructive operations to apply to each record.
#                                       Each function must accept a dict and return a dict.
#                                       Defaults to all five operations if None.
#     """

#     # break records operations, 'op'
#     if operations is None:
#         operations = [set_future_dates, update_factors, update_disabilities, remove_purge_keys, update_id_keys]


    
#     try:
#         conn = pyodbc.connect(conn_str)
#         print("Database connection successful.")
#     except Exception as e:
#         print("Error connecting to database:", e)
#         return
    
#     cursor = conn.cursor()
    
#     # Fetch a defined number of records (ordered by id) to modify.
#     fetch_sql = f"""
#         SELECT id, json_payload 
#         FROM ssd_api_data_staging_anon 
#         ORDER BY id 
#         OFFSET 0 ROWS FETCH NEXT {num_to_modify} ROWS ONLY
#     """
#     cursor.execute(fetch_sql)
#     rows = cursor.fetchall()
    
#     modified_count = 0
#     for row in rows:
#         record_id = row[0]
#         json_str = row[1]
#         try:
#             record = json.loads(json_str)
#         except Exception as e:
#             print(f"Error parsing JSON for record id {record_id}: {e}")
#             continue
        
#         # Apply each destructive operation.
#         for op in operations:
#             record = op(record)
        
#         new_json_str = json.dumps(record)
        
#         # Update the record in the table.
#         update_sql = "UPDATE ssd_api_data_staging_anon SET json_payload = ? WHERE id = ?"
#         cursor.execute(update_sql, new_json_str, record_id)
#         modified_count += 1
    
#     conn.commit()
#     print(f"Modified {modified_count} records with destructive operations.")
#     conn.close()




# if __name__ == '__main__':


#     server = "ESLLREPORTS04V"
#     database = "HDM_Local"
#     trusted_connection = "yes"
#     driver = '{ODBC Driver 17 for SQL Server}' # or {SQL Server}
#     conn_str = f"DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection={trusted_connection}"
    

#     # For example, modify N records using a/all destructive operations.
#     # from [set_future_dates, update_factors, remove_purge_keys, update_disabilities, update_id_keys]
#     break_records(conn_str, num_records_to_damage_int,[update_factors])

#     query_staging_table(conn_str)

In [42]:
# response code testing

# [
#     set_future_dates,  - API pass
#  update_factors, - API fail malformed payload
#  remove_purge_keys, 
#  update_disabilities
# ]

In [43]:
# ## make some simple VALID changes to a % of the stored fake data.

# import pyodbc
# import json
# import hashlib
# import random
# import math


# def update_row_state_based_on_hash_diff(conn_str):
#     """
#     Connects to the database and for records in ssd_api_data_staging_anon where 
#     previous_hash and current_hash differ, sets:
#         - row_state = 'updated'
#         - submission_status = 'pending'
#         - api_response = NULL
#         - submission_timestamp = NULL
#     """
    
#     try:
#         conn = pyodbc.connect(conn_str)
#         print("Database connection successful.")
#     except Exception as e:
#         print("Error connecting to database:", e)
#         return
    
#     cursor = conn.cursor()
    
#     # Update the row_state and reset submission fields when the hashes differ.
#     update_sql = """
#     UPDATE ssd_api_data_staging_anon
#     SET row_state = 'updated',
#         submission_status = 'pending',
#         api_response = NULL,
#         submission_timestamp = NULL
#     WHERE (
#         (previous_hash IS NOT NULL AND current_hash IS NOT NULL AND previous_hash <> current_hash)
#         OR (previous_hash IS NULL AND current_hash IS NOT NULL)
#         OR (previous_hash IS NOT NULL AND current_hash IS NULL)
#     )
#     """
#     cursor.execute(update_sql)
#     conn.commit()
#     print(f"Updated row_state and reset submission fields for {cursor.rowcount} records.")
#     conn.close()


# import pyodbc
# import json
# import hashlib
# import random
# import math

# def update_random_records(conn_str, percent):
#     """
#     Connects to the ssd_api_data_staging_anon table and applies destructive changes
#     to a random subset of records based on the given percentage.
    
#     For each selected record (randomly chosen based on the percentage of total records),
#     the function randomly applies one of the following changes:
#       - Update child_details.sex to "F", OR
#       - Update child_details.first_name to "Jeff".
      
#     Before making any destructive changes, if previous_hash is null, it will be set to the current_hash.
#     Similarly, if previous_json_payload is null, it will be set to the new JSON payload.
    
#     After modifying the JSON payload, the SHA-256 hash is recalculated and stored in current_hash.
#     Additionally, the following fields are updated:
#         row_state = 'updated',
#         submission_status = 'pending',
#         api_response = NULL,
#         submission_timestamp = NULL

#     Parameters:
#       conn_str (str): The database connection string.
#       percent (int): Percentage of total records to update (must be between 1 and 100).
#     """
#     if not (1 <= percent <= 100):
#         raise ValueError("Percentage must be an integer between 1 and 100.")
        
#     try:
#         conn = pyodbc.connect(conn_str)
#         print("Database connection successful.")
#     except Exception as e:
#         print("Error connecting to database:", e)
#         return

#     cursor = conn.cursor()
    
#     # Fetch all records (id and json_payload) from the table.
#     query_sql = "SELECT id, json_payload FROM ssd_api_data_staging_anon"
#     cursor.execute(query_sql)
#     rows = cursor.fetchall()
    
#     total_records = len(rows)
#     if total_records == 0:
#         print("No records found.")
#         conn.close()
#         return

#     # Determine the number of records to update based on the percentage.
#     num_to_update = math.ceil(total_records * (percent / 100.0))
#     print(f"Total records: {total_records}; updating {num_to_update} records based on {percent}%.")

#     # Randomly choose the records to update.
#     records_to_update = random.sample(list(rows), min(num_to_update, total_records))
    
#     update_count = 0

#     # For each selected record, randomly choose which destructive operation to apply.
#     for rec in records_to_update:
#         rec_id, json_payload = rec
#         try:
#             data = json.loads(json_payload)
#         except Exception as e:
#             print(f"Error parsing JSON for record id {rec_id}: {e}")
#             continue
        
#         # Randomly select one of the two operations.
#         operation = random.choice(["sex", "first_name"])
        
#         if operation == "sex":
#             if "child_details" in data and "sex" in data["child_details"]:
#                 data["child_details"]["sex"] = "F"
#             else:
#                 print(f"Record id {rec_id} missing 'child_details.sex'; skipping operation.")
#                 continue
#         elif operation == "first_name":
#             if "child_details" in data and "first_name" in data["child_details"]:
#                 data["child_details"]["first_name"] = "Jeff"
#             else:
#                 print(f"Record id {rec_id} missing 'child_details.first_name'; skipping operation.")
#                 continue
        
#         new_json_str = json.dumps(data)
#         new_hash = hashlib.sha256(new_json_str.encode('utf-8')).digest()
        
#         # Update query now sets previous_hash and previous_json_payload if they are null.
#         update_sql = """
#         UPDATE ssd_api_data_staging_anon 
#         SET previous_hash = COALESCE(previous_hash, current_hash),
#             previous_json_payload = COALESCE(previous_json_payload, ?),
#             json_payload = ?, 
#             current_hash = ?, 
#             row_state = 'updated', 
#             submission_status = 'pending', 
#             api_response = NULL, 
#             submission_timestamp = NULL 
#         WHERE id = ?
#         """
#         # Pass new_json_str twice (fallback for previous_json_payload, then json_payload), then new_hash and rec_id.
#         cursor.execute(update_sql, new_json_str, new_json_str, new_hash, rec_id)
#         update_count += 1

#     conn.commit()
#     print(f"Updated {update_count} records with valid changes.")
#     conn.close()



# # def update_random_records(conn_str, percent):
# #     """
# #     Connects to the ssd_api_data_staging_anon table and applies destructive changes
# #     to a random subset of records based on the given percentage.
    
# #     For each selected record (randomly chosen based on the percentage of total records),
# #     the function randomly applies one of the following changes:
# #       - Update child_details.sex to "F", OR
# #       - Update child_details.first_name to "Jeff".
      
# #     After modifying the JSON payload, the SHA-256 hash is recalculated and stored in current_hash.
# #     Additionally, the following fields are updated:
# #         row_state = 'updated',
# #         submission_status = 'pending',
# #         api_response = NULL,
# #         submission_timestamp = NULL

# #     Parameters:
# #       conn_str (str): The database connection string.
# #       percent (int): Percentage of total records to update (must be between 1 and 100).
# #     """
# #     if not (1 <= percent <= 100):
# #         raise ValueError("Percentage must be an integer between 1 and 100.")
        
# #     try:
# #         conn = pyodbc.connect(conn_str)
# #         print("Database connection successful.")
# #     except Exception as e:
# #         print("Error connecting to database:", e)
# #         return

# #     cursor = conn.cursor()
    
# #     # Fetch all records (id and json_payload) from the table.
# #     query_sql = "SELECT id, json_payload FROM ssd_api_data_staging_anon"
# #     cursor.execute(query_sql)
# #     rows = cursor.fetchall()
    
# #     total_records = len(rows)
# #     if total_records == 0:
# #         print("No records found.")
# #         conn.close()
# #         return

# #     # Determine the number of records to update based on the percentage.
# #     num_to_update = math.ceil(total_records * (percent / 100.0))
# #     print(f"Total records: {total_records}; updating {num_to_update} records based on {percent}%.")

# #     # Randomly choose the records to update.
# #     records_to_update = random.sample(list(rows), min(num_to_update, total_records))
    
# #     update_count = 0

# #     # For each selected record, randomly choose which destructive operation to apply.
# #     for rec in records_to_update:
# #         rec_id, json_payload = rec
# #         try:
# #             data = json.loads(json_payload)
# #         except Exception as e:
# #             print(f"Error parsing JSON for record id {rec_id}: {e}")
# #             continue
        
# #         # Randomly select one of the two operations.
# #         operation = random.choice(["sex", "first_name"])
        
# #         if operation == "sex":
# #             if "child_details" in data and "sex" in data["child_details"]:
# #                 data["child_details"]["sex"] = "F"
# #             else:
# #                 print(f"Record id {rec_id} missing 'child_details.sex'; skipping operation.")
# #                 continue
# #         elif operation == "first_name":
# #             if "child_details" in data and "first_name" in data["child_details"]:
# #                 data["child_details"]["first_name"] = "Jeff"
# #             else:
# #                 print(f"Record id {rec_id} missing 'child_details.first_name'; skipping operation.")
# #                 continue
        
# #         new_json_str = json.dumps(data)
# #         new_hash = hashlib.sha256(new_json_str.encode('utf-8')).digest()
        
# #         update_sql = """
# #         UPDATE ssd_api_data_staging_anon 
# #         SET json_payload = ?, 
# #             current_hash = ?, 
# #             row_state = 'updated', 
# #             submission_status = 'pending', 
# #             api_response = NULL, 
# #             submission_timestamp = NULL 
# #         WHERE id = ?
# #         """
# #         cursor.execute(update_sql, new_json_str, new_hash, rec_id)
# #         update_count += 1

# #     conn.commit()
# #     print(f"Updated {update_count} records with valid changes.")
# #     conn.close()



# if __name__ == '__main__':
#     # Example connection string (adjust as needed)
#     conn_str = "DRIVER={ODBC Driver 17 for SQL Server};SERVER=ESLLREPORTS04V;DATABASE=HDM_Local;Trusted_Connection=yes;"
#     # For example, update 45% of records; each selected record gets one randomly chosen operation.
#     update_random_records(conn_str, 45)
