Setup:
- Run this notebook in Google Colab by changing the URL from 'github.com' to 'githubtocolab.com'. 
- Ensure that DOD_clinical_data.xlsx and/or Data_All_Jul25.mat is in the working directory.

In [35]:
# Some packages
import pandas as pd
import numpy as np

These are some structures that help with iterating through the classification boundaries.

In [36]:
# Clinical classification boundaries
# Each entry is (lower bound, upper bound, label); consecutive classes share an edge,
# so the list is assembled from one ascending sequence of edges.
_clinical_edges = (0, 25, 40, 60, np.inf)
_clinical_labels = (
	"Normal Hearing",
	"Mild Hearing Loss",
	"Moderate Hearing Loss",
	"Severe and Profound hearing loss",
)
clinical_ranges = list(zip(_clinical_edges[:-1], _clinical_edges[1:], _clinical_labels))

# Military boundaries - This models the table from: https://pmc.ncbi.nlm.nih.gov/articles/PMC10571680/table/table1-23312165231198374/
# Each row lists the per-frequency upper limits for the better ear, then for the worse ear.
# np.inf means "no limit at this frequency".
# Note: NH is stored as 'H0' so that the labels sort in order of severity (needed by the rounding step).
_military_frequencies = ('500', '1000', '2000', '3000', '4000', '6000')
_military_limits = {
	'H0' : ((20, 20, 20, 20, 20, 20), (20, 20, 20, 20, 20, 20)),
	'H1' : ((25, 25, 25, 25, 25, 65), (30, 30, 30, 35, 45, np.inf)),
	'H2' : ((25, 30, 25, 40, 60, 70), (40, 40, 60, np.inf, np.inf, np.inf)),
	'H3' : ((np.inf,) * 6, (np.inf,) * 6),
}
military_ranges = {
	label : {
		'Better' : dict(zip(_military_frequencies, better_limits)),
		'Worse' : dict(zip(_military_frequencies, worse_limits)),
	}
	for label, (better_limits, worse_limits) in _military_limits.items()
}

These are helper functions that take in the different frequency dB HL values of a given participant and return the classification.

In [37]:
# Helper function: Classifies a soldier's hearing profile based off of clinical metrics
def classify_clinical(RU500, RU1000, RU2000, LU500, LU1000, LU2000):
	"""Classify hearing from the pure-tone average (PTA) of six thresholds.

	Parameters
	----------
	RU500, RU1000, RU2000 : number
		Thresholds at 500, 1000 and 2000 Hz taken from the 'RU...' inputs.
	LU500, LU1000, LU2000 : number
		Thresholds at 500, 1000 and 2000 Hz taken from the 'LU...' inputs.

	Returns
	-------
	(profile, PTA_score)
		profile is the label of the first entry of clinical_ranges whose
		inclusive [lower, upper] interval contains the PTA, so a PTA sitting
		exactly on a shared edge gets the milder label. A PTA below the first
		lower bound is assigned the first label. profile is "" when no class
		applies (for example when the PTA is NaN).
		PTA_score is the mean of the six inputs.
	"""

	# Calculate the PTA
	PTA_score = (RU500 + RU1000 + RU2000 + LU500 + LU1000 + LU2000)/6

	# A PTA under the lowest boundary is better than the best class, so it
	# belongs to that class instead of being left unlabelled.
	# Relies on clinical_ranges being listed in ascending order.
	lowest_bound, _, best_label = clinical_ranges[0]
	if PTA_score < lowest_bound:
		return best_label, PTA_score

	# First matching range wins
	for lower_bound, upper_bound, label in clinical_ranges:
		if lower_bound <= PTA_score <= upper_bound:
			return label, PTA_score

	# Nothing matched (NaN compares False against every boundary)
	return "", PTA_score

# Helper function: Finds the first military profile whose limits one ear satisfies
def _classify_ear(ear, side):
	"""Return the first label in military_ranges that `ear` fits, or "" if none.

	ear maps frequency strings ('500' ... '6000') to thresholds; side is
	'Better' or 'Worse' and selects which set of limits to compare against.
	An ear fits a profile when every threshold is <= the matching limit.
	"""
	for label, limits in military_ranges.items():
		if all(ear[freq] <= limits[side][freq] for freq in ear):
			return label
	# Only reachable when a comparison fails against every profile, e.g. a NaN threshold
	return ""

# Helper function: Classifies a soldier's hearing profile based off of military metrics
def classify_military(RU500, RU1000, RU2000, RU3000, RU4000, RU6000, LU500, LU1000, LU2000, LU3000, LU4000, LU6000):
	"""Classify hearing with the military profile table in military_ranges.

	Parameters
	----------
	RU500 ... RU6000 : number
		Thresholds at 500, 1000, 2000, 3000, 4000 and 6000 Hz for the 'RU' ear.
	LU500 ... LU6000 : number
		Thresholds at the same frequencies for the 'LU' ear.

	Returns
	-------
	(profile, better_profile, worse_profile)
		better_profile / worse_profile are the profiles of the better and worse
		ear; profile is the more severe of the two. 'H0' is reported as 'NH'.
		An ear that fits no profile is reported as "", and in that case the
		overall profile is "" as well rather than the other ear's label.
	"""

	# Initialize variables
	frequencies = ('500', '1000', '2000', '3000', '4000', '6000')
	right_ear = dict(zip(frequencies, (RU500, RU1000, RU2000, RU3000, RU4000, RU6000)))
	left_ear = dict(zip(frequencies, (LU500, LU1000, LU2000, LU3000, LU4000, LU6000)))

	# Calculate PTA
	right_score = (RU500 + RU1000 + RU2000 + RU3000 + RU4000 + RU6000)/6
	left_score = (LU500 + LU1000 + LU2000 + LU3000 + LU4000 + LU6000)/6

	# Define better and worse ear. The lower average is better; ties go to the right ear.
	if right_score <= left_score:
		better_ear, worse_ear = right_ear, left_ear
	else:
		better_ear, worse_ear = left_ear, right_ear

	# Classify each ear against its own set of limits
	better_profile = _classify_ear(better_ear, 'Better')
	worse_profile = _classify_ear(worse_ear, 'Worse')

	# Round up to the higher profile. 'H0' < 'H1' < 'H2' < 'H3' as strings, so max() picks
	# the more severe one. If either ear is unclassified the overall profile is unknown.
	if better_profile and worse_profile:
		profile = max(worse_profile, better_profile)
	else:
		profile = ""

	# Rename 'H0' back into 'NH'
	profile, better_profile, worse_profile = (
		'NH' if label == 'H0' else label
		for label in (profile, better_profile, worse_profile)
	)

	# Output tuple is defined below
	return profile, better_profile, worse_profile

Here's an example of how to use these functions. The following code iterates through all participants in the 'DOD_clinical_data.xlsx' sheet, and outputs both the clinical and military classification in a separate file called 'DOD_clinical_labels_excel.xlsx'.

In [38]:
# Input workbook (one row per participant) and the workbook the labels are written to
input_path = 'DOD_clinical_data.xlsx'
output_path = 'DOD_clinical_labels_excel.xlsx'
df = pd.read_excel(input_path)

# Column names, in the positional order each classifier expects its arguments
clinical_columns = ['RU500', 'RU1000', 'RU2000', 'LU500', 'LU1000', 'LU2000']
military_columns = [
	'RU500', 'RU1000', 'RU2000', 'RU3000', 'RU4000', 'RU6000',
	'LU500', 'LU1000', 'LU2000', 'LU3000', 'LU4000', 'LU6000',
]

# Result accumulators (better_profiles / worse_profiles are created but never filled)
ids, clinical_profiles, military_profiles, debug = [], [], [], []
better_profiles, worse_profiles = [], []

# Classify every participant with both schemes
for row_label, participant in df.iterrows():
	clinical_profile, PTA_score = classify_clinical(
		*(participant[column] for column in clinical_columns)
	)
	military_profile, better_profile, worse_profile = classify_military(
		*(participant[column] for column in military_columns)
	)

	ids.append(participant['ID'])
	clinical_profiles.append(clinical_profile)
	military_profiles.append(military_profile)
	# Optional: per-participant detail kept in memory only, not written to the output file
	debug.append(f"PTA = {PTA_score}, Better Ear = {better_profile}, Worse Ear = {worse_profile}")

# Assemble the label table and write it to a separate .xlsx file
data = {'ID' : ids, 'Clinical Profile' : clinical_profiles, 'Military Profile' : military_profiles}
final_df = pd.DataFrame(data)
final_df.to_excel(output_path)


Now, here's an example that loads in the DOD struct and directly computes the profile. This only differs in how we access the data, so hopefully we will get the same result.

In [39]:
# Read the file & set output path
import math
import scipy.io
mat = scipy.io.loadmat('Data_All_Jul25.mat')
subjects = mat['ans'][0]  # one struct record per participant
output_path = 'DOD_clinical_labels_struct.xlsx'

# Audiogram frequencies, in the positional order the classifiers take them
frequencies = (500, 1000, 2000, 3000, 4000, 6000)

def _nan_to_zero(value):
	"""Return value truncated to int, or 0 when value is NaN."""
	# NOTE(review): a missing threshold becomes 0, which passes every limit check,
	# so participants with missing data can be labelled as better than they are - confirm intended.
	return 0 if math.isnan(value) else int(value)

def _threshold(record, ear, frequency):
	"""Read one audiogram threshold from a participant record.

	ear is 'right' or 'left'; frequency is the number in the 'f<frequency>' field name.
	The repeated [0][0] steps unwrap the nested arrays found at each struct level.
	"""
	raw = record['aud']['audiogram'][0][0]['ac'][0][0][ear][0][0][f'f{frequency}'][0][0][0][0]
	return _nan_to_zero(raw)

# Initialize variables
ids, clinical_profiles, military_profiles, debug = [], [], [], []
better_profiles, worse_profiles = [], []  # created for parity with the other lists; never filled

# Iterate through all participants and run the classification functions
for record in subjects:

	# Access the data from the struct (the ID is the record's first field)
	participant_id = _nan_to_zero(record[0][0][0])
	right = [_threshold(record, 'right', frequency) for frequency in frequencies]
	left = [_threshold(record, 'left', frequency) for frequency in frequencies]

	# Run the classification functions; the clinical PTA only uses 500, 1000 and 2000 Hz
	clinical_profile, PTA_score = classify_clinical(*right[:3], *left[:3])
	military_profile, better_profile, worse_profile = classify_military(*right, *left)
	ids.append(participant_id)
	clinical_profiles.append(clinical_profile)
	military_profiles.append(military_profile)
	debug.append(f"PTA = {PTA_score}, Better Ear = {better_profile}, Worse Ear = {worse_profile}") # Optional

# Output all values into a separate .xlsx file
data = {
	'ID' : ids,
	'Clinical Profile' : clinical_profiles,
	'Military Profile' : military_profiles
}
final_df = pd.DataFrame(data)
final_df.to_excel(output_path)
