# How to use a MATLAB data structure imported into Python

In [1]:
# Read the MATLAB .mat file into Python with SciPy
import scipy.io

# Make sure this path points at the .mat file (adjust it if the file lives elsewhere)
mat_file = 'Data_All_Jul25.mat'
mat = scipy.io.loadmat(mat_file)

In [None]:
# Display the entire loaded structure as the cell output.
# Warning: this took my computer 7.5 minutes to load...
# Note: If you want a visualization of what this struct looks like, I'd recommend
# saving the output of this cell as a .json file and viewing it there.
mat

In [2]:
# Pull the participant array out of the object returned by loadmat.
# It is stored under the key 'ans' (presumably MATLAB's default name for an unassigned result - confirm).
data = mat['ans']

In [3]:
# Check what kind of Python object the MATLAB struct was converted to
print(type(data))

<class 'numpy.ndarray'>


In [4]:
import numpy as np

In [5]:
# `data` is a 1 by 190 array. It can be best viewed through MATLAB, but essentially
# it is a table with one row per participant and the columns listed below.
data.shape

# id, attn_beh, aud, cog, eeg, subj_info
# 1003
# 1004
# 1005 (and so on, for 190 participants in total)

(1, 190)

In [6]:
# Let's try iterating through all participants.
# Since the dimensions are 1x190, we first have to grab index 0 of the first dimension.
# Then, each index of the next dimension represents one participant.

# For example, this is the first participant
# (its value is not shown: a notebook cell only displays its last expression)
data[0][0] 

# This is the last participant (index 189 = 190 - 1); this is the value displayed below
data[0][189]

np.void((array([[3124]], dtype=uint16), array([[(array([[3124]], dtype=uint16), array([[1]], dtype=uint8), MatlabOpaque([(b'', b'MCOS', b'datetime', array([[3707764736],
                             [         2],
                             [         1],
                             [         1],
                             [    119615],
                             [         1]], dtype=uint32))            ],
                     dtype=[('s0', 'O'), ('s1', 'O'), ('s2', 'O'), ('arr', 'O')]), array([[(array([[3124]], dtype=uint16), MatlabOpaque([(b'', b'MCOS', b'datetime', array([[3707764736],
                                     [         2],
                                     [         1],
                                     [         1],
                                     [    119616],
                                     [         1]], dtype=uint32))            ],
                             dtype=[('s0', 'O'), ('s1', 'O'), ('s2', 'O'), ('arr', 'O')]), array([[5]], dtype=uint

In [7]:
# How many participants do we have?
# data[0] is the single row of the 1x190 array, so its length is the participant count.
print(len(data[0]))

190


In [8]:
# Here's how to get the ID of the last participant.
# data[0][189] is the participant record; the next [0] selects its first field (the id),
# which is stored as a 1x1 array, so [0][0] unwraps it to a scalar.
data[0][189][0][0][0]

np.uint16(3124)

In [9]:
# Let's focus on the first participant.
# The index mapping can get a little confusing: fields are selected by name, and the
# [0][0] after each name steps into the array that wraps the nested value.
# This gets the first participant's audiogram, AirCon ('ac'), right ear, 500 Hz ('f500') score.
data[0][0]['aud']['audiogram'][0][0]['ac'][0][0]['right'][0][0]['f500'][0][0][0][0]

np.uint8(5)

# Check differences between two xlsx files

In [16]:
import pandas as pd

# Locations of the two spreadsheets being compared
original_path = '../output_excel.xlsx'
new_path = '../output_excel1.xlsx'

# Load each spreadsheet into its own DataFrame
original = pd.read_excel(original_path)
new = pd.read_excel(new_path)

In [17]:
# Compare the number of rows in the two spreadsheets
print(f"Original length: {len(original)}\nNew Length: {len(new)}")

Original length: 203
New Length: 203


In [18]:
# Spot check: is ID 1003 present in the original spreadsheet's ID column?
print(1003 in original['ID'].values)

True


In [19]:
# Find the IDs that appear in only one of the two spreadsheets.
# Membership is tested with the vectorized `isin` rather than a Python loop; this
# also avoids binding a loop variable named `id`, which would shadow the built-in
# `id()` for the rest of the kernel session.
original_ids = original['ID']
new_ids = new['ID']

# IDs present in `original` but missing from `new` (order and duplicates kept)
not_in_new = original_ids[~original_ids.isin(new_ids)].tolist()
# IDs present in `new` but missing from `original` (order and duplicates kept)
not_in_original = new_ids[~new_ids.isin(original_ids)].tolist()

# Every ID that is missing from either file
difference = not_in_new + not_in_original

In [20]:
# Summarize row counts and how many IDs are unique to each spreadsheet
print(f"Original length: {len(original)}\nNew Length: {len(new)}\nNot In New Length: {len(not_in_new)}\nNot In Original Length: {len(not_in_original)}")

Original length: 203
New Length: 203
Not In New Length: 0
Not In Original Length: 0


In [21]:
# Show the IDs that appear in only one of the two spreadsheets (empty list = same IDs in both)
difference

[]

In [22]:
# Build `new_cleaned`: the rows of `new` whose ID is not in `difference`,
# restricted to the three columns that are compared further down.
# A boolean mask is used instead of looping over `iterrows()`, so the column
# dtypes are kept as loaded and no per-row Python work is needed.
profile_columns = ['ID', 'Clinical Profile', 'Military Profile']
new_cleaned = (
	new.loc[~new['ID'].isin(difference), profile_columns]
	.reset_index(drop=True)  # fresh 0..n-1 index for the filtered rows
)

In [23]:
# Build `original_cleaned`: the rows of `original` whose ID is not in `difference`,
# restricted to the three columns that are compared further down.
# A boolean mask is used instead of looping over `iterrows()`, so the column
# dtypes are kept as loaded and no per-row Python work is needed.
profile_columns = ['ID', 'Clinical Profile', 'Military Profile']
original_cleaned = (
	original.loc[~original['ID'].isin(difference), profile_columns]
	.reset_index(drop=True)  # fresh 0..n-1 index for the filtered rows
)

In [24]:
# Order both cleaned frames by ID so that the row-by-row comparisons below
# walk the two frames in the same ID order.
new_cleaned, original_cleaned = (
	frame.sort_values(by='ID') for frame in (new_cleaned, original_cleaned)
)

In [25]:
# Sanity check: after sorting, the two frames should list the same ID at every position.
# The loop unpacks into `new_row` / `old_row` so that the `new` DataFrame loaded
# from Excel is not overwritten by a loop variable (which would break re-running
# the earlier cells).
for (new_index, new_row), (old_index, old_row) in zip(new_cleaned.iterrows(), original_cleaned.iterrows()):
	if new_row['ID'] != old_row['ID']:
		print(f"Difference: {new_row['ID']} and {old_row['ID']}")

In [39]:
# Compare the 'Military Profile' column row by row and report every mismatch.
# `count` is the number of rows that differ, `total` the number of rows compared.
# The (index, row) pairs are unpacked directly in the `for` statement so that the
# `new` DataFrame loaded from Excel is not overwritten by a loop variable.
count, total = 0, 0
for (new_index, new_row), (old_index, old_row) in zip(new_cleaned.iterrows(), original_cleaned.iterrows()):
	total += 1
	if new_row['Military Profile'] != old_row['Military Profile']:
		print(f"ID = {old_row['ID']} | Old = {old_row['Military Profile']} | New = {new_row['Military Profile']}")
		count += 1
print(f"Done. {count}/{total}")

ID = 1006 | Old = NH | New = H1
ID = 1011 | Old = H1 | New = H2
ID = 1030 | Old = NH | New = H1
ID = 1045 | Old = H1 | New = H2
ID = 1050 | Old = H2 | New = H3
ID = 1052 | Old = H1 | New = H2
ID = 1053 | Old = H2 | New = H3
ID = 1054 | Old = H1 | New = H2
ID = 1060 | Old = NH | New = H2
ID = 1071 | Old = NH | New = H1
ID = 1074 | Old = NH | New = H1
ID = 1075 | Old = NH | New = H2
ID = 1083 | Old = NH | New = H1
ID = 1088 | Old = NH | New = H1
ID = 1090 | Old = NH | New = H1
ID = 1099 | Old = H1 | New = H2
ID = 1106 | Old = H2 | New = H3
ID = 1116 | Old = NH | New = H2
ID = 1119 | Old = NH | New = H1
ID = 1121 | Old = H2 | New = H3
ID = 1145 | Old = H1 | New = H2
ID = 1151 | Old = H1 | New = H2
ID = 1155 | Old = NH | New = H1
ID = 1159 | Old = H1 | New = H2
ID = 1165 | Old = H1 | New = H2
ID = 1169 | Old = H1 | New = H2
ID = 1172 | Old = H1 | New = H3
ID = 1203 | Old = NH | New = H2
ID = 1220 | Old = NH | New = H1
ID = 1228 | Old = H1 | New = H3
ID = 2055 | Old = NH | New = H1
ID = 206

In [15]:
# Compare the 'Clinical Profile' column row by row and stop at the first mismatch.
# The loop unpacks into `new_row` / `old_row` so that the `new` DataFrame loaded
# from Excel is not overwritten by a loop variable.
for (new_index, new_row), (old_index, old_row) in zip(new_cleaned.iterrows(), original_cleaned.iterrows()):
	if new_row['Clinical Profile'] != old_row['Clinical Profile']:
		print(f"Difference detected: {new_row['ID']} and {old_row['ID']}")
		break
# Printed whether or not a difference was found
print("Done")

Done


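Conclusion: The two files contain the same set of IDs, and no Clinical Profile difference was detected. The Military Profile values, however, differ for the participants listed in the comparison output above.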