In [11]:
from Bio import SeqIO
import matplotlib.pyplot as plt
import os

# Sample files referenced by this notebook (presumably interchangeable
# inputs -- point file_name at whichever one should be inspected):
#   Single_Source/Applied_Biosystems_GlobalFiler/GF_2800M.hid
#   Single_Source/Promega_Powerplex_Fusion_6C/PPF6C_NTD01.hid
file_name = 'Single_Source/Promega_Powerplex_Fusion_6C/PPF6C_NTD01.hid'

# Parse the file with Biopython's "abi" reader into a single record.
record = SeqIO.read(file_name, "abi")

# Report the on-disk size of the file that was just parsed.
file_size = os.stat(file_name).st_size
print(f"File size: {file_size} bytes")

File size: 804689 bytes


In [12]:
# Iterating a mapping yields its keys, so list() alone gives the annotation names.
list(record.annotations)

['sample_well',
 'dye',
 'polymer',
 'machine_model',
 'run_start',
 'run_finish',
 'abif_raw',
 'molecule_type']

In [13]:
# Print each annotation as "name : value", one per line.
# Note that the 'abif_raw' entry is itself a dict and is printed in full.
for key, value in record.annotations.items():
    print(key, ":", value)

sample_well : b'G07'
dye : None
polymer : b'POP4'
machine_model : None
run_start : 2017-05-30 09:27:19
run_finish : 2017-05-30 10:04:39
abif_raw : {'AAct1': False, 'ABED1': b'2017-12-02 23:59:00.0', 'ABID1': b'2017-05-23 09:27:00.0', 'ABLt1': b'1702373', 'ABRn1': 3, 'ABTp1': b'ABC', 'APXV1': b'1', 'APrN1': b'WEN ILS 500', 'APrV1': b'SizeCaller v1.1.0', 'APrX1': b'<?xml version="1.0" encoding="UTF-8"?> <qc-protocol name="WEN ILS 500" version="SizeCaller v1.1.0" userName="" comments="" created="1448924566419" createUserName="" modified="1448924566409" modifyUserName="" uuid="71ca151e-5199-475c-a0dc-8e2c7507e207" securityID="" immutable="false" systemOwned="false" locked="false" unlockable="true" validated="false" checksum="0xD49034D8F19CECC411FEE9580240B25F" rgb="0;0;0" auditActive="0" iconFile=""><qcParameter><analysisSettings><parameterMap><parameter name="AnalysisDyes">1,2,3,4,5,6</parameter><parameter name="AnalysisMode">1</parameter><parameter name="AnalysisStopPoint">1000000</param

In [14]:
# Bind the raw ABIF tag dictionary to raw_data; later cells read from this name.
raw_data = record.annotations["abif_raw"]

# Show the tag names only (iterating the dict yields its keys).
list(raw_data)

['AAct1',
 'ABED1',
 'ABID1',
 'ABLt1',
 'ABRn1',
 'ABTp1',
 'APXV1',
 'APrN1',
 'APrV1',
 'APrX1',
 'AVld1',
 'AmbT1',
 'Anld1',
 'AsyC1',
 'AsyN1',
 'AsyV1',
 'CAED1',
 'CAID1',
 'CALt1',
 'CARn1',
 'CASN1',
 'CBED1',
 'CBID1',
 'CBLt1',
 'CBRn1',
 'CBTp1',
 'CTNM1',
 'CTTL1',
 'CkSm1',
 'CpEP1',
 'DATA1',
 'DATA2',
 'DATA3',
 'DATA4',
 'DATA5',
 'DATA6',
 'DATA7',
 'DATA8',
 'DATA9',
 'DATA10',
 'DATA11',
 'DATA12',
 'DATA105',
 'DATA106',
 'DATA205',
 'DATA206',
 'DCMD1',
 'DCMD2',
 'DSam1',
 'DySN1',
 'Dye#1',
 'DyeN1',
 'DyeN2',
 'DyeN3',
 'DyeN4',
 'DyeN5',
 'DyeN6',
 'DyeW1',
 'DyeW2',
 'DyeW3',
 'DyeW4',
 'DyeW5',
 'DyeW6',
 'EPVt1',
 'EVNT1',
 'EVNT2',
 'EVNT3',
 'EVNT4',
 'GTyp1',
 'HCFG1',
 'HCFG2',
 'HCFG3',
 'HCFG4',
 'InSc1',
 'InVt1',
 'InjN1',
 'LANE1',
 'LIMS1',
 'LNTD1',
 'LsrP1',
 'MCHN1',
 'MODF1',
 'MODL1',
 'NAVG1',
 'NLNE1',
 'NrmS3',
 'OfSc1',
 'OffS1',
 'OvrI9',
 'OvrI10',
 'OvrI11',
 'OvrI12',
 'OvrI205',
 'OvrI206',
 'OvrV9',
 'OvrV10',
 'OvrV11',
 'OvrV12',

In [15]:
# Keep only the tags whose parsed value is not None, preserving tag order.
non_empty_channels = {
    tag: value
    for tag, value in raw_data.items()
    if value is not None
}
print("Non-empty channels:", non_empty_channels.keys())

Non-empty channels: dict_keys(['AAct1', 'ABED1', 'ABID1', 'ABLt1', 'ABRn1', 'ABTp1', 'APXV1', 'APrN1', 'APrV1', 'APrX1', 'AVld1', 'AmbT1', 'Anld1', 'AsyN1', 'AsyV1', 'CAED1', 'CAID1', 'CALt1', 'CARn1', 'CASN1', 'CBED1', 'CBID1', 'CBLt1', 'CBRn1', 'CBTp1', 'CTNM1', 'CTTL1', 'CkSm1', 'CpEP1', 'DCMD1', 'DCMD2', 'DSam1', 'Dye#1', 'DyeN6', 'DyeW1', 'DyeW2', 'DyeW3', 'DyeW4', 'DyeW5', 'DyeW6', 'EPVt1', 'EVNT1', 'EVNT2', 'EVNT3', 'EVNT4', 'GTyp1', 'HCFG1', 'HCFG4', 'InSc1', 'InVt1', 'InjN1', 'LANE1', 'LIMS1', 'LNTD1', 'LsrP1', 'MCHN1', 'MODF1', 'NAVG1', 'NLNE1', 'NrmS3', 'OvrI9', 'OvrI10', 'OvrI11', 'OvrI12', 'OvrI205', 'OvrI206', 'OvrV9', 'OvrV10', 'OvrV11', 'OvrV12', 'OvrV205', 'OvrV206', 'PSZE1', 'PTYP1', 'PXLB1', 'RGNm1', 'RMXV1', 'RMdN1', 'RMdV1', 'RMdX1', 'RNmF1', 'RPrN1', 'RPrV1', 'RUND1', 'RUND2', 'RUND3', 'RUND4', 'RUNT1', 'RUNT2', 'RUNT3', 'RUNT4', 'RunN1', 'SCAN1', 'SMED1', 'SMID1', 'SMLt1', 'SMPL1', 'SMRn1', 'STYP1', 'SVER1', 'SVER2', 'SVER3', 'SVER4', 'SZTS1', 'ScSt1', 'Scal1', '

In [16]:
# Report the tags whose value is a list/tuple with more than 10 elements.
# bytes and scalar values never pass the type check, so they are skipped.
for channel, data in non_empty_channels.items():
    # Arbitrary threshold: anything with 10 or fewer elements is ignored.
    if not isinstance(data, (list, tuple)) or len(data) <= 10:
        continue
    print(f"{channel}: Length={len(data)}")

AmbT1: Length=446


In [17]:
# Re-fetch the 'abif_raw' annotation dict (the same lookup an earlier cell
# already performs), so raw_data is bound even if this cell is run on its own.
raw_data = record.annotations["abif_raw"]
# The bare expression makes the notebook display the entire dictionary.
# NOTE(review): the output is very large (it includes embedded XML blobs);
# consider displaying a truncated summary instead -- confirm with the author.
raw_data

{'AAct1': False,
 'ABED1': b'2017-12-02 23:59:00.0',
 'ABID1': b'2017-05-23 09:27:00.0',
 'ABLt1': b'1702373',
 'ABRn1': 3,
 'ABTp1': b'ABC',
 'APXV1': b'1',
 'APrN1': b'WEN ILS 500',
 'APrV1': b'SizeCaller v1.1.0',
 'APrX1': b'<?xml version="1.0" encoding="UTF-8"?> <qc-protocol name="WEN ILS 500" version="SizeCaller v1.1.0" userName="" comments="" created="1448924566419" createUserName="" modified="1448924566409" modifyUserName="" uuid="71ca151e-5199-475c-a0dc-8e2c7507e207" securityID="" immutable="false" systemOwned="false" locked="false" unlockable="true" validated="false" checksum="0xD49034D8F19CECC411FEE9580240B25F" rgb="0;0;0" auditActive="0" iconFile=""><qcParameter><analysisSettings><parameterMap><parameter name="AnalysisDyes">1,2,3,4,5,6</parameter><parameter name="AnalysisMode">1</parameter><parameter name="AnalysisStopPoint">1000000</parameter><parameter name="PeakDetectorTauB">key=1,value=0.0;key=2,value=0.0;key=3,value=0.0;key=4,value=0.0;key=5,value=0.0;key=6,value=0.0;</