The following code highlights a bug in the original solarPCE_parser.py (lines 1093 and 1094 of origin/main as of 2021-09-15, hash a47054ae558a25b07a070c54c3aba259d7db6d8d)

In [1]:
import pandas as pd
import numpy as np

## Mix-up of 'LUMOOffset', 'HOMOOffset', and 'D-HOMO & A-LUMO offset'

In [2]:
# Read the DFT acceptor table and keep its 'Acceptor' column as a plain list.
acceptor_csv_path = '../data_csv/DFT_acceptor_data.csv'
DFT_acceptors = pd.read_csv(acceptor_csv_path)
acceptors = DFT_acceptors['Acceptor'].tolist()

In [3]:
# Read the DFT donor table and keep its 'Donor' column as a plain list.
donor_csv_path = '../data_csv/DFT_donor_data.csv'
DFT_donors = pd.read_csv(donor_csv_path)
donors = DFT_donors['Donor'].tolist()

In [4]:
# Read the offset table and unpack the four columns used below into
# parallel lists (same row order, so one index addresses one pair entry).
DFT_offsets = pd.read_csv('../data_csv/DFT_offset_data.csv')
offset_pair, lumo_offset, homo_offset, donHOMO_accLUMO_offset = (
    DFT_offsets[column].tolist()
    for column in (
        'Acc-Don Pair',
        'LUMO offset',
        'HOMO offset',
        'D-HOMO & A-LUMO offset',
    )
)

In [5]:
# Replay the acceptor/donor lookup of solarPCE_parser.py on the offset table and
# compare the row it would pick (first substring match) with the row whose label
# matches "<acceptor>/<donor>" exactly.
bad = 0
good = 0
skipped = 0  # Combinations that cannot be compared (no candidate or no exact row)

# Drop everything after the first '_' in each donor label. Done once and in
# place, so `donors` ends up in the same state as with per-iteration stripping.
donors[:] = [don.split('_', 1)[0] for don in donors]

for acc in acceptors:
    for don in donors:
        wanted = f"{acc}/{don}"
        cands = []  # Row indices accepted by the faulty criterion (collisions)
        p_true = None
        for p, pair in enumerate(offset_pair):
            # Problematic criterion: plain substring tests, so e.g. the donor
            # 'PBDB-T' is also found inside 'PBDB-TF'.
            if str(acc) in str(pair) and str(don) in str(pair):
                cands.append(p)
            # This is the correct match criterion:
            if pair == wanted:
                p_true = p

        if not cands or p_true is None:
            # Without this guard, cands[0] raises IndexError on an empty list
            # and indexing the offset lists with p_true=None raises TypeError.
            print(f"Cannot compare acc={acc}, don={don}: {len(cands)} substring candidate(s), exact row index {p_true}.")
            skipped += 1
            continue

        if len(cands) == 1:
            # An exact row always passes the substring test too, so the single
            # candidate is the exact row.
            good += 1
            continue

        p_picked = cands[0]  # In the solarPCE_parser, the first one is picked (break after first match)
        if offset_pair[p_picked] != wanted:
            print(f"For acc={acc}, don={don}, the pairs entry {offset_pair[p_picked]} was wrongly picked!")
            print(f"  Got 'LUMOOffset' = {lumo_offset[p_picked]:g} instead of {lumo_offset[p_true]:g}")
            print(f"  Got 'HOMOOffset' = {homo_offset[p_picked]:g} instead of {homo_offset[p_true]:g}")
            print(f"  Got 'D-HOMO & A-LUMO offset' = {donHOMO_accLUMO_offset[p_picked]:g} instead of {donHOMO_accLUMO_offset[p_true]:g}")
            bad += 1
        else:
            print(f"Lucky, but just because {offset_pair[p_picked]} appeared before {offset_pair[cands[-1]]}.")
            good += 1

print(f"{bad} out of {good+bad} were bad!")
if skipped:
    print(f"{skipped} combinations could not be compared.")

For acc=BTA53, don=PBDB-T, the pairs entry BTA53/PBDB-TF was wrongly picked!
  Got 'LUMOOffset' = 0.698979 instead of 0.565571
  Got 'HOMOOffset' = 0.103512 instead of 0.279107
  Got 'D-HOMO & A-LUMO offset' = 1.6618 instead of 1.4862
For acc=i-cc23, don=PBDB-T, the pairs entry i-cc23/PBDB-TF was wrongly picked!
  Got 'LUMOOffset' = 0.859654 instead of 0.726246
  Got 'HOMOOffset' = 0.0214154 instead of 0.19701
  Got 'D-HOMO & A-LUMO offset' = 1.50112 instead of 1.32553
For acc=Y14, don=PBDB-T, the pairs entry Y14/PBDB-TF was wrongly picked!
  Got 'LUMOOffset' = 0.813538 instead of 0.68013
  Got 'HOMOOffset' = 0.285964 instead of 0.46156
  Got 'D-HOMO & A-LUMO offset' = 1.54724 instead of 1.37165
For acc=IPTBO-4Cl, don=PBDB-T, the pairs entry IPTBO-4Cl/PBDB-TF was wrongly picked!
  Got 'LUMOOffset' = 1.00222 instead of 0.86881
  Got 'HOMOOffset' = 0.32455 instead of 0.500145
  Got 'D-HOMO & A-LUMO offset' = 1.35856 instead of 1.18297
For acc=H2, don=PBDB-T, the pairs entry H2/PBDB-TF wa

Thus, in 93 of 438 cases, the wrong offsets were taken.

## Mix-up of 'AbsFOM'

There is a similar bug in lines 1127 and 1128, where the AbsFOM parameter is selected.

In [6]:
# Read the TDDFT absorption table and unpack the pair labels and the
# 'FOM 1.5G AM' column into parallel lists.
TDDFT_absFOM = pd.read_csv('../data_csv/TDDFT_absorptionFOM_data.csv')
TDDFT_pairs, abs_FOM = (
    TDDFT_absFOM[column].tolist()
    for column in ('Donor/acceptor pair', 'FOM 1.5G AM')
)

In [7]:
# Replay the AbsFOM lookup of solarPCE_parser.py on the TDDFT table and compare
# the row it would pick (first substring match) with the row whose donor and
# acceptor parts match exactly.
bad = 0
good = 0
skipped = 0  # Combinations that cannot be compared (no candidate or no exact row)

# Drop everything after the first '_' in each donor label. Done once and in
# place, so `donors` ends up in the same state as with per-iteration stripping.
donors[:] = [don.split('_', 1)[0] for don in donors]

for acc in acceptors:
    for don in donors:
        cands = []  # Row indices accepted by the faulty criterion (collisions)
        p_true = None
        for row, label in enumerate(TDDFT_pairs):
            # Faulty criterion: plain substring tests on the whole label.
            if str(acc) in str(label) and str(don) in str(label):
                cands.append(row)
            # Correct criterion: split a label such as 'PBDB-T_tetra and BTA53'
            # into its donor and acceptor parts and compare each one exactly.
            parts = label.split(' and ')
            if len(parts) != 2:
                # Malformed label: skip it instead of failing on the unpacking.
                continue
            don_str, acc_str = parts
            don_str = don_str.split('_')[0]
            if acc_str == acc and don_str == don:
                p_true = row

        if not cands or p_true is None:
            # Without this guard, cands[0] raises IndexError on an empty list
            # and TDDFT_pairs[p_true] raises TypeError while p_true is None.
            print(f"Cannot compare don={don} and acc={acc}: {len(cands)} substring candidate(s), exact row index {p_true}.")
            skipped += 1
            continue

        if len(cands) == 1:
            # An exact row always passes the substring test too, so the single
            # candidate is the exact row (this replaces the former assert).
            good += 1
            continue

        p_picked = cands[0]  # The parser takes the first match
        print(f"Multiple hits for don={don} and acc={acc}:\n\t {[TDDFT_pairs[c] for c in cands]}")
        print(f"Correct: '{TDDFT_pairs[p_true]}'")
        if TDDFT_pairs[p_true] == TDDFT_pairs[p_picked]:
            print('Lucky!')
            good += 1
        else:
            bad += 1
            print(f"  Got 'AbsFOM' = {abs_FOM[p_picked]:g} instead of {abs_FOM[p_true]:g}")

print(f"{bad} out of {good+bad} were bad!")
if skipped:
    print(f"{skipped} combinations could not be compared.")

Multiple hits for don=PBDB-T and acc=BTA53:
	 ['PBDB-TF_dimer and BTA53', 'PBDB-T_tetra and BTA53', 'PBDB-T-2Cl_dimer and BTA53']
Correct: 'PBDB-T_tetra and BTA53'
  Got 'AbsFOM' = 46.8232 instead of 69.9927
Multiple hits for don=PBDB-T and acc=i-cc23:
	 ['PBDB-TF_dimer and i-cc23', 'PBDB-T_tetra and i-cc23', 'PBDB-T-2Cl_dimer and i-cc23']
Correct: 'PBDB-T_tetra and i-cc23'
  Got 'AbsFOM' = 40.861 instead of 64.0306
Multiple hits for don=PBDB-T and acc=Y14:
	 ['PBDB-TF_dimer and Y14', 'PBDB-T_tetra and Y14', 'PBDB-T-2Cl_dimer and Y14']
Correct: 'PBDB-T_tetra and Y14'
  Got 'AbsFOM' = 47.6944 instead of 70.8639
Multiple hits for don=PBDB-T and acc=IPTBO-4Cl:
	 ['PBDB-TF_dimer and IPTBO-4Cl', 'PBDB-T_tetra and IPTBO-4Cl', 'PBDB-T-2Cl_dimer and IPTBO-4Cl']
Correct: 'PBDB-T_tetra and IPTBO-4Cl'
  Got 'AbsFOM' = 52.7352 instead of 75.9047
Multiple hits for don=PBDB-T and acc=H2:
	 ['PBDB-TF_dimer and H2', 'PBDB-T_tetra and H2', 'PBDB-T-2Cl_dimer and H2']
Correct: 'PBDB-T_tetra and H2'
  Got

Thus, in 93 of 438 cases, the wrong AbsFOM value was taken.