## Mol2 reading and manipulation:

In [2]:
def mol2_reader(filename, column):
    """Return one column of the ``@<TRIPOS>ATOM`` section of a mol2 file.

    Parameters
    ----------
    filename : str or path-like
        Path of the mol2 file to read.
    column : str
        Which field of each atom record to return: one of ``"atom_id"``,
        ``"atom_name"``, ``"x"``, ``"y"``, ``"z"`` or ``"atom_type"``.

    Returns
    -------
    list
        One entry per atom record, in file order. ``"x"``, ``"y"`` and
        ``"z"`` are converted to ``float``; every other column is returned
        as the string found in the file. Empty if the file has no atom
        records.

    Raises
    ------
    KeyError
        If ``column`` is not one of the supported names. This is checked
        before the file is opened, so a typo is reported even for a file
        without atom records.
    """
    column_names = {
        "atom_id": 0,
        "atom_name": 1,
        "x": 2,
        "y": 3,
        "z": 4,
        "atom_type": 5,
    }
    if column not in column_names:
        raise KeyError(
            "unknown column {!r}; expected one of {}".format(
                column, sorted(column_names)
            )
        )

    # Both are loop-invariant, so resolve them once instead of per line.
    field_index = column_names[column]
    as_float = column in ("x", "y", "z")

    column_data = []
    with open(filename) as file:
        atom_section = False
        for line in file:
            if line.startswith("@<TRIPOS>"):
                if atom_section:
                    # Any new record tag ends the atom section, not only
                    # BOND: a file without bonds may continue directly
                    # with e.g. SUBSTRUCTURE.
                    break
                atom_section = line.startswith("@<TRIPOS>ATOM")
                continue
            if not atom_section:
                continue
            fields = line.split()
            # Blank and comment lines carry no atom data; indexing into
            # them would raise IndexError or return garbage.
            if not fields or fields[0].startswith("#"):
                continue
            value = fields[field_index]
            column_data.append(float(value) if as_float else value)
    return column_data

In [3]:
# Coordinate extent of the PCPDTFBT_C1_BO structure: read the x, y and z
# columns, then print "min max" for each axis on its own line.
xs, ys, zs = (
    mol2_reader("/home/jbieri/CME_lab/repos/pl-validation/mol2/PCPDTFBT_C1_BO.mol2", axis)
    for axis in ("x", "y", "z")
)
for coords in (xs, ys, zs):
    print(min(coords), max(coords))

-10.7689 13.8654
2.4086 16.0993
2.4038 12.0507


In [4]:
# Coordinate extent of the 250-mer file: read the x, y and z columns, then
# print "min max" for each axis on its own line.
xs_250, ys_250, zs_250 = (
    mol2_reader("/home/jbieri/CME_lab/repos/pl-validation/mol2/polymers/PCPDTFBT_C1_BO_250mer.mol2", axis)
    for axis in ("x", "y", "z")
)
for coords in (xs_250, ys_250, zs_250):
    print(min(coords), max(coords))

-353.3456 68.1216
-178.682 16.0993
0.6756 1221.5274


In [5]:
# Number of values read for the "x" column of the 250-mer file, i.e. how
# many atom records mol2_reader returned for it.
print(len(xs_250))

49002


In [6]:
# Spot-check the parsed z column: the entry at index 49001 and the Python
# type of the first entry, one per line.
print(zs_250[49001], type(zs_250[0]), sep="\n")

1220.6444
<class 'float'>


In [7]:
# Count the atoms of each element in the PCPDTFBT_C1_BO file and estimate
# the mass of a chain built from `n_repeat` copies of it.
atom_types = mol2_reader("/home/jbieri/CME_lab/repos/pl-validation/mol2/PCPDTFBT_C1_BO.mol2", "atom_type")

# Integer-rounded atomic masses in g/mol. O and F are listed so that, if
# present, they contribute to the mass instead of being silently dropped.
atomic_masses = {"C": 12, "H": 1, "N": 14, "S": 32, "O": 16, "F": 19}
n_repeat = 250  # number of copies used for the bulk mass

# mol2 atom types look like "C.3", "N.ar" or "H": the element symbol is the
# part before the dot. Matching only the first character would count "Cl"
# as carbon and "Si"/"Se" as sulfur.
element_counts = {}
for atom_type in atom_types:
    element = atom_type.split(".")[0]
    element_counts[element] = element_counts.get(element, 0) + 1

carbon = element_counts.get("C", 0)
hydrogen = element_counts.get("H", 0)
nitrogen = element_counts.get("N", 0)
sulfur = element_counts.get("S", 0)

print(carbon, hydrogen, nitrogen, sulfur)
print(carbon+hydrogen+nitrogen+sulfur)

# Make any atom outside C/H/N/S visible rather than ignoring it.
other_counts = {
    el: count for el, count in element_counts.items()
    if el not in ("C", "H", "N", "S")
}
if other_counts:
    print("other elements:", other_counts)
missing_mass = sorted(el for el in other_counts if el not in atomic_masses)
if missing_mass:
    print("WARNING: no atomic mass listed for", missing_mass,
          "- bulk_mass does not include them")

c250 = carbon * n_repeat * 12
h250 = hydrogen * n_repeat * 1
n250 = nitrogen * n_repeat * 14
s250 = sulfur * n_repeat * 32

# Sum over every element with a known mass; for a pure C/H/N/S structure
# this equals c250 + h250 + n250 + s250.
bulk_mass = sum(
    count * n_repeat * atomic_masses[el]
    for el, count in element_counts.items()
    if el in atomic_masses
)
mono_mass = bulk_mass / n_repeat

print(c250, h250, n250, s250)

78 108 4 6
196
234000 27000 14000 48000


## Calculating box sizes:

In [8]:
# Target mass density of the box, in g/cm^3.
density_of_box_cm3 = 0.5
# NOTE(review): not used by the calculation below. 1 cm^3 is 10**-6 m^3, so
# a per-m^3 density should be larger than the per-cm^3 one; this factor
# looks inverted - confirm the intended unit before relying on this value.
density_of_box_m3 = density_of_box_cm3 * 10**-6

# Despite its name, `volume` ends up holding a number density: with
# mono_mass as a molar mass in g/mol (sum of integer atomic masses), the
# steps are mol/cm^3 -> molecules/m^3 -> molecules/nm^3. The volume per
# molecule is therefore 1/volume.
volume = density_of_box_cm3 / mono_mass    # mol per cm^3
volume = volume * 6.022*10**23 / 10**-6    # molecules per m^3
volume = volume * 10**-27                  # molecules per nm^3

# The label used to read "nm^3", which is the unit of 1/volume, not of this
# value.
print(volume, "molecules per nm^3")
# Cube root of the number density (unit: 1/nm).
print(volume**(1/3))

0.23304953560371516 nm^3
0.6153885535490613


In [17]:
# `volume` is inverted here before printing; the second line is the cube
# root of that inverse, i.e. the edge length of a cube with that volume.
print(f"Volume {1/volume} nm^3")
print(f"Length: {(1/volume)**(1/3)} nm")

Volume 4.29093324476918 nm^3
Length: 1.6249896008510596 nm


## Determining which atoms to replace and with what in generated mol2 files:

In [1]:
import glob
# Collect the mol2 files directly under the mol2/ directory, discard one
# entry, and sort the remaining paths in place.
molecule_list = glob.glob("/home/jbieri/CME_lab/repos/pl-validation/mol2/"+"*.mol2")
# NOTE(review): the entry is removed before sorting, and glob returns paths
# in arbitrary order, so which file index 1 refers to may differ between
# machines or runs - confirm which file is meant to be excluded.
del molecule_list[1]
molecule_list.sort()

In [109]:
# For each file in molecule_list, keep the names of its last two atom
# records (the final two entries of the "atom_name" column).
# NOTE(review): molecule_list is assigned in more than one cell of this
# notebook, so the result depends on which of them ran last - confirm.
end_hydrogens = [
    mol2_reader(path, "atom_name")[-2:] for path in molecule_list
]

In [111]:
# Show the pair of trailing atom names collected for each file.
print(end_hydrogens)

[['H7', 'H3'], ['H7', 'H3'], ['H7', 'H3'], ['H7', 'H3'], ['H7', 'H3'], ['H5', 'H5'], ['H5', 'H5'], ['H4', 'H4'], ['H5', 'H3'], ['H4', 'H4'], ['H6', 'H3'], ['H5', 'H5'], ['H7', 'H3']]


In [112]:
# Sorted paths of the mol2 files in the 10_mers/ directory.
polymer_list = sorted(
    glob.glob("/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/"+"*.mol2")
)

# For every line whose second whitespace-separated field is exactly "H",
# record [first field, replacement name]. The replacement is taken from
# end_hydrogens, indexed by the file's position in polymer_list and by how
# many such lines were already seen in that file.
replace_with = []
file_count = 0
for filename in polymer_list:
    h_count = 0
    with open(filename, "r") as file:
        for fields in map(str.split, file):
            if len(fields) < 2 or fields[1] != "H":
                continue
            # Looked up only when a match is found, so files without a
            # plain "H" entry never index into end_hydrogens.
            new_name = end_hydrogens[file_count][h_count]
            replace_with.append([fields[0], new_name])
            h_count += 1
    file_count += 1

In [113]:
# Show the collected [first field, replacement name] pairs.
print(replace_with)

[['123', 'H4'], ['1222', 'H4'], ['230', 'H6'], ['2292', 'H3'], ['438', 'H5'], ['4372', 'H5'], ['603', 'H7'], ['6022', 'H3']]


In [114]:
# Show the sorted list of mol2 paths found in the 10_mers/ directory.
print(polymer_list)

['/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PCPDTFBT_C11_BO_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PCPDTFBT_C1_BO_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PCPDTFBT_C3_BO_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PCPDTFBT_C4_BO_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PCPDTFBT_C5_BO_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PCPDTPT_HD_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PCPDTPT_ODD_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PCPDTPT_eneODD_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PCPDTPT_nC16_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PCPDT_PT_eneHD_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PIDTBT_nC16_10mer.mol2', '/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/PIDTCPDT_C11BO_10mer.mol2', '/home/jbieri/

In [10]:
# Rebind molecule_list to the sorted mol2 paths of the 10_mers/ directory.
molecule_list = sorted(
    glob.glob("/home/jbieri/CME_lab/repos/pl-validation/mol2/10_mers/"+"*.mol2")
)

In [14]:
# Number of "x" entries (one per atom record) for each file in
# molecule_list, in list order.
lengths = [len(mol2_reader(path, "x")) for path in molecule_list]

In [15]:
# Show the per-file count of "x" entries gathered in lengths.
print(lengths)

[318, 198, 222, 234, 246, 123, 147, 148, 123, 124, 231, 439, 604]
