In [1]:
import os
import pandas as pd
import numpy as np
from tabulate import tabulate

In [2]:
# load all xls files in this directory, read the Summary_Properties sheet, and concatenate them into a single dataframe, skip those that don't have the sheet
# list all files in the current directory
files = [f for f in os.listdir() if f.endswith('.xls') or f.endswith('.xlsx')]
files.remove('smiles.xlsx')

print(f"Found {len(files)} files")
files

Found 4 files


['pyrdz_properties_postprocessed_for_pyrdz1_to_pyrdz1.xlsx',
 'pyrd_properties_postprocessed_for_pyrd1_to_pyrd13.xlsx',
 'pyrmd_properties_postprocessed_for_pyrmd1_to_pyrmd7.xlsx',
 'pyrz_properties_postprocessed_for_pyrz1_to_pyrz3.xlsx']

In [3]:
# files = ['pyrd_properties_postprocessed_for_pyrd1_to_pyrd13.xlsx']

In [4]:
# read the smiles.xlsx file, keep the first two columns
smiles = pd.read_excel('smiles.xlsx')
smiles.rename(columns={'SMILES': 'smiles'}, inplace=True)
smiles = smiles.iloc[:, :2]
smiles

Unnamed: 0,smiles,id
0,BrC1=C2C(OCCC2)=CN=C1,pyrd1
1,ClC1=NC=CC=C1C,pyrd2
2,BrC1=CC(C)=NC=C1,pyrd3
3,O=C(NC1=NC=CC(C2CC2)=C1)C3=CC=C(B4OC(C)(C)C(C)...,pyrd4
4,ClC1=NC(C(F)(F)F)=CC(C)=C1,pyrd5
5,BrC1=CC(C)=CC(NC(OC(C)(C)C)=O)=N1,pyrd6
6,FC(F)(C1=CC(C(F)(F)F)=CC(C2=CN=CC(C)=C2)=C1)F,pyrd7
7,BrC1=NC(C2=CN=CS2)=CC(C)=C1,pyrd8
8,CC1=NC(Cl)=CC2=C1C=NN2,pyrd9
9,BrCCCC1=CN=CC=C1,pyrd10


In [5]:
dict = {}

# combine columns that have the same name
for file in files:
    temp = pd.read_excel(file, sheet_name="Summary_Properties", header=0)
    temp.rename(columns={'Compound_Name': 'id'}, inplace=True)
    # based on the Compound_Name, append a new column with the respective smiles
    temp = temp.merge(smiles, on='id', how='left')
    display(temp)
    # add it to the dictionary
    dict[file] = temp

Unnamed: 0,id,HOMO_Boltz,HOMO_Boltz_stdev,HOMO_min,HOMO_max,HOMO_range,HOMO_low_E,HOMO_Vbur_min,LUMO_Boltz,LUMO_Boltz_stdev,...,NBO_LP_occupancy_N2_low_E,NBO_LP_occupancy_N2_Vbur_min,NBO_LP_energy_N2_Boltz,NBO_LP_energy_N2_Boltz_stdev,NBO_LP_energy_N2_min,NBO_LP_energy_N2_max,NBO_LP_energy_N2_range,NBO_LP_energy_N2_low_E,NBO_LP_energy_N2_Vbur_min,smiles
0,pyrdz1,-0.287992,0.000357,-0.29077,-0.28789,0.00288,-0.28789,-0.28805,-0.038242,0.001077,...,1.93343,1.93246,-0.414611,0.000857,-0.41525,-0.41137,0.00388,-0.41525,-0.41385,CC(C)C1=NN=C(C2=CC=C(I)C=C2F)C=C1


Unnamed: 0,id,HOMO_Boltz,HOMO_Boltz_stdev,HOMO_min,HOMO_max,HOMO_range,HOMO_low_E,HOMO_Vbur_min,LUMO_Boltz,LUMO_Boltz_stdev,...,NBO_LP_occupancy_N1_low_E,NBO_LP_occupancy_N1_Vbur_min,NBO_LP_energy_N1_Boltz,NBO_LP_energy_N1_Boltz_stdev,NBO_LP_energy_N1_min,NBO_LP_energy_N1_max,NBO_LP_energy_N1_range,NBO_LP_energy_N1_low_E,NBO_LP_energy_N1_Vbur_min,smiles
0,pyrd1,-0.29088,0.0,-0.29088,-0.29088,0.0,-0.29088,-0.29088,-0.00553,0.0,...,1.90959,1.90959,-0.38489,0.0,-0.38489,-0.38489,0.0,-0.38489,-0.38489,BrC1=C2C(OCCC2)=CN=C1
1,pyrd2,-0.31,0.0,-0.31,-0.31,0.0,-0.31,-0.31,-0.00748,0.0,...,1.89792,1.89792,-0.38716,0.0,-0.38716,-0.38716,0.0,-0.38716,-0.38716,ClC1=NC=CC=C1C
2,pyrd3,-0.32162,0.0,-0.32162,-0.32162,0.0,-0.32162,-0.32162,-0.01138,0.0,...,1.91612,1.91612,-0.38423,0.0,-0.38423,-0.38423,0.0,-0.38423,-0.38423,BrC1=CC(C)=NC=C1
3,pyrd4,-0.28368,5.2e-05,-0.28569,-0.28364,0.00205,-0.28364,-0.28364,-0.032606,2.2e-05,...,1.91279,1.91279,-0.383852,8.7e-05,-0.38392,-0.36897,0.01495,-0.38391,-0.38391,O=C(NC1=NC=CC(C2CC2)=C1)C3=CC=C(B4OC(C)(C)C(C)...
4,pyrd5,-0.33403,0.0,-0.33403,-0.33403,0.0,-0.33403,-0.33403,-0.03023,0.0,...,1.8943,1.8943,-0.40792,0.0,-0.40792,-0.40792,0.0,-0.40792,-0.40792,ClC1=NC(C(F)(F)F)=CC(C)=C1
5,pyrd6,-0.288716,0.000161,-0.29521,-0.2887,0.00651,-0.2887,-0.29521,-0.004725,0.000447,...,1.89384,1.88823,-0.392468,0.000175,-0.39362,-0.38993,0.00369,-0.39245,-0.38993,BrC1=CC(C)=CC(NC(OC(C)(C)C)=O)=N1
6,pyrd7,-0.310806,0.00017,-0.31093,-0.31069,0.00024,-0.31069,-0.31069,-0.043408,8.5e-05,...,1.91468,1.91468,-0.390319,5.7e-05,-0.39036,-0.39028,8e-05,-0.39028,-0.39028,FC(F)(C1=CC(C(F)(F)F)=CC(C2=CN=CC(C)=C2)=C1)F
7,pyrd8,-0.28815,0.000481,-0.28849,-0.28781,0.00068,-0.28849,-0.28849,-0.04343,0.00024,...,1.89506,1.89506,-0.394048,0.002008,-0.39547,-0.39263,0.00284,-0.39263,-0.39263,BrC1=NC(C2=CN=CS2)=CC(C)=C1
8,pyrd9,-0.2928,0.0,-0.2928,-0.2928,0.0,-0.2928,-0.2928,-0.02545,0.0,...,1.89837,1.89837,-0.38438,0.0,-0.38438,-0.38438,0.0,-0.38438,-0.38438,CC1=NC(Cl)=CC2=C1C=NN2
9,pyrd10,-0.312966,0.001518,-0.31572,-0.30007,0.01565,-0.3121,-0.31568,-0.003239,0.002187,...,1.91714,1.91761,-0.380174,0.001563,-0.38336,-0.3727,0.01066,-0.37962,-0.38014,BrCCCC1=CN=CC=C1


Unnamed: 0,id,HOMO_Boltz,HOMO_Boltz_stdev,HOMO_min,HOMO_max,HOMO_range,HOMO_low_E,HOMO_Vbur_min,LUMO_Boltz,LUMO_Boltz_stdev,...,NBO_LP_occupancy_N2_low_E,NBO_LP_occupancy_N2_Vbur_min,NBO_LP_energy_N2_Boltz,NBO_LP_energy_N2_Boltz_stdev,NBO_LP_energy_N2_min,NBO_LP_energy_N2_max,NBO_LP_energy_N2_range,NBO_LP_energy_N2_low_E,NBO_LP_energy_N2_Vbur_min,smiles
0,pyrmd1,-0.292663,0.000179,-0.29283,-0.29225,0.00058,-0.29266,-0.29283,-0.0119,0.000264,...,1.91451,1.9145,-0.39691,0.000223,-0.39699,-0.39633,0.00066,-0.39697,-0.39699,NC1=NC(C)=C(C(OCC)=O)C=N1
1,pyrmd2,-0.30477,0.0,-0.30477,-0.30477,0.0,-0.30477,-0.30477,-0.0314,0.0,...,1.89791,1.89791,-0.410835,7e-06,-0.41084,-0.41083,1e-05,-0.41084,-0.41083,ClC1=CC(Cl)=NC(CC2=CC=CC=C2)=N1
2,pyrmd3,-0.325183,0.000772,-0.32559,-0.32445,0.00114,-0.32559,-0.32445,-0.026892,0.000528,...,1.90667,1.90684,-0.405158,0.001314,-0.40585,-0.40391,0.00194,-0.40585,-0.40391,ClC1=NC=C(CC)C=N1
3,pyrmd4,-0.35781,0.0,-0.35781,-0.35781,0.0,-0.35781,-0.35781,-0.06031,0.0,...,1.89847,1.89847,-0.43073,0.0,-0.43073,-0.43073,0.0,-0.43073,-0.43073,ClC1=NC(C#N)=NC(C)=C1
4,pyrmd5,-0.300398,0.000568,-0.30057,-0.29946,0.00111,-0.30057,-0.30057,-0.064272,0.001064,...,1.91394,1.91394,-0.398675,0.002164,-0.40225,-0.39802,0.00423,-0.39802,-0.39802,O=CC(C=N1)=C(C)N=C1C2=CC=CC=C2
5,pyrmd6,-0.32635,0.0,-0.32635,-0.32635,0.0,-0.32635,-0.32635,-0.02812,0.0,...,1.90665,1.90665,-0.40655,0.0,-0.40655,-0.40655,0.0,-0.40655,-0.40655,CC1=CN=C(Cl)N=C1
6,pyrmd7,-0.31123,0.0,-0.31123,-0.31123,0.0,-0.31123,-0.31123,-0.00862,0.0,...,1.89468,1.89468,-0.40948,0.0,-0.40948,-0.40948,0.0,-0.40948,-0.40948,ClC1=NC=C(C)C(OC)=N1


Unnamed: 0,id,HOMO_Boltz,HOMO_Boltz_stdev,HOMO_min,HOMO_max,HOMO_range,HOMO_low_E,HOMO_Vbur_min,LUMO_Boltz,LUMO_Boltz_stdev,...,NBO_LP_occupancy_N2_low_E,NBO_LP_occupancy_N2_Vbur_min,NBO_LP_energy_N2_Boltz,NBO_LP_energy_N2_Boltz_stdev,NBO_LP_energy_N2_min,NBO_LP_energy_N2_max,NBO_LP_energy_N2_range,NBO_LP_energy_N2_low_E,NBO_LP_energy_N2_Vbur_min,smiles
0,pyrz1,-0.328091,0.000513,-0.32878,-0.3279,0.00088,-0.3279,-0.32878,-0.051188,0.00014,...,1.90484,1.90443,-0.416855,0.000309,-0.41727,-0.41674,0.00053,-0.41674,-0.41727,ClC1=C(C)N=C(C(OC)=O)C=N1
1,pyrz2,-0.281835,0.000117,-0.28333,-0.28183,0.0015,-0.28183,-0.28333,-0.044896,0.00011,...,1.89884,1.89873,-0.408183,7.2e-05,-0.4091,-0.40818,0.00092,-0.40818,-0.4091,ClC1=C(C)N=C(C(OC)=O)C(N)=N1
2,pyrz3,-0.323715,0.001101,-0.32533,-0.32334,0.00199,-0.32334,-0.32533,-0.042839,6.1e-05,...,1.92347,1.92294,-0.405594,0.000365,-0.40613,-0.40547,0.00066,-0.40547,-0.40613,CC1=NC=C(C(OC)=O)N=C1


In [6]:
# drop columns that are not present in all dataframes
columns = set.intersection(*[set(d.columns) for d in dict.values()])
print(f"Columns: {columns}, number of columns: {len(columns)}")
for key in dict.keys():
    dict[key] = dict[key][list(columns)]
    
# concatenate all dataframes
df = pd.concat(dict.values(), ignore_index=True)
df

Columns: {'%Vbur_C3_3.0Å_Boltz', 'dipole(Debye)_max', 'SASA_surface_area(Å²)_range', 'η_range', 'polar_aniso(Debye)_low_E', '%Vbur_C2_2.5Å_range', 'pyramidalization_Gavrish_C4(°)_max', 'pyramidalization_Agranat-Radhakrishnan_C2_Boltz', '%Vbur_C4_3.0Å_range', 'NMR_shift_C3_Vbur_min', '%Vbur_C1_2.5Å_low_E', '%Vbur_C4_3.5Å_range', 'NBO_charge_N1_min', '%Vbur_C3_3.5Å_low_E', '%Vbur_C3_3.0Å_min', 'μ_range', '%Vbur_C1_3.0Å_max', 'NBO_LP_energy_N1_range', 'volume(Bohr_radius³/mol)_range', '%Vbur_C3_4.0Å_Boltz_stdev', 'NBO_LP_occupancy_N1_range', '%Vbur_C2_2.5Å_Boltz_stdev', 'NMR_shift_C2_Boltz', 'NBO_charge_C1_max', 'polar_iso(Debye)_low_E', '%Vbur_N1_4.0Å_max', '%Vbur_C1_2.5Å_Boltz_stdev', '%Vbur_C2_2.0Å_Boltz_stdev', 'volume(Bohr_radius³/mol)_min', 'NMR_shift_C3_Boltz', '%Vbur_N1_2.5Å_Vbur_min', 'NBO_charge_C3_Vbur_min', '%Vbur_C2_3.5Å_Boltz_stdev', 'pyramidalization_Gavrish_C3(°)_min', 'NBO_charge_N1_Vbur_min', '%Vbur_C3_4.0Å_low_E', '%Vbur_N1_3.5Å_range', 'pyramidalization_Agranat-Radhakr

Unnamed: 0,%Vbur_C3_3.0Å_Boltz,dipole(Debye)_max,SASA_surface_area(Å²)_range,η_range,polar_aniso(Debye)_low_E,%Vbur_C2_2.5Å_range,pyramidalization_Gavrish_C4(°)_max,pyramidalization_Agranat-Radhakrishnan_C2_Boltz,%Vbur_C4_3.0Å_range,NMR_shift_C3_Vbur_min,...,NBO_LP_energy_N1_min,pyramidalization_Gavrish_C1(°)_low_E,%Vbur_N1_4.0Å_low_E,polar_aniso(Debye)_Boltz,SASA_volume(Å³)_min,pyramidalization_Agranat-Radhakrishnan_N1_Boltz,η_Boltz,%Vbur_C4_2.0Å_Vbur_min,%Vbur_C2_3.5Å_Vbur_min,NBO_charge_N1_range
0,66.136254,4.7294,5.699944,0.0111,205.946,0.255567,0.022137,0.006073931,0.388297,43.507,...,-0.41525,0.119621,50.853842,207.845466,720.897059,1.998087,0.24975,96.629649,56.80267,0.00753
1,81.114981,3.8433,0.0,0.0,67.302,0.0,0.005695,0.01262264,0.0,34.1896,...,-0.38489,0.004997,35.518435,67.302,481.283655,1.997292,0.28535,96.352014,64.039205,0.0
2,63.720761,3.4509,0.0,0.0,54.8962,0.0,0.0,2.21733e-11,0.0,22.9562,...,-0.38716,,40.809462,54.8962,389.546728,2.0,0.30252,92.239153,63.604174,0.0
3,74.493445,0.418,0.0,0.0,61.9619,0.0,2e-06,6.620001e-07,0.0,10.7467,...,-0.38423,4.2e-05,40.821876,61.9619,406.872695,2.0,0.31024,92.365057,57.231877,0.0
4,78.532695,5.952,7.739761,0.0058,196.651,4.753223,0.012941,0.005128489,4.9026,-0.0408,...,-0.38392,0.024938,41.019723,205.88342,1050.25414,1.999683,0.251073,92.413481,66.682585,0.04807
5,74.61729,5.188,0.0,0.0,57.528,0.0,0.000189,7.990694e-06,0.0,7.1887,...,-0.40792,0.000133,50.602461,57.528,493.760497,0.000187,0.3038,92.674974,56.827129,0.0
6,74.720752,5.2279,27.970716,0.00191,106.154,1.91594,0.023071,0.001147219,3.071923,7.4302,...,-0.39362,0.014458,46.989634,106.169352,692.439674,0.001269,0.283991,92.536157,56.67513,0.01231
7,69.779972,3.4564,0.05001,0.00012,112.056,0.055346,0.029317,0.005817543,0.018623,27.2837,...,-0.39036,0.006682,36.890556,112.460883,743.021273,1.98757,0.267398,96.929881,60.012696,0.0001
8,74.671756,4.8493,0.290006,0.00102,120.53,1.315275,0.012501,0.0008965162,1.972214,8.9752,...,-0.39547,0.003187,51.494709,120.542487,576.019873,0.000759,0.244721,92.574897,60.458209,0.00796
9,72.997058,2.2799,0.0,0.0,79.9254,0.0,5.9e-05,5.359846e-06,0.0,20.0491,...,-0.38438,0.000234,47.334895,79.9254,464.536762,2.0,0.26735,92.113249,65.944721,0.0


In [7]:
# drop column that match one capital letter followed by a number in any place
df = df[df.columns.drop(list(df.filter(regex='[C][2-9]')))]
df

Unnamed: 0,dipole(Debye)_max,SASA_surface_area(Å²)_range,η_range,polar_aniso(Debye)_low_E,%Vbur_C1_2.5Å_low_E,NBO_charge_N1_min,μ_range,%Vbur_C1_3.0Å_max,NBO_LP_energy_N1_range,volume(Bohr_radius³/mol)_range,...,polar_iso(Debye)_Boltz,polar_aniso(Debye)_Vbur_min,NBO_LP_energy_N1_min,pyramidalization_Gavrish_C1(°)_low_E,%Vbur_N1_4.0Å_low_E,polar_aniso(Debye)_Boltz,SASA_volume(Å³)_min,pyramidalization_Agranat-Radhakrishnan_N1_Boltz,η_Boltz,NBO_charge_N1_range
0,4.7294,5.699944,0.0111,205.946,86.734926,-0.21985,0.0029,77.513223,0.00388,543.861,...,201.026856,210.665,-0.41525,0.119621,50.853842,207.845466,720.897059,1.998087,0.24975,0.00753
1,3.8433,0.0,0.0,67.302,74.778617,-0.3854,0.0,59.651557,0.0,0.0,...,117.962,67.302,-0.38489,0.004997,35.518435,67.302,481.283655,1.997292,0.28535,0.0
2,3.4509,0.0,0.0,54.8962,85.671962,-0.41958,0.0,73.915189,0.0,0.0,...,85.9969,54.8962,-0.38716,,40.809462,54.8962,389.546728,2.0,0.30252,0.0
3,0.418,0.0,0.0,61.9619,85.74847,-0.42868,0.0,72.520299,0.0,0.0,...,94.6394,61.9619,-0.38423,4.2e-05,40.821876,61.9619,406.872695,2.0,0.31024,0.0
4,5.952,7.739761,0.0058,196.651,84.703412,-0.46996,0.001045,73.621871,0.01495,969.001,...,290.97783,196.651,-0.38392,0.024938,41.019723,205.88342,1050.25414,1.999683,0.251073,0.04807
5,5.188,0.0,0.0,57.528,85.032231,-0.40588,0.0,71.242737,0.0,0.0,...,100.126,57.528,-0.40792,0.000133,50.602461,57.528,493.760497,0.000187,0.3038,0.0
6,5.2279,27.970716,0.00191,106.154,85.540109,-0.48411,0.006935,74.582837,0.00369,603.144,...,177.326641,87.2335,-0.39362,0.014458,46.989634,106.169352,692.439674,0.001269,0.283991,0.01231
7,3.4564,0.05001,0.00012,112.056,75.712983,-0.39929,0.00018,61.306243,8e-05,487.128,...,172.397374,112.056,-0.39036,0.006682,36.890556,112.460883,743.021273,1.98757,0.267398,0.0001
8,4.8493,0.290006,0.00102,120.53,86.637257,-0.45059,0.00017,75.5736,0.00284,4.261,...,160.491457,120.53,-0.39547,0.003187,51.494709,120.542487,576.019873,0.000759,0.244721,0.00796
9,2.2799,0.0,0.0,79.9254,85.989387,-0.4609,0.0,73.695434,0.0,0.0,...,112.386,79.9254,-0.38438,0.000234,47.334895,79.9254,464.536762,2.0,0.26735,0.0


In [8]:
# move coloumn "id" and "SMILES" to first position, order the rest of the columns alphabetically
cols = list(df.columns)
cols.remove("id")
cols.remove("smiles")
cols.sort()
cols.insert(0, "id")
cols.insert(1, "smiles")
df = df[cols]

# remove columns that contain any cell with value = 0
columns_with_zeros = df.columns[(df == 0).any()]
df = df.drop(columns=columns_with_zeros)
display(df)

Unnamed: 0,id,smiles,%Vbur_C1_2.0Å_Boltz,%Vbur_C1_2.0Å_Vbur_min,%Vbur_C1_2.0Å_low_E,%Vbur_C1_2.0Å_max,%Vbur_C1_2.0Å_min,%Vbur_C1_2.5Å_Boltz,%Vbur_C1_2.5Å_Vbur_min,%Vbur_C1_2.5Å_low_E,...,μ_Boltz,μ_Vbur_min,μ_low_E,μ_max,μ_min,ω_Boltz,ω_Vbur_min,ω_low_E,ω_max,ω_min
0,pyrdz1,CC(C)C1=NN=C(C2=CC=C(I)C=C2F)C=C1,95.752998,95.748321,95.75155,95.993673,95.748321,86.731908,86.694231,86.734926,...,-0.163117,-0.16345,-0.16291,-0.16055,-0.16345,0.05327,0.0536,0.05309,0.0536,0.04951
1,pyrd1,BrC1=C2C(OCCC2)=CN=C1,90.705708,90.705708,90.705708,90.705708,90.705708,74.778617,74.778617,74.778617,...,-0.148205,-0.148205,-0.148205,-0.148205,-0.148205,0.03849,0.03849,0.03849,0.03849,0.03849
2,pyrd2,ClC1=NC=CC=C1C,95.180139,95.180139,95.180139,95.180139,95.180139,85.671962,85.671962,85.671962,...,-0.15874,-0.15874,-0.15874,-0.15874,-0.15874,0.04165,0.04165,0.04165,0.04165,0.04165
3,pyrd3,BrC1=CC(C)=NC=C1,95.716038,95.716038,95.716038,95.716038,95.716038,85.74847,85.74847,85.74847,...,-0.1665,-0.1665,-0.1665,-0.1665,-0.1665,0.04468,0.04468,0.04468,0.04468,0.04468
4,pyrd4,O=C(NC1=NC=CC(C2CC2)=C1)C3=CC=C(B4OC(C)(C)C(C)...,94.741043,94.731405,94.731405,94.976756,94.731405,84.651442,84.703412,84.703412,...,-0.158143,-0.158135,-0.158135,-0.15712,-0.158165,0.049803,0.04981,0.04981,0.04981,0.04817
5,pyrd5,ClC1=NC(C(F)(F)F)=CC(C)=C1,95.276989,95.276989,95.276989,95.276989,95.276989,85.032231,85.032231,85.032231,...,-0.18213,-0.18213,-0.18213,-0.18213,-0.18213,0.05459,0.05459,0.05459,0.05459,0.05459
6,pyrd6,BrC1=CC(C)=CC(NC(OC(C)(C)C)=O)=N1,95.147784,94.957386,95.154313,95.154313,94.724948,85.525035,85.614989,85.540109,...,-0.146721,-0.153625,-0.14669,-0.14669,-0.153625,0.0379,0.04167,0.03788,0.04167,0.03788
7,pyrd7,FC(F)(C1=CC(C(F)(F)F)=CC(C2=CN=CC(C)=C2)=C1)F,90.977815,91.002712,91.002712,91.002712,90.951059,75.694937,75.712983,75.712983,...,-0.177107,-0.17702,-0.17702,-0.17702,-0.1772,0.058653,0.05861,0.05861,0.0587,0.05861
8,pyrd8,BrC1=NC(C2=CN=CS2)=CC(C)=C1,95.543657,95.858084,95.858084,95.858084,95.228564,86.068125,86.637257,86.637257,...,-0.16579,-0.165875,-0.165875,-0.165705,-0.165875,0.05616,0.0561,0.0561,0.05622,0.0561
9,pyrd9,CC1=NC(Cl)=CC2=C1C=NN2,95.699897,95.699897,95.699897,95.699897,95.699897,85.989387,85.989387,85.989387,...,-0.159125,-0.159125,-0.159125,-0.159125,-0.159125,0.04736,0.04736,0.04736,0.04736,0.04736


In [9]:
# export to csv
df.to_csv("combined.csv", index=False)