In [1]:
from rdkit import Chem
import pandas as pd
import os

In [4]:

source_sdf_directory = 'data/sdf_data/binders' 

output_csv_directory = 'data/sdf_data/binders_csv' 

print(f"Čitanje i obrada SDF fajlova iz direktorijuma: {os.path.abspath(source_sdf_directory)}\n")

# Kreirajte izlazni direktorijum ako ne postoji
os.makedirs(output_csv_directory, exist_ok=True)
print(f"Generisani CSV fajlovi će biti sačuvani u: {os.path.abspath(output_csv_directory)}\n")

if not os.path.isdir(source_sdf_directory):
    print(f"Greška: Izvorni direktorijum '{source_sdf_directory}' ne postoji.")
else:
    total_files_processed = 0
    total_molecules_processed = 0

    for filename in os.listdir(source_sdf_directory):
        if filename.endswith(".sdf"):
            file_path = os.path.join(source_sdf_directory, filename)
            
            # Formirajte ime izlaznog CSV fajla (isto ime kao SDF, ali .csv ekstenzija)
            csv_filename = os.path.splitext(filename)[0] + '.csv'
            output_csv_path = os.path.join(output_csv_directory, csv_filename)

            print(f"Procesiram fajl: {filename} -> Sačuvaću kao: {csv_filename}")
            total_files_processed += 1
            
            molecules_in_current_file = [] # Lista za molekule iz trenutnog SDF fajla
            current_file_mol_count = 0

            try:
                suppl = Chem.SDMolSupplier(file_path, sanitize=True, removeHs=False)
                
                for i, mol in enumerate(suppl):
                    if mol is None:
                        print(f"  Upozorenje: Nevažeći molekul na indeksu {i} u fajlu {filename}. Preskačem.")
                        continue
                    
                    current_file_mol_count += 1
                    total_molecules_processed += 1
                    
                    mol_data = {}
                    
                    # Dodajte osnovne hemijske informacije
                    try:
                        mol_data['SMILES'] = Chem.MolToSmiles(mol)
                    except Exception: # Uhvatite specifičniji izuzetak ako je poznat
                        mol_data['SMILES'] = None
                    
                    try:
                        mol_data['InChI'] = Chem.MolToInchi(mol)
                    except Exception:
                        mol_data['InChI'] = None
                        
                    try:
                        mol_data['InChIKey'] = Chem.MolToInchiKey(mol)
                    except Exception:
                        mol_data['InChIKey'] = None
                        
                    # Dodajte sve "properties" (dodatne podatke) iz SDF fajla
                    for prop_name in mol.GetPropNames():
                        mol_data[prop_name] = mol.GetProp(prop_name)
                        
                    # Dodajte ime izvornog fajla (opciono, ali korisno za praćenje)
                    mol_data['Source_SDF_File'] = filename
                    
                    molecules_in_current_file.append(mol_data)
                
                # Kreiranje i čuvanje DataFrame-a za TRENUTNI SDF fajl
                if molecules_in_current_file:
                    df = pd.DataFrame(molecules_in_current_file)
                    try:
                        df.to_csv(output_csv_path, index=False, encoding='utf-8')
                        print(f"  Sačuvano {current_file_mol_count} molekula u: {os.path.abspath(output_csv_path)}")
                    except Exception as e:
                        print(f"  Greška pri čuvanju '{csv_filename}' u CSV: {e}")
                else:
                    print(f"  Nema validnih molekula za čuvanje u {filename}.")
                
            except Exception as e:
                print(f"  Greška pri obradi {filename}: {e}")
            
            print("-" * 30) # Separator za fajlove

    print(f"\nZavršeno čitanje {total_files_processed} SDF fajlova.")
    print(f"Ukupno {total_molecules_processed} validnih molekula je procesirano i sačuvano u pojedinačne CSV fajlove.")

print("\nProces završen.")

Čitanje i obrada SDF fajlova iz direktorijuma: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders

Generisani CSV fajlovi će biti sačuvani u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv

Procesiram fajl: OT_ERa_GFPERaERE_0120-ESR1.sdf -> Sačuvaću kao: OT_ERa_GFPERaERE_0120-ESR1.csv


[16:53:20] skipping block at line 7: 'BEGIN ATOM'
[16:53:20] skipping block at line 9: 'BEGIN BOND'
[16:53:20] ERROR: Empty structure

[16:53:20] Invalid InChI prefix in generating InChI Key




[16:53:20] ERROR: Unknown element(s): *

[16:53:20] Invalid InChI prefix in generating InChI Key











[16:53:21] skipping block at line 5660: 'BEGIN ATOM'
[16:53:21] skipping block at line 5662: 'BEGIN BOND'
[16:53:21] ERROR: Empty structure

[16:53:21] Invalid InChI prefix in generating InChI Key
[16:53:21] ERROR: Unknown element(s): *

[16:53:21] Invalid InChI prefix in generating InChI Key








[16:53:21] skipping block at line 9661: 'BEGIN ATOM'
[16:53:21] skipping block at line 9663: 'BEGIN BOND'
[16:53:21] ERROR: Empty structure

[16:53:21] Invalid InChI prefix in generating InChI Key



[16:53:21] skipping block at line 10456: 'BEGIN ATOM'
[16:53:21] skipping block at line 10458: 'BEGIN BOND'
[16:53:21] ERROR: Empty structure

[16:53:21] Invalid InChI prefix in generating InChI 

  Sačuvano 179 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/OT_ERa_GFPERaERE_0120-ESR1.csv
------------------------------
Procesiram fajl: OT_ER_ERaERa_0480-ESR1.sdf -> Sačuvaću kao: OT_ER_ERaERa_0480-ESR1.csv
  Sačuvano 133 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/OT_ER_ERaERa_0480-ESR1.csv
------------------------------
Procesiram fajl: OT_ER_ERaERa_1440-ESR1.sdf -> Sačuvaću kao: OT_ER_ERaERa_1440-ESR1.csv















[16:53:21] skipping block at line 8528: 'BEGIN ATOM'
[16:53:21] skipping block at line 8530: 'BEGIN BOND'
[16:53:21] ERROR: Empty structure

[16:53:21] Invalid InChI prefix in generating InChI Key

[16:53:21] ERROR: Unknown element(s): *

[16:53:21] Invalid InChI prefix in generating InChI Key

[16:53:21] skipping block at line 9925: 'BEGIN ATOM'
[16:53:21] skipping block at line 9927: 'BEGIN BOND'
[16:53:21] ERROR: Empty structure

[16:53:21] Invalid InChI prefix in generating InChI Key








[16:53:21] ERROR: Unknown element(s): *

[16:53:21] Invalid InChI prefix in generating InChI Key








  Sačuvano 142 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/OT_ER_ERaERa_1440-ESR1.csv
------------------------------
Procesiram fajl: OT_ER_ERaERb_0480-ESR1-ESR2.sdf -> Sačuvaću kao: OT_ER_ERaERb_0480-ESR1-ESR2.csv







[16:53:21] ERROR: Unknown element(s): *

[16:53:21] Invalid InChI prefix in generating InChI Key









[16:53:21] skipping block at line 12195: 'BEGIN ATOM'
[16:53:21] skipping block at line 12197: 'BEGIN BOND'
[16:53:21] ERROR: Empty structure

[16:53:21] Invalid InChI prefix in generating InChI Key





[16:53:21] skipping block at line 14930: 'BEGIN ATOM'
[16:53:21] skipping block at line 14932: 'BEGIN BOND'
[16:53:21] ERROR: Empty structure

[16:53:21] Invalid InChI prefix in generating InChI Key


[16:53:21] skipping block at line 16195: 'BEGIN ATOM'
[16:53:21] skipping block at line 16197: 'BEGIN BOND'
[16:53:21] ERROR: Empty structure

[16:53:21] Invalid InChI prefix in generating InChI Key









[16:53:21] ERROR: Unknown element(s): *

[16:53:21] Invalid InChI prefix in generating InChI Key


[16:53:21] skipping block at line 7: 'BEGIN ATOM'
[16:53:21] skipping block at line 9: 'BEGIN BOND'
[16:53:21] ERROR: Empty structure

[16:53:21] Invalid InChI prefix in generat

  Sačuvano 210 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/OT_ER_ERaERb_0480-ESR1-ESR2.csv
------------------------------
Procesiram fajl: NVS_NR_mERa-ESR1.sdf -> Sačuvaću kao: NVS_NR_mERa-ESR1.csv



[16:53:22] skipping block at line 11289: 'BEGIN ATOM'
[16:53:22] skipping block at line 11291: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key

[16:53:22] skipping block at line 11479: 'BEGIN ATOM'
[16:53:22] skipping block at line 11481: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key









[16:53:22] ERROR: Unknown element(s): *

[16:53:22] Invalid InChI prefix in generating InChI Key








[16:53:22] ERROR: Unknown element(s): *

[16:53:22] Invalid InChI prefix in generating InChI Key

[16:53:22] skipping block at line 8684: 'BEGIN ATOM'
[16:53:22] skipping block at line 8686: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key










  Sačuvano 141 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/NVS_NR_mERa-ESR1.csv
------------------------------
Procesiram fajl: OT_ER_ERaERb_1440-ESR1-ESR2.sdf -> Sačuvaću kao: OT_ER_ERaERb_1440-ESR1-ESR2.csv









[16:53:22] skipping block at line 15151: 'BEGIN ATOM'
[16:53:22] skipping block at line 15153: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key







[16:53:22] skipping block at line 18371: 'BEGIN ATOM'
[16:53:22] skipping block at line 18373: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key



[16:53:22] skipping block at line 20061: 'BEGIN ATOM'
[16:53:22] skipping block at line 20063: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key







[16:53:22] ERROR: Unknown element(s): *

[16:53:22] Invalid InChI prefix in generating InChI Key








[16:53:22] ERROR: Unknown element(s): *

[16:53:22] Invalid InChI prefix in generating InChI Key




  Sačuvano 257 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/OT_ER_ERaERb_1440-ESR1-ESR2.csv
------------------------------
Procesiram fajl: OT_ER_ERbERb_1440-ESR2.sdf -> Sačuvaću kao: OT_ER_ERbERb_1440-ESR2.csv
  Sačuvano 228 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/OT_ER_ERbERb_1440-ESR2.csv
------------------------------
Procesiram fajl: NVS_NR_bER-ESR1.sdf -> Sačuvaću kao: NVS_NR_bER-ESR1.csv























[16:53:22] skipping block at line 14056: 'BEGIN ATOM'
[16:53:22] skipping block at line 14058: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key







[16:53:22] skipping block at line 16246: 'BEGIN ATOM'
[16:53:22] skipping block at line 16248: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key





[16:53:22] skipping block at line 17638: 'BEGIN ATOM'
[16:53:22] skipping block at line 17640: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key






[16:53:22] ERROR: Unknown element(s): *

[16:53:22] Invalid InChI prefix in generating InChI Key








[16:53:22] skipping block at line 1601: 'BEGIN ATOM'
[16:53:22] skipping block at line 1603: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key

[16:53:22] skipping block at line 2146: 'BEGIN ATOM'
[16:53:2

  Sačuvano 91 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/NVS_NR_bER-ESR1.csv
------------------------------
Procesiram fajl: OT_ER_ERbERb_0480-ESR2.sdf -> Sačuvaću kao: OT_ER_ERbERb_0480-ESR2.csv
  Sačuvano 206 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/OT_ER_ERbERb_0480-ESR2.csv
------------------------------
Procesiram fajl: NVS_NR_hER-ESR1.sdf -> Sačuvaću kao: NVS_NR_hER-ESR1.csv





















[16:53:22] skipping block at line 12255: 'BEGIN ATOM'
[16:53:22] skipping block at line 12257: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key




[16:53:22] skipping block at line 14578: 'BEGIN ATOM'
[16:53:22] skipping block at line 14580: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key




[16:53:22] skipping block at line 15796: 'BEGIN ATOM'
[16:53:22] skipping block at line 15798: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key







[16:53:22] ERROR: Unknown element(s): *

[16:53:22] Invalid InChI prefix in generating InChI Key


[16:53:22] skipping block at line 7: 'BEGIN ATOM'
[16:53:22] skipping block at line 9: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key


[16:53:22] skipping block at line 1850: 'BEGIN ATOM'
[16:53:22] skipping bloc

  Sačuvano 196 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/NVS_NR_hER-ESR1.csv
------------------------------
Procesiram fajl: OT_ERa_GFPERaERE_0480-ESR1.sdf -> Sačuvaću kao: OT_ERa_GFPERaERE_0480-ESR1.csv
  Sačuvano 190 molekula u: /home/ivana-milutinovic/Documents/Doktorske/BIORad/bio-inf-paper/data-processing/data/sdf_data/binders_csv/OT_ERa_GFPERaERE_0480-ESR1.csv
------------------------------

Završeno čitanje 11 SDF fajlova.
Ukupno 1973 validnih molekula je procesirano i sačuvano u pojedinačne CSV fajlove.

Proces završen.


[16:53:22] skipping block at line 14264: 'BEGIN ATOM'
[16:53:22] skipping block at line 14266: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key



[16:53:22] skipping block at line 15724: 'BEGIN ATOM'
[16:53:22] skipping block at line 15726: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key



[16:53:22] skipping block at line 16838: 'BEGIN ATOM'
[16:53:22] skipping block at line 16840: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key



[16:53:22] ERROR: Unknown element(s): *

[16:53:22] Invalid InChI prefix in generating InChI Key





[16:53:22] skipping block at line 98: 'BEGIN ATOM'
[16:53:22] skipping block at line 100: 'BEGIN BOND'
[16:53:22] ERROR: Empty structure

[16:53:22] Invalid InChI prefix in generating InChI Key



















[16:53:22] skipping block at line 6208: 'BEGIN ATOM'
[16:53:22] skipping block 