<a href="https://colab.research.google.com/github/bhattadeb34/Claude-GPT-paper/blob/main/GPT_modification_just_plots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; `data_home` (set in a later cell)
# points into the MyDrive folder under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install rdkit

In [None]:
!pip install ase

In [None]:
# Clone the repository into the current working directory; the
# `Claude-GPT-paper/dependent_functions` folder is added to sys.path in a
# later cell. NOTE(review): re-running this cell after a successful clone
# makes git report that the destination already exists.
!git clone https://github.com/bhattadeb34/Claude-GPT-paper.git

In [None]:
import os
import pandas as pd
import sys
import json
# Root folder of the project data on the mounted Google Drive.
data_home = '/content/drive/MyDrive/FACT_EFRC'
# Folder that is expected to contain the cloned `Claude-GPT-paper` repository
# with the custom helper modules; update if the repository lives elsewhere.
functions_folder_path='/content/'
dependent_functions_path = os.path.join(functions_folder_path, 'Claude-GPT-paper', 'dependent_functions')
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if dependent_functions_path not in sys.path:
    sys.path.append(dependent_functions_path)
from loading_roar_colab_results import load_json_as_dict, map_smiles_to_latent_df

from paper_plotting_results_notebook import (
    generate_prompt_table,
    plot_latent_space_averaged_by_prompt_grid,
    plot_stacked_plots
)

In [None]:
def setup_paths(data_home, overall_results_filename):
    """
    Builds the file-system locations used by the plotting workflow.

    Parameters:
    - data_home (str): Root folder that contains the 'Claude-GPT-paper' directory.
    - overall_results_filename (str): Name of the results JSON file inside
      the 'out' folder.

    Returns:
    - dict: Maps 'common_keys_path', 'pretrained_pca_path',
      'overall_results_path' and 'base_save_path' to their full paths.
    """
    project_root = os.path.join(data_home, 'Claude-GPT-paper')
    output_dir = os.path.join(project_root, 'out')

    return {
        "common_keys_path": os.path.join(
            project_root,
            'fact-dropbox/zinc/tranches/out/IdentifyCommonMFPKeys-WriteAllMorganFingerprints-ConcatCSV-2D-AK-AKEC-000.json',
        ),
        "pretrained_pca_path": os.path.join(
            project_root,
            'fact-dropbox/zinc/tranches/out/StructureEmbeddingMany-TransformMorganFingerprints-WriteAllMorganFingerprints-ConcatCSV-2D-AK-AKEC-009.lzma',
        ),
        "overall_results_path": os.path.join(output_dir, overall_results_filename),
        "base_save_path": os.path.join(output_dir, 'Figures'),
    }
def load_json_as_dict(filename):
    """
    Loads a JSON file into a Python dictionary.

    Note: this definition shadows the function of the same name imported
    from `loading_roar_colab_results` earlier in the notebook.

    Parameters:
    - filename (str): The name of the JSON file to be loaded.

    Returns:
    - dict: The Python dictionary loaded from the JSON file.

    Raises:
    - OSError: If the file cannot be opened.
    - json.JSONDecodeError: If the file content is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's locale
    # default encoding, which may differ and corrupt non-ASCII content.
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

def load_data(overall_results_path, radius, common_keys_path, pretrained_pca_path):
    """
    Loads the overall results and builds the SMILES-to-latent mapping.

    Parameters:
    - overall_results_path (str): Path of the overall results JSON file.
    - radius: Forwarded unchanged to `map_smiles_to_latent_df`
      (presumably the fingerprint radius; verify against that helper).
    - common_keys_path (str): Forwarded unchanged to `map_smiles_to_latent_df`.
    - pretrained_pca_path (str): Forwarded unchanged to `map_smiles_to_latent_df`.

    Returns:
    - tuple: (overall_results, smiles_latent_map), where overall_results is
      the parsed JSON content and smiles_latent_map is whatever
      `map_smiles_to_latent_df` returns.
    """
    overall_results = load_json_as_dict(overall_results_path)

    # The mapping is recomputed from the results on every call. An earlier
    # alternative read a precomputed "smiles_latent_map_updated.json" located
    # next to the results file; that path is not used here.
    smiles_latent_map = map_smiles_to_latent_df(overall_results, radius, common_keys_path, pretrained_pca_path)

    return overall_results, smiles_latent_map


def generate_all_plots(overall_results, smiles_latent_map, base_save_path, data_home, quantities):
    """
    Runs the three plotting helpers of the notebook in sequence.

    Parameters:
    - overall_results: Passed as first argument to every plotting helper.
    - smiles_latent_map: Passed as second argument to every plotting helper.
    - base_save_path: Currently not used by this function.
      NOTE(review): the file names below are relative, so outputs presumably
      land in the current working directory rather than here; confirm
      whether they should be joined with base_save_path.
    - data_home (str): Root folder that contains the 'Claude-GPT-paper' directory.
    - quantities: Passed to `plot_stacked_plots`.

    Returns:
    - None
    """
    tranche_output_dir = os.path.join(
        data_home, 'Claude-GPT-paper', 'fact-dropbox', 'zinc', 'tranches', 'out'
    )
    embedding_csv_path = os.path.join(
        tranche_output_dir,
        'StructureEmbeddingMany-TransformMorganFingerprints-WriteAllMorganFingerprints-ConcatCSV-2D-AK-AKEC-009.csv',
    )

    plot_latent_space_averaged_by_prompt_grid(
        overall_results, smiles_latent_map, embedding_csv_path, 'distances_latentP_space.pdf'
    )
    plot_stacked_plots(
        overall_results, smiles_latent_map, quantities, 'stacked_subplots_GPT.pdf'
    )
    generate_prompt_table(
        overall_results, smiles_latent_map, word_limit_per_line=6, save_path='prompt_table.pdf'
    )

if __name__ == '__main__':
    # Inputs of this run: the results file to analyse and the quantities
    # handed to the stacked-plot helper.
    overall_results_filename = 'claude_overall_results_A-H_2024-04-11_18-36-01.json'
    quantities = ['validity_ratio', 'chemical_diversity']

    # `data_home` comes from the configuration cell above.
    paths = setup_paths(data_home, overall_results_filename)

    overall_results, smiles_latent_map = load_data(
        overall_results_path=paths['overall_results_path'],
        radius=2,
        common_keys_path=paths['common_keys_path'],
        pretrained_pca_path=paths['pretrained_pca_path'],
    )

    generate_all_plots(
        overall_results=overall_results,
        smiles_latent_map=smiles_latent_map,
        base_save_path=paths['base_save_path'],
        data_home=data_home,
        quantities=quantities,
    )


# HOMO-LUMO energies

In [None]:
from homo_lumo_energies import (process_and_analyze_homo_lumo_energy_data,
                                plot_combined_homo_lumo_energy_differences)

In [None]:

# Result files of the A-H, EWG and EDG runs.
A_H_overall_results_filename = 'claude_overall_results_A-H_2024-04-11_18-36-01.json'
EWG_overall_results_filename = 'claude_overall_results_EWG_hickner2024-04-11_02-08-21.json'
EDG_overall_results_filename = 'claude_overall_results_EDG_hickner2024-04-12_11-13-35.json'
allowed_keys = ['A', 'B', 'C', 'D']

# A-H dataset: `allowed_keys` is passed explicitly.
A_H_results = process_and_analyze_homo_lumo_energy_data(
    data_home,
    A_H_overall_results_filename,
    allowed_keys=allowed_keys,
)
print("Processed A-H Dataset:", A_H_results)

# EWG dataset: every optional argument is left at the helper's default.
EWG_results = process_and_analyze_homo_lumo_energy_data(
    data_home,
    EWG_overall_results_filename,
)
print("Processed EWG Dataset:", EWG_results)

# EDG dataset: optional arguments are spelled out explicitly.
EDG_results = process_and_analyze_homo_lumo_energy_data(
    data_home,
    EDG_overall_results_filename,
    allowed_keys=None,
    radius=2,
    start_letter='M',
)
print("Processed EDG Dataset:", EDG_results)


In [None]:
# Value handed to the plotting helper as `save_path`.
homo_lumo_save_path_filename = 'homo_lumo'

# The helper receives the per-prompt deviations of the three datasets first
# and their filtered prompt mappings second, each in the order A-H, EWG, EDG.
plot_combined_homo_lumo_energy_differences(
    *(dataset['energy_differences_deviation_by_prompt']
      for dataset in (A_H_results, EWG_results, EDG_results)),
    *(dataset['filtered_prompt_mapping']
      for dataset in (A_H_results, EWG_results, EDG_results)),
    figsize=(16, 8),
    condense_percentile=0.99,
    save_path=homo_lumo_save_path_filename,
)
