# Performance of GPT models on the photoswitch classification task without fine-tuning.


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from scipy.stats import sem
from sklearn.model_selection import train_test_split

from gptchem.data import get_photoswitch_data
from gptchem.evaluator import evaluate_classification
from gptchem.extractor import ClassificationExtractor
from gptchem.formatter import ClassificationFormatter
from gptchem.querier import Querier

In [3]:
# Load the photoswitch dataset and preview the first rows.
data = get_photoswitch_data()
data.head()

Unnamed: 0,SMILES,rate of thermal isomerisation from Z-E in s-1,Solvent used for thermal isomerisation rates,Z PhotoStationaryState,E PhotoStationaryState,E isomer pi-pi* wavelength in nm,Extinction,E isomer n-pi* wavelength in nm,Extinction coefficient in M-1 cm-1,Z isomer pi-pi* wavelength in nm,...,CAM-B3LYP/6-31G** DFT Z isomer pi-pi* wavelength in nm,CAM-B3LYP/6-31G** DFT Z isomer n-pi* wavelength in nm,BHLYP/6-31G* DFT E isomer pi-pi* wavelength in nm,BHLYP/6-31G* DFT E isomer n-pi* wavelength in nm,BHLYP/6-31G* Z isomer pi-pi* wavelength in nm,BHLYP/6-31G* DFT Z isomer n-pi* wavelength in nm,name,selfies,wavelength_cat,inchi
0,C[N]1C=CC(=N1)N=NC2=CC=CC=C2,2.1e-07,MeCN,76.0,72.0,310.0,1.67,442.0,0.0373,290.0,...,,,,,,,,[C][NH0][N][=N][C][=Branch1][Ring2][=N][Ring1]...,very small,InChI=1S/C10H10N4/c1-14-8-7-10(13-14)12-11-9-5...
1,C[N]1C=NC(=N1)N=NC2=CC=CC=C2,3.8e-07,MeCN,90.0,84.0,310.0,1.87,438.0,0.0505,272.0,...,,,,,,,,[C][NH0][C][=N][C][=Branch1][Ring2][=N][Ring1]...,very small,InChI=1S/C9H9N5/c1-14-7-10-9(13-14)12-11-8-5-3...
2,C[N]1C=C(C)C(=N1)N=NC2=CC=CC=C2,1.5e-06,MeCN,96.0,87.0,325.0,1.74,428.0,0.0612,286.0,...,,,,,,,,[C][NH0][C][=C][Branch1][C][C][C][=Branch1][Ri...,very small,InChI=1S/C11H12N4/c1-9-8-15(2)14-11(9)13-12-10...
3,C[N]1C=C(C=N1)N=NC2=CC=CC=C2,7.6e-09,MeCN,98.0,70.0,328.0,1.66,417.0,0.064,275.0,...,256.0,401.0,,,,,,[C][NH0][C][=C][Branch1][Branch1][C][=N][Ring1...,very small,InChI=1S/C10H10N4/c1-14-8-10(7-11-14)13-12-9-5...
4,C[N]1N=C(C)C(=C1C)N=NC2=CC=CC=C2,7.7e-07,MeCN,98.0,98.0,335.0,2.27,425.0,0.0963,296.0,...,279.0,449.0,,,,,"phenyl-(1,3,5-trimethylpyrazol-4-yl)diazene",[C][NH0][N][=C][Branch1][C][C][C][=Branch1][Br...,very small,InChI=1S/C12H14N4/c1-9-12(10(2)16(3)15-9)14-13...


In [4]:
# Configure the prompt/completion formatter for a two-class task: the SMILES
# string is the molecule representation and the E isomer pi-pi* wavelength is
# the property that gets binned into class labels.
formatter = ClassificationFormatter(
    representation_column="SMILES",
    label_column="E isomer pi-pi* wavelength in nm",
    property_name="transition wavelength",  # wording that appears in the prompt text
    num_classes=2,
    qcut=True,  # equal-frequency bins rather than equal-width bins
)

In [5]:
formatter

gptchem.formatter.ClassificationFormatter(representation_column='SMILES', label_column='E isomer pi-pi* wavelength in nm', property_name='transition wavelength', num_classes=2, qcut=True)

In [7]:
# Apply the formatter; the resulting frame has prompt, completion, label and
# representation columns.
formatted = formatter(data)

In [8]:
formatted.head()

Unnamed: 0,prompt,completion,label,representation
0,What is the transition wavelength of C[N]1C=CC...,0@@@,0,C[N]1C=CC(=N1)N=NC2=CC=CC=C2
1,What is the transition wavelength of C[N]1C=NC...,0@@@,0,C[N]1C=NC(=N1)N=NC2=CC=CC=C2
2,What is the transition wavelength of C[N]1C=CC...,0@@@,0,C[N]1C=CC(=N1)N=NC2=CC=CC=C2
3,What is the transition wavelength of C[N]1C=C(...,0@@@,0,C[N]1C=C(C)C(=N1)N=NC2=CC=CC=C2
4,What is the transition wavelength of C[N]1C=C(...,0@@@,0,C[N]1C=C(C=N1)N=NC2=CC=CC=C2


With `qcut=True`, the labels are split into two classes with equal frequency.


In [9]:
formatted["label"].value_counts()

0    196
1    196
Name: label, dtype: int64

## Now query the model a couple of times for statistics


### Run it once manually


In [10]:
# Stratified 80/20 split with a fixed seed. Only `test` is used in the cells
# that follow here, since the model is queried without fine-tuning.
train, test = train_test_split(
    formatted, test_size=0.2, random_state=42, stratify=formatted["label"]
)

In [11]:
# Query the `ada` model as-is (no fine-tuning).
querier = Querier("ada")

In [12]:
querier

gptchem.querier.Querier(modelname='ada', max_tokens=10)

In [13]:
# Query the model with the test rows; the raw completions come back under
# the 'choices' key.
query_result = querier.query(test)

In [14]:
query_result

{'choices': ['\n\nAnswer:\n\nThe transition wavelength of',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '\n\nThe transition wavelength of OCC%25',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '\n\nA. 1.5 nm\n\n',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '\n\nA.\n\nB.\n\n',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '\n\n#\n\n#\n\n#\n',
  '?C%11C%10C%10',
  '\n\nA.\n\nB.\n\n',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '\n\nA.\n\nB.\n\n',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '?\n\nThe transition wavelength of CCN(',
  '?C=C7?C=C7',
  '\n\nA. [H]C1=',
  '?##?##?##?##?##',
  '?##?##?##?##?##',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '\n\nAnswer:\n\nCC(C=',
  '\n\nAnswer:\n\nThe transition wavelength of',
  '\n\nThe transition wavelength of OC%38=',
  '\n\nAnswer:\n\nThe transition 

In [15]:
# Extractor used to parse the raw completions into class predictions.
extractor = ClassificationExtractor()

In [17]:
# Parse the completions into predictions. NOTE(review): entries appear as None
# when no valid class could be extracted -- confirm against the extractor.
predictions = extractor(query_result)

In [18]:
predictions

(#79) [None,None,None,None,None,None,None,None,None,None...]

In [26]:
# Score the extracted predictions against the true labels of the test split.
evaluate_classification(test["label"], predictions)



{'accuracy': 0.0,
 'acc_macro': 0.3333333333333333,
 'racc': 0.0,
 'kappa': 0.0,
 'confusion_matrix': pycm.ConfusionMatrix(classes: ['0', '1', 'None']),
 'f1_macro': 0.0,
 'f1_micro': 0.0,
 'frac_valid': 0.0,
 'all_y_true': (#79) [0,1,0,0,1,1,0,1,1,1...],
 'all_y_pred': (#79) [None,None,None,None,None,None,None,None,None,None...],
 'valid_indices': [],
 'might_have_rounded_floats': False}

### Now, run it a couple of times to get statistics


In [30]:
def split_query_eval(num_rounds: int = 10, test_size: float = 0.2) -> pd.DataFrame:
    """Repeat the split -> query -> extract -> evaluate cycle and collect the metrics.

    Each round draws a stratified train/test split of the notebook-level
    ``formatted`` frame, seeded with the round index so the splits are the
    same on every run. The test rows are sent to the notebook-level
    ``querier``, the completions are parsed with ``extractor``, and the
    predictions are scored with ``evaluate_classification``.

    Args:
        num_rounds: Number of differently seeded splits to evaluate.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        DataFrame with one row per round, built from the metric dictionaries
        returned by ``evaluate_classification``. Empty when ``num_rounds`` is 0.
    """
    collected_metrics = []
    for i in range(num_rounds):
        # The model is not fine-tuned, so the training half of the split is discarded.
        _, test = train_test_split(
            formatted, test_size=test_size, random_state=i, stratify=formatted["label"]
        )
        query_result = querier.query(test)
        predictions = extractor(query_result)
        metrics = evaluate_classification(test["label"], predictions)
        collected_metrics.append(metrics)

    return pd.DataFrame(collected_metrics)

In [31]:
# Run the default 10 rounds; every round queries the model again.
collected_metrics = split_query_eval()



The log messages already show that none of the 10 rounds yields a single valid prediction.

In [33]:
collected_metrics.head()

Unnamed: 0,accuracy,acc_macro,racc,kappa,confusion_matrix,f1_macro,f1_micro,frac_valid,all_y_true,all_y_pred,valid_indices,might_have_rounded_floats
0,0.0,0.333333,0.0,0.0,"((0, {'0': 0, '1': 0, 'None': 40}), (1, {'0': ...",0.0,0.0,0.0,"[1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, ...","[None, None, None, None, None, None, None, Non...",[],False
1,0.0,0.333333,0.0,0.0,"((0, {'0': 0, '1': 0, 'None': 39}), (1, {'0': ...",0.0,0.0,0.0,"[0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, ...","[None, None, None, None, None, None, None, Non...",[],False
2,0.0,0.333333,0.0,0.0,"((0, {'0': 0, '1': 0, 'None': 40}), (1, {'0': ...",0.0,0.0,0.0,"[1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, ...","[None, None, None, None, None, None, None, Non...",[],False
3,0.0,0.333333,0.0,0.0,"((0, {'0': 0, '1': 0, 'None': 40}), (1, {'0': ...",0.0,0.0,0.0,"[1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, ...","[None, None, None, None, None, None, None, Non...",[],False
4,0.0,0.333333,0.0,0.0,"((0, {'0': 0, '1': 0, 'None': 40}), (1, {'0': ...",0.0,0.0,0.0,"[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, ...","[None, None, None, None, None, None, None, Non...",[],False


In [34]:
collected_metrics["accuracy"].mean()

0.0