In [1]:
import json
import numpy as np
import pandas as pd
import os
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt

In [2]:
# Load the filtered QM9 records. allow_pickle=True lets NumPy unpickle
# Python objects stored in the file; unpickling can execute arbitrary code,
# so only point this at a file you trust.
df = np.load(
    file='../qm9_filtered.npy',
    allow_pickle=True,
)

In [3]:
# Keep only the records that list exactly one chiral centre.
single_centre = [mol for mol in df if len(mol['chiral_centers']) == 1]

# Features: each record's 'xyz' array flattened to 1-D
# (presumably atomic coordinates — TODO confirm against the dataset builder).
df_X = np.array([mol['xyz'].flatten() for mol in single_centre])

# Labels: 0 when the second field of the sole chiral-centre entry is "R",
# 1 for any other value.
df_y = np.array(
    [0 if mol['chiral_centers'][0][1] == "R" else 1 for mol in single_centre]
)

In [4]:
# Sanity check: the feature matrix and the label vector should have the same length.
print(f"{len(df_X)} {len(df_y)}")

22973 22973


In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# shuffle (and therefore the split) the same on every run.
split_parts = train_test_split(
    df_X,
    df_y,
    test_size=0.2,
    random_state=120,
)
X_train, X_test, y_train, y_test = split_parts

In [14]:
from xgboost import XGBClassifier

# Sweep the maximum tree depth of an XGBoost classifier and record accuracy
# and weighted F1 on the held-out split for each depth.
# NOTE(review): depths are compared on the same test split whose scores are
# reported, so the best row is an optimistic estimate; consider a separate
# validation split or cross-validation for choosing the depth.
max_depth_values = [5, 10, 20, 30]

# One dict per depth; converted to a DataFrame after the loop.
results = []

for depth in max_depth_values:
    print(depth)  # progress marker for the current depth

    # NOTE(review): `use_label_encoder` is deprecated in recent XGBoost
    # releases and may only trigger a warning there — confirm against the
    # installed version before removing it.
    clf = XGBClassifier(max_depth=depth, use_label_encoder=False, eval_metric='logloss')
    clf.fit(X_train, y_train)

    # Only test-set predictions are needed for the reported metrics.
    y_pred = clf.predict(X_test)

    test_acc = accuracy_score(y_test, y_pred)
    test_f1 = f1_score(y_test, y_pred, average='weighted')

    results.append({"Depth": depth, "Test Accuracy": test_acc, "Test F1 Score": test_f1})

# Collect the per-depth metrics into a table and display it as the cell output.
results_df = pd.DataFrame(results)

results_df

5
10
20
30


Unnamed: 0,Depth,Test Accuracy,Test F1 Score
0,5,0.697933,0.697881
1,10,0.742764,0.742768
2,20,0.749946,0.74996
3,30,0.755169,0.75518


In [15]:
# Write the per-depth metrics table to CSV (relative to the working
# directory), leaving out the DataFrame's row index.
results_df.to_csv(
    path_or_buf='XGB_RS_class_result.csv',
    index=False,
)