# DREAM Target 2035: Step 1 Model – Multi-Fingerprint + Imbalanced Validation + Submission

In [None]:
!pip install autogluon pandas numpy pyarrow

In [None]:
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split

## Load and Prepare Training Data

In [None]:
# Fingerprint columns used as features. Order matters: the same list is
# reused to build the test matrix, so both matrices share a column layout.
fps = ['ECFP4', 'ECFP6', 'FCFP4', 'FCFP6', 'MACCS', 'RDK', 'AVALON', 'ATOMPAIR', 'TOPTOR']

# Training table for the WDR91 target.
df = pd.read_parquet('data/WDR91.parquet')

# Turn each fingerprint column into its own 2-D block (one row per compound),
# then glue the blocks side by side into a single wide feature matrix.
# NOTE(review): assumes every cell of a fingerprint column holds an
# equal-length numeric vector -- confirm against the parquet schema.
fp_blocks = [np.stack(column) for column in (df[name].values for name in fps)]
X = np.hstack(fp_blocks)

# Activity label per compound (the split below treats 1 as active, 0 as inactive).
y = df['TARGET_VALUE'].values

## Create Imbalanced Validation Set (~0.4% active)

In [None]:
# Build a held-out, heavily imbalanced validation set (1 active : 250
# inactives, i.e. ~0.4% active) and train ONLY on the remaining rows, so that
# no validation compound is also seen during training.

# Separate actives/inactives
X_pos, X_neg = X[y == 1], X[y == 0]
y_pos, y_neg = y[y == 1], y[y == 0]

# Hold out at most 500 actives, but never more than half of them, so the
# training set always keeps actives to learn from.
n_val_pos = min(500, len(X_pos) // 2)
# Keep the 1:250 ratio where the data allows it; cap at half of the inactives
# so the request can never exceed (or exhaust) what is available.
n_val_neg = min(n_val_pos * 250, len(X_neg) // 2)
if n_val_pos < 1 or n_val_neg < 1:
    raise ValueError(
        'Not enough data to build a validation split: '
        f'{len(X_pos)} actives and {len(X_neg)} inactives available.'
    )

# An integer train_size selects exactly that many rows (no float rounding
# surprises). The first output is the validation sample, the second output is
# the complement, which becomes the training data for that class.
X_val_pos, X_train_pos, y_val_pos, y_train_pos = train_test_split(
    X_pos, y_pos, train_size=n_val_pos, random_state=42
)
X_val_neg, X_train_neg, y_val_neg, y_train_neg = train_test_split(
    X_neg, y_neg, train_size=n_val_neg, random_state=42
)

X_val = np.vstack([X_val_pos, X_val_neg])
y_val = np.hstack([y_val_pos, y_val_neg])
# Training rows are strictly the ones NOT drawn for validation.
X_train = np.vstack([X_train_pos, X_train_neg])
y_train = np.hstack([y_train_pos, y_train_neg])

# Wrap both splits as DataFrames with the label stored in 'Activity'.
train_df = pd.DataFrame(X_train)
train_df['Activity'] = y_train
val_df = pd.DataFrame(X_val)
val_df['Activity'] = y_val

## Train AutoGluon Model

In [None]:
# Fit an AutoGluon tabular model on the training frame, optimising ROC AUC,
# with a 600-second training budget.
predictor = TabularPredictor(label='Activity', eval_metric='roc_auc').fit(train_df, time_limit=600)

# Report performance on the imbalanced validation frame, which was built for
# this purpose but otherwise never used.
# NOTE: the score is only an honest estimate when val_df shares no rows with
# train_df; any overlap makes it optimistic.
val_metrics = predictor.evaluate(val_df)
print(val_metrics)

## Load Test Set and Generate Submission

In [None]:
# Challenge test set for Step 1.
test_df = pd.read_parquet('data/Step1_TestData_Target2035.parquet')

# Compound identifiers that the submission file must carry through.
random_ids = test_df['RandomID'].values

# Build the test feature matrix with the same fingerprint list (and order)
# as the training matrix, then wrap it as a DataFrame for the predictor.
test_blocks = [np.stack(column) for column in (test_df[name].values for name in fps)]
X_test = np.hstack(test_blocks)
X_test_df = pd.DataFrame(X_test)

In [None]:
# Predict scores: probability of the active class (column labelled 1) for
# every test compound.
scores = predictor.predict_proba(X_test_df)[1]

# Rank compounds from most to least likely active; after reset_index the row
# position equals the rank (0 = best).
submission = pd.DataFrame({'RandomID': random_ids, 'Score': scores})
submission = submission.sort_values('Score', ascending=False).reset_index(drop=True)

# Flag the top-200 and top-500 ranked compounds. The flags are computed in a
# single assignment from the row rank: the chained form
# `submission[col].iloc[:k] = 1` writes to a temporary under pandas
# Copy-on-Write and would leave the selection columns all zero.
rank = np.arange(len(submission))
submission['Sel_200'] = (rank < 200).astype(int)
submission['Sel_500'] = (rank < 500).astype(int)

# Column order of the written submission file.
submission = submission[['RandomID', 'Sel_200', 'Sel_500', 'Score']]
submission.to_csv('TeamMyTeamName.csv', index=False)
submission.head()