In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
import random
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Hyperparameters
# Number of input features; filled in by load_data() once the dataset is read.
state_size = None
action_size = 2  # Binary classification: BRD or no BRD
# Number of transitions drawn from the replay buffer for each gradient step.
batch_size = 64
# Discount factor applied to the target network's best next-state Q-value.
gamma = 0.95
# Exploration probability for epsilon-greedy action selection. train()
# multiplies it by epsilon_decay after each episode while it is above
# epsilon_min.
epsilon = 1.0
epsilon_min = 0.01
epsilon_decay = 0.995
learning_rate = 0.001  # Step size handed to the Adam optimizer in train()
memory_size = 10000  # Default capacity (maxlen) of the replay buffer
train_start = 1000  # train_dqn() returns early until the buffer holds this many transitions
target_update_interval = 10  # Update target model every 10 episodes
training_rate = 0.01  # NOTE(review): not referenced anywhere in the visible code

def load_data():
    """Load ``fulldataset.xlsx``, preprocess it, and return a train/test split.

    Steps: impute missing values (mode for object columns, median otherwise),
    expand the date columns into year/month/day features, integer-encode the
    remaining object columns, split 80/20 stratified on the target, then
    standardise the features.

    The scaler is fitted on the training split only and then applied to the
    test split, so no test-set statistics leak into training.

    Side effects:
        Sets the module-level ``state_size`` to the number of feature columns
        and prints the shape of the feature matrix.

    Returns:
        tuple: ``(X_train, y_train, X_test, y_test)``.
    """
    global state_size
    df = pd.read_excel('fulldataset.xlsx')

    # --- Data Preprocessing ---
    # Fill NaN values with the mode for object columns and median for numerical
    for col in df.columns:
        if df[col].dtype == 'object':
            modes = df[col].mode()
            # An all-NaN column has no mode; leave it for factorize (NaN -> -1)
            if not modes.empty:
                df[col] = df[col].fillna(modes[0])
        else:
            df[col] = df[col].fillna(df[col].median())

    # Convert date columns to datetime and extract features
    for col in ('Date', 'Enrolldate', 'BIRTHDATE'):
        parsed = pd.to_datetime(df[col], errors='coerce')
        date_parts = {
            'year': parsed.dt.year,
            'month': parsed.dt.month,
            'day': parsed.dt.day,
        }
        for suffix, values in date_parts.items():
            # errors='coerce' turns unparseable dates into NaT, which would
            # surface here as NaN features; impute with the column median and
            # fall back to 0 when every value is missing.
            df[f'{col}_{suffix}'] = values.fillna(values.median()).fillna(0)
        df = df.drop(columns=[col])

    # Encode categorical variables as integer codes
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = pd.factorize(df[col])[0]

    # Define target and features
    # NOTE(review): the target is used as-is; confirm BRD_Total only takes the
    # values 0/1, since action_size is 2.
    target_column = 'BRD_Total'
    X = df.drop(columns=[target_column]).values
    y = df[target_column].values

    # Store state size
    state_size = X.shape[1]
    print(f"Shape of X: {X.shape}")

    # Split first, then scale, so the scaler never sees the test rows
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, y_train, X_test, y_test

class DQNModel(tf.keras.Model):
    """Feed-forward Q-network: two 128-unit ReLU layers and a linear head.

    The head emits ``action_size`` values, one Q-value per action.
    """

    def __init__(self):
        super().__init__()
        # Both hidden layers share the same configuration.
        hidden_config = dict(units=128, activation='relu', kernel_initializer='he_uniform')
        self.dense1 = layers.Dense(**hidden_config)
        self.dense2 = layers.Dense(**hidden_config)
        # Linear activation: Q-values are unbounded regression outputs.
        self.output_layer = layers.Dense(
            units=action_size,
            activation='linear',
            kernel_initializer='he_uniform',
        )

    def call(self, state):
        """Return the Q-values for a batch of states."""
        hidden = self.dense2(self.dense1(state))
        return self.output_layer(hidden)

class ReplayBuffer:
    """Bounded store of ``(state, action, reward, next_state, done)`` tuples.

    Backed by a ``deque`` with ``maxlen``, so the oldest transition is
    discarded once capacity is reached.
    """

    def __init__(self, maxlen=memory_size):
        self.buffer = deque(maxlen=maxlen)

    def store(self, state, action, reward, next_state, done):
        """Append one transition to the buffer."""
        transition = (state, action, reward, next_state, done)
        self.buffer.append(transition)

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns an iterator of five tuples (states, actions, rewards,
        next_states, dones), i.e. the sampled transitions transposed.
        """
        chosen = np.random.choice(len(self.buffer), batch_size, replace=False)
        picked = [self.buffer[position] for position in chosen]
        return zip(*picked)

    def size(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)

def train_dqn(model, target_model, buffer, optimizer, X_train, y_train):  # Pass y_train as well
    """Apply one DQN gradient step using a minibatch from the replay buffer.

    Returns immediately (without training) while the buffer holds fewer than
    ``train_start`` transitions. ``X_train`` and ``y_train`` are accepted but
    not used inside this function.
    """
    if buffer.size() < train_start:
        return

    # Draw a minibatch and turn each field into a batch-shaped array
    states, actions, rewards, next_states, dones = buffer.sample(batch_size)
    state_batch = np.array(states).reshape(batch_size, -1)
    next_state_batch = np.array(next_states).reshape(batch_size, -1)
    reward_column = np.array(rewards, dtype=np.float32).reshape(-1, 1)
    done_column = np.array(dones, dtype=np.float32).reshape(-1, 1)
    action_ids = np.array(actions, dtype=np.int32)

    with tf.GradientTape() as tape:
        # Q(s, a) for the action actually taken: mask with a one-hot and sum
        all_q = model(state_batch)  # (batch_size, action_size)
        taken_mask = tf.one_hot(action_ids, action_size)
        taken_q = tf.reduce_sum(all_q * taken_mask, axis=1, keepdims=True)  # (batch_size, 1)

        # Bootstrapped target from the target network; (1 - done) removes the
        # bootstrap term on terminal transitions
        best_next_q = tf.reduce_max(target_model(next_state_batch), axis=1, keepdims=True)
        td_target = reward_column + gamma * best_next_q * (1 - done_column)

        # Mean squared TD error
        loss = tf.reduce_mean(tf.square(td_target - taken_q))

    # Only the online network's variables are updated
    online_variables = model.trainable_variables
    gradients = tape.gradient(loss, online_variables)
    optimizer.apply_gradients(zip(gradients, online_variables))

def select_action(model, state, epsilon):
    """Pick an action epsilon-greedily.

    With probability ``epsilon`` a uniformly random action index is returned;
    otherwise the index of the largest Q-value the model predicts for
    ``state`` (fed as a single-row batch).
    """
    should_explore = np.random.rand() <= epsilon
    if should_explore:
        return random.randint(0, action_size - 1)  # Explore

    # Exploit: the model expects a batch dimension, so feed shape (1, n)
    predicted_q = model(state.reshape(1, -1)).numpy()
    return np.argmax(predicted_q)

def calculate_accuracy(model, X_test, y_test):
    """Return the fraction of samples whose greedy action equals the label.

    The model is called once on the whole feature matrix rather than once per
    row; the greedy action for each row is the argmax over its Q-values.

    Args:
        model: Callable mapping an ``(n, features)`` array to an object whose
            ``.numpy()`` yields an ``(n, action_size)`` array of Q-values.
        X_test: Sequence/array of feature rows.
        y_test: Sequence/array of integer labels, one per row.

    Returns:
        float: Accuracy in ``[0, 1]``; ``0.0`` when ``X_test`` is empty.
    """
    features = np.asarray(X_test)
    labels = np.asarray(y_test)
    if len(features) == 0:
        # Avoid calling the model on an empty batch and a NaN mean
        return 0.0

    # One batched forward pass instead of a Python loop of single-row calls
    q_values = model(features.reshape(len(features), -1))
    predictions = np.argmax(q_values.numpy(), axis=1)
    return np.mean(predictions == labels)

class CattleEnvironment:
    """Episodic environment that walks through a labelled dataset in order.

    Every step compares the chosen action with the label of the current
    sample, then moves on to the next sample. An episode ends once the last
    sample has been scored.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y
        self.num_samples = len(X)  # Episode length
        self.index = 0

    def reset(self):
        """Rewind to the first sample and return it as the initial state."""
        self.index = 0
        return self.X[0]

    def step(self, action):
        """Score ``action`` against the current label and advance one sample.

        Returns ``(next_state, reward, done)``. The reward is 1 for a correct
        action and 0 otherwise. On the final step the current state is
        returned again as ``next_state``. Calling this after the episode has
        ended prints a notice and returns ``(None, 0, True)``.
        """
        if self.index >= self.num_samples:
            print("Episode is already done. Please reset.")
            return None, 0, True

        current_state = self.X[self.index]
        is_correct = action == self.y[self.index]
        reward = 1 if is_correct else 0
        self.index += 1

        # Past the last sample: no successor exists, so echo the current state
        if self.index >= self.num_samples:
            return current_state, reward, True
        return self.X[self.index], reward, False

def train():
    """Train the DQN classifier and report per-episode test accuracy.

    Loads the data, then for each episode walks the environment once with
    epsilon-greedy actions, storing every transition and running a training
    step after each one. After every episode epsilon is decayed, the target
    network is synchronised every ``target_update_interval`` episodes, and
    accuracy on the test split is recorded.

    The weights of the best-scoring episode are kept in memory and restored
    into the model before returning, so the "saving model..." log message
    reflects something that actually happens.

    Side effects:
        Mutates the module-level ``epsilon`` and prints progress/results.

    Returns:
        tuple | None: ``(model, results_df)`` where ``model`` carries the best
        weights seen and ``results_df`` has one row per episode; ``None`` if
        the data could not be loaded.
    """
    global epsilon
    X_train, y_train, X_test, y_test = load_data()  # Load training and testing data

    if X_train is None or y_train is None or X_test is None or y_test is None:
        print("Error loading data.")
        return None

    # Initialize environment, model, and target model
    env = CattleEnvironment(X_train, y_train)  # Pass training data to the environment
    model = DQNModel()
    target_model = DQNModel()

    # Subclassed models create their weights on first call, so run a dummy
    # state through both before copying weights
    dummy_state = np.zeros((1, state_size))
    model(dummy_state)
    target_model(dummy_state)

    # Copy weights from model to target_model
    target_model.set_weights(model.get_weights())

    buffer = ReplayBuffer()
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    episodes = 200  # Reduced the number of episodes
    best_accuracy = 0.0  # Keep track of the best accuracy
    best_weights = None  # In-memory snapshot of the best model so far

    # One [episode, accuracy %, epsilon, total reward] row per episode
    results = []

    for episode in range(episodes):
        state = env.reset()
        done = False
        total_reward = 0

        while not done:
            action = select_action(model, state, epsilon)
            next_state, reward, done = env.step(action)

            # Only store and train if the returned values are valid
            if next_state is not None:
                buffer.store(state, action, reward, next_state, done)
                total_reward += reward
                train_dqn(model, target_model, buffer, optimizer, X_train, y_train)  # Pass the entire training set
                state = next_state

        # Update epsilon
        if epsilon > epsilon_min:
            epsilon *= epsilon_decay

        # Update target model weights every target_update_interval episodes
        if episode % target_update_interval == 0:
            target_model.set_weights(model.get_weights())

        # Calculate accuracy on the test set
        accuracy = calculate_accuracy(model, X_test, y_test)
        results.append([episode, accuracy * 100, epsilon, total_reward])  # Append results

        # Snapshot the weights when this is the best model so far
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_weights = model.get_weights()
            print(f"New best accuracy: {best_accuracy:.4f}, saving model...")

    # Hand back the best model rather than whatever the last episode left
    if best_weights is not None:
        model.set_weights(best_weights)

    # Convert results to DataFrame and print
    results_df = pd.DataFrame(results, columns=['Episode', 'Accuracy (%)', 'Epsilon', 'Total Reward'])
    print("Training complete!")
    print(f"Best Accuracy on Test Set: {best_accuracy * 100:.2f}%")
    print(results_df)

    return model, results_df

# Entry point: start training when this module is run as the main program.
if __name__ == "__main__":
    train()


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
import random
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Hyperparameters
# Number of input features; filled in by load_data() once the dataset is read.
state_size = None
action_size = 2  # Binary classification: BRD or no BRD
# Number of transitions drawn from the replay buffer for each gradient step.
batch_size = 64
# Discount factor applied to the target network's best next-state Q-value.
gamma = 0.95
# Exploration probability for epsilon-greedy action selection. train()
# multiplies it by epsilon_decay after each episode while it is above
# epsilon_min.
epsilon = 1.0
epsilon_min = 0.01
epsilon_decay = 0.995
learning_rate = 0.001  # Step size handed to the Adam optimizer in train()
memory_size = 10000  # Default capacity (maxlen) of the replay buffer
train_start = 1000  # train_dqn() returns early until the buffer holds this many transitions
target_update_interval = 10  # Update target model every 10 episodes
training_rate = 0.01  # NOTE(review): not referenced anywhere in the visible code

def load_data():
    """Load ``metadata.csv``, preprocess it, and return a train/test split.

    Steps: build the binary label from ``status`` (missing -> not removed),
    impute ``clade``, parse ``collection_date`` with several candidate formats
    and expand it into year/month/day, drop identifier columns, integer-encode
    object columns, impute numeric gaps, split 80/20 stratified on the label,
    then standardise the features.

    The scaler is fitted on the training split only and then applied to the
    test split, so no test-set statistics leak into training.

    Side effects:
        Sets the module-level ``state_size`` to the number of feature columns
        and prints the shape of the feature matrix (plus a warning when rows
        with unrecognised status values are dropped).

    Returns:
        tuple: ``(X_train, y_train, X_test, y_test)``, or four ``None`` values
        when the ``status`` column is missing.
    """
    global state_size
    df = pd.read_csv('metadata.csv')

    # Check for the target column before touching it; otherwise the lookup
    # below would raise KeyError and this error path could never run.
    target_column = 'status'
    if target_column not in df.columns:
        print(f"Error: Target column '{target_column}' not found in the DataFrame.")
        return None, None, None, None

    # --- Data Preprocessing ---
    # 1. Build the label: missing status counts as 'not_removed'
    status_mapping = {'removed': 1, 'not_removed': 0}
    df[target_column] = df[target_column].fillna('not_removed').map(status_mapping)

    # Values outside the mapping become NaN; a NaN label cannot be scored or
    # stratified on, so drop those rows and say so.
    unrecognised = df[target_column].isna()
    if unrecognised.any():
        print(f"Warning: dropping {int(unrecognised.sum())} rows with unrecognised '{target_column}' values.")
        df = df[~unrecognised].copy()
    df[target_column] = df[target_column].astype(int)

    # 2. Fill missing 'clade' with the mode (skipped if the column is all NaN)
    clade_modes = df['clade'].mode()
    if not clade_modes.empty:
        df['clade'] = df['clade'].fillna(clade_modes[0])

    # 3. Parse 'collection_date', trying each known format in turn
    date_formats = ["%b-%y", "%m-%Y", "%d-%m-%Y", "%m-%d-%y", "%b-%Y"]

    def convert_to_datetime(date_string):
        for fmt in date_formats:
            try:
                return pd.to_datetime(date_string, format=fmt)
            except (ValueError, TypeError):
                continue
        return pd.NaT

    collected = df['collection_date'].astype(str).apply(convert_to_datetime)
    # Guarantee a datetime dtype so the .dt accessor is available
    collected = pd.to_datetime(collected, errors='coerce')

    # 4. Extract date features. Unparseable or missing dates are NaT and
    #    would yield NaN features; impute with the median, falling back to 0
    #    when every value is missing.
    for part in ('year', 'month', 'day'):
        values = getattr(collected.dt, part)
        df[part] = values.fillna(values.median()).fillna(0)
    df = df.drop('collection_date', axis=1)

    # 5. Drop unnecessary identifier columns:
    columns_to_drop = ['sample', 'strain', 'sample_title', 'organism', 'isolate', 'host']
    df = df.drop(columns=[col for col in columns_to_drop if col in df.columns])

    # --- Feature Encoding ---
    # 6. Encode categorical variables (impute with the mode first):
    for col in df.select_dtypes(include=['object']).columns:
        modes = df[col].mode()
        if not modes.empty:
            df[col] = df[col].fillna(modes[0])
        df[col] = pd.factorize(df[col])[0]  # Use numeric codes

    #    Impute remaining numeric gaps so no NaN reaches the network
    for col in df.select_dtypes(include=['number']).columns:
        if col != target_column and df[col].isna().any():
            df[col] = df[col].fillna(df[col].median()).fillna(0)

    # 7. Separate features and target:
    X = df.drop(columns=[target_column]).values
    y = df[target_column].values

    # 8. Store state size:
    state_size = X.shape[1]
    print(f"Shape of X: {X.shape}")

    # 9. Split first, then scale, so the scaler never sees the test rows
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, y_train, X_test, y_test

class DQNModel(tf.keras.Model):
    """Feed-forward Q-network: two 128-unit ReLU layers and a linear head.

    The head emits ``action_size`` values, one Q-value per action.
    """

    def __init__(self):
        super().__init__()
        # Both hidden layers share the same configuration.
        hidden_config = dict(units=128, activation='relu', kernel_initializer='he_uniform')
        self.dense1 = layers.Dense(**hidden_config)
        self.dense2 = layers.Dense(**hidden_config)
        # Linear activation: Q-values are unbounded regression outputs.
        self.output_layer = layers.Dense(
            units=action_size,
            activation='linear',
            kernel_initializer='he_uniform',
        )

    def call(self, state):
        """Return the Q-values for a batch of states."""
        hidden = self.dense2(self.dense1(state))
        return self.output_layer(hidden)

class ReplayBuffer:
    """Bounded store of ``(state, action, reward, next_state, done)`` tuples.

    Backed by a ``deque`` with ``maxlen``, so the oldest transition is
    discarded once capacity is reached.
    """

    def __init__(self, maxlen=memory_size):
        self.buffer = deque(maxlen=maxlen)

    def store(self, state, action, reward, next_state, done):
        """Append one transition to the buffer."""
        transition = (state, action, reward, next_state, done)
        self.buffer.append(transition)

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns an iterator of five tuples (states, actions, rewards,
        next_states, dones), i.e. the sampled transitions transposed.
        """
        chosen = np.random.choice(len(self.buffer), batch_size, replace=False)
        picked = [self.buffer[position] for position in chosen]
        return zip(*picked)

    def size(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)

def train_dqn(model, target_model, buffer, optimizer, X_train):
    """Apply one DQN gradient step using a minibatch from the replay buffer.

    Returns immediately (without training) while the buffer holds fewer than
    ``train_start`` transitions. ``X_train`` is accepted but not used inside
    this function.
    """
    if buffer.size() < train_start:
        return

    # Draw a minibatch and turn each field into a batch-shaped array
    states, actions, rewards, next_states, dones = buffer.sample(batch_size)
    state_batch = np.array(states).reshape(batch_size, -1)
    next_state_batch = np.array(next_states).reshape(batch_size, -1)
    reward_column = np.array(rewards, dtype=np.float32).reshape(-1, 1)
    done_column = np.array(dones, dtype=np.float32).reshape(-1, 1)
    action_ids = np.array(actions, dtype=np.int32)

    with tf.GradientTape() as tape:
        # Q(s, a) for the action actually taken: mask with a one-hot and sum
        all_q = model(state_batch)  # (batch_size, action_size)
        taken_mask = tf.one_hot(action_ids, action_size)
        taken_q = tf.reduce_sum(all_q * taken_mask, axis=1, keepdims=True)  # (batch_size, 1)

        # Bootstrapped target from the target network; (1 - done) removes the
        # bootstrap term on terminal transitions
        best_next_q = tf.reduce_max(target_model(next_state_batch), axis=1, keepdims=True)
        td_target = reward_column + gamma * best_next_q * (1 - done_column)

        # Mean squared TD error
        loss = tf.reduce_mean(tf.square(td_target - taken_q))

    # Only the online network's variables are updated
    online_variables = model.trainable_variables
    gradients = tape.gradient(loss, online_variables)
    optimizer.apply_gradients(zip(gradients, online_variables))

def select_action(model, state, epsilon):
    """Pick an action epsilon-greedily.

    With probability ``epsilon`` a uniformly random action index is returned;
    otherwise the index of the largest Q-value the model predicts for
    ``state`` (fed as a single-row batch).
    """
    should_explore = np.random.rand() <= epsilon
    if should_explore:
        return random.randint(0, action_size - 1)  # Explore

    # Exploit: the model expects a batch dimension, so feed shape (1, n)
    predicted_q = model(state.reshape(1, -1)).numpy()
    return np.argmax(predicted_q)

def calculate_accuracy(model, X_test, y_test):
    """Return the fraction of samples whose greedy action equals the label.

    The model is called once on the whole feature matrix rather than once per
    row; the greedy action for each row is the argmax over its Q-values.

    Args:
        model: Callable mapping an ``(n, features)`` array to an object whose
            ``.numpy()`` yields an ``(n, action_size)`` array of Q-values.
        X_test: Sequence/array of feature rows.
        y_test: Sequence/array of integer labels, one per row.

    Returns:
        float: Accuracy in ``[0, 1]``; ``0.0`` when ``X_test`` is empty.
    """
    features = np.asarray(X_test)
    labels = np.asarray(y_test)
    if len(features) == 0:
        # Avoid calling the model on an empty batch and a NaN mean
        return 0.0

    # One batched forward pass instead of a Python loop of single-row calls
    q_values = model(features.reshape(len(features), -1))
    predictions = np.argmax(q_values.numpy(), axis=1)
    return np.mean(predictions == labels)

class CattleEnvironment:
    """Episodic environment that walks through a labelled dataset in order.

    Every step compares the chosen action with the label of the current
    sample, then moves on to the next sample. An episode ends once the last
    sample has been scored.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y
        self.num_samples = len(X)  # Episode length
        self.index = 0

    def reset(self):
        """Rewind to the first sample and return it as the initial state."""
        self.index = 0
        return self.X[0]

    def step(self, action):
        """Score ``action`` against the current label and advance one sample.

        Returns ``(next_state, reward, done)``. The reward is 1 for a correct
        action and 0 otherwise. On the final step the current state is
        returned again as ``next_state``. Calling this after the episode has
        ended prints a notice and returns ``(None, 0, True)``.
        """
        if self.index >= self.num_samples:
            print("Episode is already done. Please reset.")
            return None, 0, True

        current_state = self.X[self.index]
        is_correct = action == self.y[self.index]
        reward = 1 if is_correct else 0
        self.index += 1

        # Past the last sample: no successor exists, so echo the current state
        if self.index >= self.num_samples:
            return current_state, reward, True
        return self.X[self.index], reward, False

def train():
    """Train the DQN classifier and report per-episode test accuracy.

    Loads the data, then for each episode walks the environment once with
    epsilon-greedy actions, storing every transition and running a training
    step after each one. After every episode epsilon is decayed, the target
    network is synchronised every ``target_update_interval`` episodes, and
    accuracy on the test split is recorded.

    The weights of the best-scoring episode are kept in memory and restored
    into the model before returning, so the "saving model..." log message
    reflects something that actually happens.

    Side effects:
        Mutates the module-level ``epsilon`` and prints progress/results.

    Returns:
        tuple | None: ``(model, results_df)`` where ``model`` carries the best
        weights seen and ``results_df`` has one row per episode; ``None`` if
        the data could not be loaded.
    """
    global epsilon
    X_train, y_train, X_test, y_test = load_data()  # Load training and testing data

    if X_train is None or y_train is None or X_test is None or y_test is None:
        print("Error loading data.")
        return None

    # Initialize environment, model, and target model
    env = CattleEnvironment(X_train, y_train)  # Pass training data to the environment
    model = DQNModel()
    target_model = DQNModel()

    # Subclassed models create their weights on first call, so run a dummy
    # state through both before copying weights
    dummy_state = np.zeros((1, state_size))
    model(dummy_state)
    target_model(dummy_state)

    # Copy weights from model to target_model
    target_model.set_weights(model.get_weights())

    buffer = ReplayBuffer()
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    episodes = 200  # Reduced the number of episodes
    best_accuracy = 0.0  # Keep track of the best accuracy
    best_weights = None  # In-memory snapshot of the best model so far

    # One [episode, accuracy %, epsilon, total reward] row per episode
    results = []

    for episode in range(episodes):
        state = env.reset()
        done = False
        total_reward = 0

        while not done:
            action = select_action(model, state, epsilon)
            next_state, reward, done = env.step(action)

            # Only store and train if the returned values are valid
            if next_state is not None:
                buffer.store(state, action, reward, next_state, done)
                total_reward += reward
                train_dqn(model, target_model, buffer, optimizer, X_train)  # Pass the entire training set
                state = next_state

        # Update epsilon
        if epsilon > epsilon_min:
            epsilon *= epsilon_decay

        # Update target model weights every target_update_interval episodes
        if episode % target_update_interval == 0:
            target_model.set_weights(model.get_weights())

        # Calculate accuracy on the test set
        accuracy = calculate_accuracy(model, X_test, y_test)
        results.append([episode, accuracy * 100, epsilon, total_reward])  # Append results

        # Snapshot the weights when this is the best model so far
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_weights = model.get_weights()
            print(f"New best accuracy: {best_accuracy:.4f}, saving model...")

    # Hand back the best model rather than whatever the last episode left
    if best_weights is not None:
        model.set_weights(best_weights)

    # Convert results to DataFrame and print
    results_df = pd.DataFrame(results, columns=['Episode', 'Accuracy (%)', 'Epsilon', 'Total Reward'])
    print("Training complete!")
    print(f"Best Accuracy on Test Set: {best_accuracy * 100:.2f}%")
    print(results_df)

    return model, results_df

# Entry point: start training when this module is run as the main program.
if __name__ == "__main__":
    train()
