In [1]:
import pandas as pd
import numpy as np

ages = np.arange(15, 85, 5)
genders = ['Male', 'Female']

drug_types = ['Drug A', 'Drug B', 'Drug C']
durations = [30, 60]


data = pd.DataFrame(columns=['Age', 'Gender', 'Drug Type', 'Duration', 'Response'])
for i in range(1000):

  age = np.random.choice(ages)
  gender = np.random.choice(genders)

  # Generate random drug type and duration based on dependencies
  if age < 45:
    if gender == 'Female':
      drug_type = 'Drug B'
      duration = np.random.choice([30, 60])
    else:
      drug_type = 'Drug A'
      duration = np.random.choice([30, 60])
  else:
    if gender == 'Female':
      drug_type = 'Drug A'
      duration = np.random.choice([30, 60])
    else:
      drug_type = 'Drug C'
      duration = np.random.choice([30, 60])

  # Generate response based on correlation rules
  if age < 45 and gender == 'Female' and drug_type == 'Drug B' and duration == 60:
    response = 1
  elif age >= 45 and gender == 'Male' and drug_type == 'Drug C' and duration == 30:
    response = 1
  elif age >= 45 and gender == 'Female' and drug_type == 'Drug A' and duration == 30:
    response = 1
  elif age < 45 and gender == 'Male' and drug_type == 'Drug A' and duration == 60:
    response = 1
  else:
    response = -1


  data.loc[i] = [age, gender, drug_type, duration, response]

patient_info_better = data


In [2]:
patient_info_better

Unnamed: 0,Age,Gender,Drug Type,Duration,Response
0,20,Male,Drug A,30,-1
1,15,Male,Drug A,30,-1
2,30,Male,Drug A,30,-1
3,20,Male,Drug A,30,-1
4,45,Male,Drug C,60,-1
...,...,...,...,...,...
995,15,Male,Drug A,60,1
996,60,Male,Drug C,60,-1
997,45,Male,Drug C,30,1
998,25,Male,Drug A,30,-1


In [3]:
reward_episode = list()
def QLearning(data, num_episodes, alpha, discount, epsilon):

  states = np.unique(data[['Age', 'Gender']].values, axis=0)
  actions = np.unique(data[['Drug Type', 'Duration']].values, axis=0)
  epsilon_decay = 0.001
  min_epsilon = 0.01

  Q = np.zeros((len(states), len(actions)))

  for episode in range(num_episodes):

    state_idx = np.random.choice(len(states))
    state = states[state_idx]
    total_rewards = 0

    done = False
    for i in range(100):
      #epsilon-greedy policy
      if np.random.rand() < epsilon:
        action_idx = np.random.choice(len(actions))
      else:
        action_values = Q[state_idx, :]
        max_idx = np.where(action_values == np.max(action_values))[0]
        action_idx = np.random.choice(max_idx)


      action = actions[action_idx]
      next_state_idx = np.where((data[['Age', 'Gender']].values == state).all(axis=1) & (data[['Drug Type', 'Duration']].values == action).all(axis=1))[0]

      if len(next_state_idx) > 0:
        next_state_idx = next_state_idx[0]
        if next_state_idx > len(states)-1: #making sure next_state_idx does not go out of bounds
          break
        else:
          reward = data.iloc[next_state_idx, -1]
          total_rewards += reward
          reward_episode.append(total_rewards)
          next_state = data.iloc[next_state_idx, :2].values
          Q[state_idx, action_idx] += alpha * (reward + discount * np.max(Q[next_state_idx, :]) - Q[state_idx, action_idx])
          state_idx = np.where((states == next_state).all(axis=1))[0][0]
          state = states[state_idx]
      else:
        reward = 0
        next_state = None
    if done:
      break

      epsilon = max(min_epsilon, np.exp(-epsilon_decay*episode))

  return Q

#preprocess data
def preprocess_data(data):
  genders = np.unique(data['Gender'])
  drugs = np.unique(data['Drug Type'])

  for i in range(len(genders)):
    if genders[i].lower() == 'male':
      data['Gender'][data['Gender'] == genders[i]] = 0
    else:
      data['Gender'][data['Gender'] == genders[i]] = 1

  for i in range(len(drugs)):
    if drugs[i] == 'Drug A':
      data['Drug Type'][data['Drug Type'] == drugs[i]] = 0
    elif drugs[i] == 'Drug B':
      data['Drug Type'][data['Drug Type'] == drugs[i]] = 1
    elif drugs[i] == 'Drug C':
      data['Drug Type'][data['Drug Type'] == drugs[i]] = 2

  data['Age'] = data['Age'].astype(float)
  data['Gender'] = data['Gender'].astype(int)
  data['Drug Type'] = data['Drug Type'].astype(int)
  data['Duration'] = data['Duration'].astype(float)

  return data

from sklearn.model_selection import train_test_split

def accuracy_calc(data, num_episodes, alpha, discount, epsilon, test_size=0.2, random_state=42):
  #data = preprocess_data(data)
  states = np.unique(data[['Age', 'Gender']].values, axis=0)
  actions = np.unique(data[['Drug Type', 'Duration']].values, axis=0)

  X_train, X_test, y_train, y_test = train_test_split(data[['Age', 'Gender', 'Drug Type', 'Duration']], data['Response'], test_size=test_size, random_state=random_state)
  Q = QLearning(X_train.join(y_train), num_episodes=num_episodes, alpha=alpha, discount=discount, epsilon=epsilon)

  rewards = []
  for i in range(X_test.shape[0]):
    state_idx = np.where((states == X_test.iloc[i, :2].values).all(axis=1))[0][0]
    action_values = Q[state_idx, :]
    action_idx = np.where(action_values == np.max(action_values))[0][0]
    action = actions[action_idx]
    next_state_idx = np.where((X_train[['Age', 'Gender', 'Drug Type', 'Duration']].values == X_test.iloc[i, :].values).all(axis=1))[0][0]
    reward = y_train.iloc[next_state_idx]
    rewards.append(reward)
  accuracy = np.mean(np.array(rewards) == y_test.values)
  print('Accuracy:', accuracy)

def train(data, num_episodes, alpha, discount, epsilon):
  data = preprocess_data(data)
  Q = QLearning(data, num_episodes=num_episodes, alpha=alpha, discount = discount, epsilon=epsilon)
  return Q

In [4]:
num_episodes = 10000
alpha = 0.1
discount = 0.9
epsilon = 0.5

In [None]:
Q = train(patient_info_better, num_episodes, alpha, discount, epsilon)
rewards = reward_episode

In [6]:
accuracy_calc(patient_info_better, num_episodes, alpha, discount, epsilon, test_size=0.2, random_state=42)

Accuracy: 1.0


In [8]:
def test(age, gender, Q):
  drugs = np.unique(data['Drug Type'])

  if gender.lower() == "male":
    gender = 0
  elif gender.lower() == "female":
    gender = 1
  else:
    raise ValueError("Gender should be 'Male' or 'Female'")

  if age<20 or age > 80:
    raise ValueError("Age should be between 20 and 80")
  else:
      age = round(age / 5) * 5


  # Define example state
  state = np.array([age, gender])

  states = np.unique(data[['Age', 'Gender']].values, axis=0)
  state_idx = np.where((states == state).all(axis=1))[0][0]
  actions = np.unique(data[['Drug Type', 'Duration']].values, axis=0)

  # Get recommended action
  action_idx = np.argmax(Q[state_idx, :])
  action = actions[action_idx]

  # Define drug code to name mapping
  drug_names = {0: 'Drug A', 1: 'Drug B', 2: 'Drug C'}

  # Convert action to drug type and duration
  drug_type = drug_names[action[0]]
  duration = action[1]

  # Output recommended drug type and duration
  print("Recommended drug type: ", drug_type)
  print("Recommended duration: ", duration)

In [15]:
test(age=40, gender='male', Q=Q)

Recommended drug type:  Drug A
Recommended duration:  30.0
