In [1]:
import gym
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers

# Create the CartPole environment
env = gym.make('CartPole-v1')
num_states = env.observation_space.shape[0]   # e.g., 4
num_actions = env.action_space.n              # e.g., 2 (left or right)

# Set random seed for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

# Step 1: Build the Policy Network
def create_policy_network():
    model = models.Sequential()
    model.add(layers.Input(shape=(num_states,)))
    model.add(layers.Dense(24, activation='relu'))
    model.add(layers.Dense(24, activation='relu'))
    model.add(layers.Dense(num_actions, activation='softmax'))  # Output is a probability distribution
    return model

policy_network = create_policy_network()
optimizer = optimizers.Adam(learning_rate=0.01)

# Step 2: Compute discounted rewards
def compute_discounted_rewards(rewards, gamma=0.99):
    discounted = np.zeros_like(rewards, dtype=np.float32)#This creates a discounted array of zeroes with the same shape as rewards
    running_add = 0
    for t in reversed(range(len(rewards))):
        running_add = rewards[t] + gamma * running_add
        discounted[t] = running_add
    return discounted

# Step 3: Training Loop
episodes = 1000
for episode in range(episodes):
    state = env.reset()[0] if isinstance(env.reset(), tuple) else env.reset() #This returns the current state that we're in
    
    states = []
    actions = []
    rewards = []

    maximum = 0
    
    done = False
    while not done:
        state_tensor = tf.convert_to_tensor([state], dtype=tf.float32) #This converts the state to a tensor
        probs = policy_network(state_tensor) #This outputs a probability for each of the actions which we'll use to make a choice
        action = np.random.choice(num_actions, p=probs.numpy()[0])#we use the probability to randomly select an action 

        #next_state, reward, done, _, _ = env.step(action) if isinstance(env.step(action), tuple) else (*env.step(action), None, None)
        step_result = env.step(action)

        if len(step_result) == 5:
            next_state, reward, terminated, truncated, info = step_result
            done = terminated or truncated
        else:
            next_state, reward, done, info = step_result
            terminated = done
            truncated = False
        
        # Store the episode data
        states.append(state)
        actions.append(action)
        rewards.append(reward)
        state = next_state

    # Step 4: Compute returns and normalize
    returns = compute_discounted_rewards(rewards)
    returns = (returns - np.mean(returns)) / (np.std(returns) + 1e-8)

    # Step 5: Update Policy Network
    with tf.GradientTape() as tape:
        loss = 0
        for state, action, ret in zip(states, actions, returns):
            state = tf.convert_to_tensor([state], dtype=tf.float32)
            probs = policy_network(state)
            action_prob = probs[0, action]
            log_prob = tf.math.log(action_prob + 1e-8)
            loss += -log_prob * ret  # Negative for gradient ascent

    grads = tape.gradient(loss, policy_network.trainable_variables)
    optimizer.apply_gradients(zip(grads, policy_network.trainable_variables))

    # Logging
    total_reward = sum(rewards)
    
    if total_reward >= 500:
        print("CartPole solved!")
        print(f"Episode {episode+1}, Total Reward: {total_reward}")
        print(f"States: {states}")     
        break

env.close()


2025-04-30 11:12:08.308468: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  if not isinstance(terminated, (bool, np.bool8)):


CartPole solved!
Episode 139, Total Reward: 500.0
States: [array([-0.01957909, -0.01959581, -0.0114887 ,  0.01885356], dtype=float32), array([-0.019971  , -0.21455112, -0.01111163,  0.30788964], dtype=float32), array([-0.02426203, -0.01927263, -0.00495384,  0.01172321], dtype=float32), array([-0.02464748,  0.17592002, -0.00471937, -0.2825186 ], dtype=float32), array([-0.02112908, -0.0191343 , -0.01036974,  0.00867215], dtype=float32), array([-0.02151176,  0.17613482, -0.0101963 , -0.28726444], dtype=float32), array([-0.01798907, -0.01884025, -0.01594159,  0.00218531], dtype=float32), array([-0.01836587,  0.17650665, -0.01589788, -0.29548445], dtype=float32), array([-0.01483574, -0.01838508, -0.02180757, -0.00785756], dtype=float32), array([-0.01520344, -0.2131876 , -0.02196472,  0.27786583], dtype=float32), array([-0.01946719, -0.0177593 , -0.01640741, -0.02166303], dtype=float32), array([-0.01982238,  0.17759407, -0.01684067, -0.3194772 ], dtype=float32), array([-0.0162705 , -0.017284

In [13]:
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

# Load data
X, y = load_iris(return_X_y=True)

# k-Fold
kf = KFold(n_splits=5)
model = RandomForestClassifier()
scores = cross_val_score(model, X, y, cv=kf)

print("Scores:", scores)
print("Average Accuracy:", scores.mean())

# Stratified k-Fold
skf = StratifiedKFold(n_splits=5)
scores_stratified = cross_val_score(model, X, y, cv=skf)
print("Stratified Scores:", scores_stratified)


Scores: [1.         1.         0.86666667 0.93333333 0.73333333]
Average Accuracy: 0.9066666666666666
Stratified Scores: [0.96666667 0.96666667 0.93333333 0.96666667 1.        ]


In [15]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

# Create sample binary classification dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]  # Needed for AUC-ROC

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

precision = precision_score(y_test, y_pred)
print(f"Precision: {precision:.2f}")

recall = recall_score(y_test, y_pred)
print(f"Recall: {recall:.2f}")

f1 = f1_score(y_test, y_pred)
print(f"F1 Score: {f1:.2f}")

auc = roc_auc_score(y_test, y_prob)
print(f"AUC-ROC: {auc:.2f}")


Accuracy: 0.91
Precision: 0.96
Recall: 0.86
F1 Score: 0.91
AUC-ROC: 0.93
