In [None]:
!pip install gym==0.18.3
!pip install numpy==1.21.2
!pip install foolbox==3.3.3
!pip install torch==1.9.0
!pip install torchvision==0.10.0
!pip install stable-baselines3==1.2.0
!pip install scipy==1.7.1


In [None]:
pip install autograd

In [None]:
import gym
import numpy as np
import foolbox as fb
import torch
import torchvision.models as models
from stable_baselines3 import PPO
from gym import spaces
from scipy.ndimage import gaussian_filter

class AdversarialEnvironment(gym.Env):
    """Gym environment in which every action selects one (attack, defense) pair.

    The flat discrete action ``a`` decodes to ``attack_idx = a // n_defenses``
    and ``defense_idx = a % n_defenses``. Observations are float32 HWC images
    in ``[0, 255]`` with shape ``input_shape``.

    Args:
        fmodel: Foolbox 3 ``PyTorchModel`` (channels-first inputs).
        adversarial_attacks: Sequence of Foolbox attack instances.
        defense_mechanisms: Sequence of callables applied to the attacked
            batch. A callable may return a batch shaped like its input (an
            input transformation) or anything else, which is then treated as
            class scores (for example the output of a substitute model).
        epsilon: L-inf perturbation budget on a [0, 1] pixel scale.
        num_classes: Number of classes the reference label is sampled from.
        max_episode_steps: Number of steps after which ``step`` reports
            ``done=True`` so that ``while not done`` loops terminate.
    """

    def __init__(self, fmodel, adversarial_attacks, defense_mechanisms,
                 epsilon=0.03, num_classes=1000, max_episode_steps=10):
        super(AdversarialEnvironment, self).__init__()

        self.fmodel = fmodel
        self.adversarial_attacks = adversarial_attacks
        self.defense_mechanisms = defense_mechanisms
        self.epsilon = epsilon
        self.num_classes = num_classes
        self.max_episode_steps = max_episode_steps

        self.input_shape = (224, 224, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=self.input_shape, dtype=np.float32)
        self.action_space = spaces.Discrete(len(adversarial_attacks) * len(defense_mechanisms))

        self.state = None
        self.steps_taken = 0
        # Foolbox 3 models expose no `num_classes` attribute, so the class
        # count is an explicit constructor argument instead.
        self.target_class = np.random.randint(0, self.num_classes)

    def step(self, action):
        """Run the selected attack and defense once.

        Returns:
            ``(observation, reward, done, info)`` in the classic Gym format.

        Raises:
            RuntimeError: if called before ``reset``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        attack_idx, defense_idx = divmod(int(action), len(self.defense_mechanisms))
        attack = self.adversarial_attacks[attack_idx]
        defense = self.defense_mechanisms[defense_idx]

        lower, upper = self.fmodel.bounds
        span = upper - lower
        device = getattr(self.fmodel, "device", None)

        # HWC image in [0, 255] -> NCHW batch of one inside the model's bounds.
        image = np.asarray(self.state, dtype=np.float32).reshape(self.input_shape) / 255.0 * span + lower
        inputs = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)[np.newaxis]),
            dtype=torch.float32,
            device=device,
        )
        labels = torch.as_tensor([self.target_class], dtype=torch.long, device=device)

        # Foolbox 3 attacks are callables returning (raw, clipped, is_adversarial).
        _, perturbed, _ = attack(
            self.fmodel,
            inputs,
            fb.criteria.Misclassification(labels),
            epsilons=self.epsilon * span,
        )

        defended = defense(perturbed)
        if not torch.is_tensor(defended):
            defended = torch.as_tensor(np.asarray(defended), dtype=torch.float32, device=device)

        with torch.no_grad():
            if defended.shape == inputs.shape:
                # Input-level defense: classify the transformed image.
                next_image = defended
                scores = self.fmodel(next_image)
            else:
                # The defense already produced class scores; keep the attacked
                # image as the next observation.
                next_image = perturbed
                scores = defended

        prediction = int(torch.argmax(scores))
        # NOTE(review): the reward rule is kept from the original (penalty when
        # the prediction equals `target_class`), while the attack criterion is
        # untargeted Misclassification -- confirm which semantics is intended.
        success = int(prediction == self.target_class)
        reward = -success

        observation = next_image.detach().cpu().numpy()[0].transpose(1, 2, 0)
        observation = (observation - lower) / span * 255.0
        # Keep the observation consistent with `observation_space`.
        self.state = np.clip(observation, 0.0, 255.0).astype(np.float32)

        self.steps_taken += 1
        done = self.steps_taken >= self.max_episode_steps

        return self.state, reward, done, {}

    def reset(self):
        """Start a new episode with a random image and a random reference class."""
        # Example: Random image from the dataset (replace with actual data loading)
        self.state = (np.random.rand(*self.input_shape) * 255).astype(np.float32)
        self.target_class = np.random.randint(0, self.num_classes)
        self.steps_taken = 0
        return self.state

# Pretrained ImageNet ResNet50 in inference mode, wrapped as a Foolbox model.
model = models.resnet50(pretrained=True)
model.eval()
# Per-channel normalisation applied by Foolbox along the channel axis (-3).
preprocessing = {
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225],
    "axis": -3,
}
fmodel = fb.models.PyTorchModel(model, bounds=(0, 1), preprocessing=preprocessing)

# L-inf attacks the agent can choose from, each with default settings.
adversarial_attacks = [
    attack_cls()
    for attack_cls in (
        fb.attacks.LinfPGD,
        fb.attacks.LinfDeepFoolAttack,
        fb.attacks.LinfBasicIterativeAttack,
    )
]

# Define defense mechanisms
def input_transformation(x, sigma=1, spatial_axes=(-2, -1)):
    """Gaussian-blur an image or batch along its spatial axes only.

    A scalar ``sigma`` passed straight to ``gaussian_filter`` smooths every
    axis, which also mixes colour channels and batch entries. Here every axis
    that is not listed in ``spatial_axes`` gets sigma 0 and is left untouched.

    Args:
        x: Array-like image data; CPU/GPU torch tensors are accepted as well.
        sigma: Standard deviation of the blur along each spatial axis.
        spatial_axes: Axes to smooth. The default matches channels-first
            layouts ``(..., C, H, W)``.

    Returns:
        numpy.ndarray with the same shape as ``x``.
    """
    if hasattr(x, "detach"):
        # torch tensors must leave the autograd graph / GPU before NumPy sees them.
        x = x.detach().cpu()
    image = np.asarray(x)

    needed_dims = max(-axis if axis < 0 else axis + 1 for axis in spatial_axes)
    if image.ndim < needed_dims:
        # Too few dimensions to single out spatial axes: blur uniformly.
        return gaussian_filter(image, sigma=sigma)

    per_axis_sigma = [0.0] * image.ndim
    for axis in spatial_axes:
        per_axis_sigma[axis] = sigma
    return gaussian_filter(image, sigma=per_axis_sigma)

def adversarial_training(x):
    """Placeholder "robust model" defense.

    Forwards ``x`` through the module-level ``fmodel`` and returns whatever
    that call produces.
    """
    # Replace with actual robust model
    return fmodel(x)

def no_defense(x):
    """Baseline defense: return the input unchanged."""
    return x

# A named identity function is used so the final report prints "no_defense"
# instead of "<lambda>".
defense_mechanisms = [
    no_defense,
    input_transformation,
    adversarial_training
]

# Create the AdversarialEnvironment
env = AdversarialEnvironment(fmodel, adversarial_attacks, defense_mechanisms)

# Train the RL agent using PPO from Stable Baselines
agent = PPO("MlpPolicy", env, verbose=1)
agent.learn(total_timesteps=100000)

success_counters = np.zeros((len(adversarial_attacks), len(defense_mechanisms)))

# Run the agent through multiple episodes to collect statistics
num_episodes = 1000
# Hard cap per episode so the evaluation always terminates, even when the
# environment never reports done=True.
max_steps_per_episode = 10
for episode in range(num_episodes):
    obs = env.reset()
    for _step in range(max_steps_per_episode):
        action, _ = agent.predict(obs)
        action = int(action)  # predict() returns a NumPy scalar
        attack_idx, defense_idx = divmod(action, len(defense_mechanisms))
        obs, reward, done, _ = env.step(action)

        # If the attack was successful (reward < 0), increment the counter for this attack and defense
        if reward < 0:
            success_counters[attack_idx, defense_idx] += 1

        if done:
            break

# Analyze the results
worst_attack_idx = np.argmax(success_counters.sum(axis=1))
most_effective_defense_idx = np.argmin(success_counters.sum(axis=0))

print("Worst Adversarial Attack:", adversarial_attacks[worst_attack_idx].__class__.__name__)
print("Most Effective Defense:", defense_mechanisms[most_effective_defense_idx].__name__)

# Save the trained agent if needed
agent.save("path_to_saved_model")


In [None]:
!pip install foolbox==3.3.3
!pip install torch torchvision
!pip install stable-baselines3
!pip install gym
!pip install scipy


In [None]:
def reset(self):
    """Store a fresh random image and class on ``self`` and return the image.

    The image is drawn uniformly from ``[0, 255)`` with shape
    ``self.input_shape``; the class is an integer in ``[0, 1000)``.
    """
    # Example: Random image from the dataset (replace with actual data loading)
    self.state = 255 * np.random.rand(*self.input_shape)
    # ImageNet has 1000 classes
    self.target_class = np.random.randint(0, 1000)
    return self.state

In [None]:
pip install tensorflow

In [None]:
import gym
import numpy as np
import foolbox as fb
import tensorflow as tf
from stable_baselines3 import PPO
from gym import spaces
from scipy.ndimage import gaussian_filter

class AdversarialEnvironment(gym.Env):
    """Gym environment in which every action selects one (attack, defense) pair.

    The flat discrete action ``a`` decodes to ``attack_idx = a // n_defenses``
    and ``defense_idx = a % n_defenses``. Observations are float32 HWC images
    in ``[0, 255]`` with shape ``input_shape``.

    Args:
        fmodel: Foolbox 3 ``TensorFlowModel`` (channels-last inputs).
        adversarial_attacks: Sequence of Foolbox attack instances.
        defense_mechanisms: Sequence of callables applied to the attacked
            batch. A callable may return a batch shaped like its input (an
            input transformation) or anything else, which is then treated as
            class scores (for example the output of a substitute model).
        epsilon: L-inf perturbation budget on a [0, 1] pixel scale.
        num_classes: Number of classes the reference label is sampled from.
        max_episode_steps: Number of steps after which ``step`` reports
            ``done=True`` so that ``while not done`` loops terminate.
    """

    def __init__(self, fmodel, adversarial_attacks, defense_mechanisms,
                 epsilon=0.03, num_classes=1000, max_episode_steps=10):
        super(AdversarialEnvironment, self).__init__()

        self.fmodel = fmodel
        self.adversarial_attacks = adversarial_attacks
        self.defense_mechanisms = defense_mechanisms
        self.epsilon = epsilon
        self.num_classes = num_classes
        self.max_episode_steps = max_episode_steps

        self.input_shape = (224, 224, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=self.input_shape, dtype=np.float32)
        self.action_space = spaces.Discrete(len(adversarial_attacks) * len(defense_mechanisms))

        self.state = None
        self.target_class = None
        self.steps_taken = 0

    def reset(self):
        """Start a new episode with a random image and a random reference class."""
        self.state = (np.random.rand(*self.input_shape) * 255).astype(np.float32)
        self.target_class = np.random.randint(0, self.num_classes)
        self.steps_taken = 0
        return self.state

    def step(self, action):
        """Run the selected attack and defense once.

        Returns:
            ``(observation, reward, done, info)`` in the classic Gym format.

        Raises:
            RuntimeError: if called before ``reset``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        attack_idx, defense_idx = divmod(int(action), len(self.defense_mechanisms))
        attack = self.adversarial_attacks[attack_idx]
        defense = self.defense_mechanisms[defense_idx]

        lower, upper = self.fmodel.bounds
        span = upper - lower

        # HWC image in [0, 255] -> NHWC batch of one inside the model's bounds.
        image = np.asarray(self.state, dtype=np.float32).reshape(self.input_shape) / 255.0 * span + lower
        inputs = tf.convert_to_tensor(image[np.newaxis].astype(np.float32))
        labels = tf.convert_to_tensor(np.array([self.target_class], dtype=np.int64))

        # Foolbox 3 attacks are callables returning (raw, clipped, is_adversarial);
        # they need native framework tensors, not NumPy arrays.
        _, perturbed, _ = attack(
            self.fmodel,
            inputs,
            fb.criteria.Misclassification(labels),
            epsilons=self.epsilon * span,
        )

        defended = tf.cast(tf.convert_to_tensor(defense(perturbed)), tf.float32)
        if tuple(defended.shape) == tuple(inputs.shape):
            # Input-level defense: classify the transformed image.
            next_image = defended
            scores = self.fmodel(next_image)
        else:
            # The defense already produced class scores; keep the attacked
            # image as the next observation.
            next_image = perturbed
            scores = defended

        prediction = int(np.argmax(scores.numpy()))
        # NOTE(review): the reward rule is kept from the original (penalty when
        # the prediction equals `target_class`), while the attack criterion is
        # untargeted Misclassification -- confirm which semantics is intended.
        success = int(prediction == self.target_class)
        reward = -success

        observation = (next_image.numpy()[0] - lower) / span * 255.0
        # Keep the observation consistent with `observation_space`.
        self.state = np.clip(observation, 0.0, 255.0).astype(np.float32)

        self.steps_taken += 1
        done = self.steps_taken >= self.max_episode_steps

        return self.state, reward, done, {}

# Load a TensorFlow model
model = tf.keras.applications.ResNet50(weights='imagenet')
# Keras' ImageNet ResNet50 is channels-last and expects BGR, mean-centred
# pixels on a 0-255 scale; torch-style mean/std with axis=-3 does not match.
# This is the setup from the Foolbox TensorFlow ResNet50 example.
preprocessing = dict(flip_axis=-1, mean=[104.0, 116.0, 123.0])  # RGB to BGR
fmodel = fb.models.TensorFlowModel(model, bounds=(0, 255), preprocessing=preprocessing)
# Expose [0, 1] bounds so epsilons and inputs elsewhere stay on a unit scale.
fmodel = fmodel.transform_bounds((0, 1))

# Define adversarial attacks using Foolbox
adversarial_attacks = [
    fb.attacks.LinfPGD(),
    fb.attacks.LinfDeepFoolAttack(),
    fb.attacks.LinfBasicIterativeAttack()
]

# Define defense mechanisms
def input_transformation(x, sigma=1, spatial_axes=(-3, -2)):
    """Gaussian-blur an image or batch along its spatial axes only.

    A scalar ``sigma`` passed straight to ``gaussian_filter`` smooths every
    axis, which also mixes colour channels and batch entries. Here every axis
    that is not listed in ``spatial_axes`` gets sigma 0 and is left untouched.

    Args:
        x: Array-like image data (anything ``np.asarray`` accepts).
        sigma: Standard deviation of the blur along each spatial axis.
        spatial_axes: Axes to smooth. The default matches channels-last
            layouts ``(..., H, W, C)``.

    Returns:
        numpy.ndarray with the same shape as ``x``.
    """
    image = np.asarray(x)

    needed_dims = max(-axis if axis < 0 else axis + 1 for axis in spatial_axes)
    if image.ndim < needed_dims:
        # Too few dimensions to single out spatial axes: blur uniformly.
        return gaussian_filter(image, sigma=sigma)

    per_axis_sigma = [0.0] * image.ndim
    for axis in spatial_axes:
        per_axis_sigma[axis] = sigma
    return gaussian_filter(image, sigma=per_axis_sigma)

def adversarial_training(x):
    """Placeholder "robust model" defense.

    Forwards ``x`` through the module-level ``fmodel`` and returns whatever
    that call produces.
    """
    return fmodel(x)

# Defenses the agent can pick: identity baseline, blur, placeholder robust model.
defense_mechanisms = [lambda x: x, input_transformation, adversarial_training]

# Environment over every (attack, defense) pair, then PPO training on it.
env = AdversarialEnvironment(fmodel, adversarial_attacks, defense_mechanisms)
agent = PPO(policy="MlpPolicy", env=env, verbose=1)
agent.learn(total_timesteps=100_000)

# Rest of the code ...


In [None]:
import tensorflow as tf
from foolbox import TensorFlowModel, accuracy, samples
from foolbox.attacks import LinfPGD

# Load a pre-trained TensorFlow model
model = tf.keras.applications.ResNet50(weights="imagenet")
# Keras' ImageNet ResNet50 is channels-last and expects BGR, mean-centred
# pixels on a 0-255 scale; torch-style mean/std with axis=-3 does not match.
# This is the setup from the Foolbox TensorFlow ResNet50 example.
preprocessing = dict(flip_axis=-1, mean=[104.0, 116.0, 123.0])  # RGB to BGR
fmodel = TensorFlowModel(model, bounds=(0, 255), preprocessing=preprocessing)
# Expose [0, 1] bounds so the epsilon below is on a unit pixel scale.
fmodel = fmodel.transform_bounds((0, 1))

# Define an attack
attack = LinfPGD()

# Test the attack on some sample data
images, labels = samples(fmodel, dataset="imagenet", batchsize=16)
adversarials, _, success = attack(fmodel, images, labels, epsilons=0.03)

# Continue with the rest of your code...


In [None]:
pip install --upgrade autograd


In [None]:
import tensorflow as tf
from foolbox import TensorFlowModel, accuracy, samples
from foolbox.attacks import LinfPGD

# Load a pre-trained TensorFlow model
model = tf.keras.applications.ResNet50(weights="imagenet")
# Keras' ImageNet ResNet50 is channels-last and expects BGR, mean-centred
# pixels on a 0-255 scale; torch-style mean/std with axis=-3 does not match.
# This is the setup from the Foolbox TensorFlow ResNet50 example.
preprocessing = dict(flip_axis=-1, mean=[104.0, 116.0, 123.0])  # RGB to BGR
fmodel = TensorFlowModel(model, bounds=(0, 255), preprocessing=preprocessing)
# Expose [0, 1] bounds so the epsilon below is on a unit pixel scale.
fmodel = fmodel.transform_bounds((0, 1))

# Define an attack
attack = LinfPGD()

# Test the attack on some sample data
images, labels = samples(fmodel, dataset="imagenet", batchsize=16)
images = tf.image.resize(images, (224, 224))  # Ensure the correct image size

# You can use a single epsilon value or an array of values
epsilon = 0.03
# With a list of epsilons the attack returns one result per epsilon.
adversarials, _, success = attack(fmodel, images, labels, epsilons=[epsilon])

# Continue with the rest of your code...


In [None]:
import gym
import numpy as np
import foolbox as fb
import tensorflow as tf
from stable_baselines3 import PPO
from gym import spaces
from scipy.ndimage import gaussian_filter

class AdversarialEnvironment(gym.Env):
    """Gym environment in which every action selects one (attack, defense) pair.

    The flat discrete action ``a`` decodes to ``attack_idx = a // n_defenses``
    and ``defense_idx = a % n_defenses``. Observations are float32 HWC images
    in ``[0, 255]`` with shape ``input_shape``.

    Args:
        fmodel: Foolbox 3 ``TensorFlowModel`` (channels-last inputs).
        adversarial_attacks: Sequence of Foolbox attack instances.
        defense_mechanisms: Sequence of callables applied to the attacked
            batch. A callable may return a batch shaped like its input (an
            input transformation) or anything else, which is then treated as
            class scores (for example the output of a substitute model).
        epsilon: L-inf perturbation budget on a [0, 1] pixel scale. It is a
            constructor argument so ``step`` no longer depends on a global
            ``epsilon`` left over from another cell.
        num_classes: Number of classes the reference label is sampled from.
        max_episode_steps: Number of steps after which ``step`` reports
            ``done=True`` so that ``while not done`` loops terminate.
    """

    def __init__(self, fmodel, adversarial_attacks, defense_mechanisms,
                 epsilon=0.03, num_classes=1000, max_episode_steps=10):
        super(AdversarialEnvironment, self).__init__()

        self.fmodel = fmodel
        self.adversarial_attacks = adversarial_attacks
        self.defense_mechanisms = defense_mechanisms
        self.epsilon = epsilon
        self.num_classes = num_classes
        self.max_episode_steps = max_episode_steps

        self.input_shape = (224, 224, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=self.input_shape, dtype=np.float32)
        self.action_space = spaces.Discrete(len(adversarial_attacks) * len(defense_mechanisms))

        self.state = None
        self.target_class = None
        self.steps_taken = 0

    def reset(self):
        """Start a new episode with a random image and a random reference class."""
        self.state = (np.random.rand(*self.input_shape) * 255).astype(np.float32)  # Ensure floating-point type
        self.target_class = np.random.randint(0, self.num_classes)
        self.steps_taken = 0
        # Return the array itself: a list wrapper does not match observation_space.
        return self.state

    def step(self, action):
        """Run the selected attack and defense once.

        Returns:
            ``(observation, reward, done, info)`` in the classic Gym format.

        Raises:
            RuntimeError: if called before ``reset``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        attack_idx, defense_idx = divmod(int(action), len(self.defense_mechanisms))
        attack = self.adversarial_attacks[attack_idx]
        defense = self.defense_mechanisms[defense_idx]

        lower, upper = self.fmodel.bounds
        span = upper - lower

        # HWC image in [0, 255] -> NHWC batch of one inside the model's bounds.
        image = np.asarray(self.state, dtype=np.float32).reshape(self.input_shape) / 255.0 * span + lower
        inputs = tf.convert_to_tensor(image[np.newaxis].astype(np.float32))
        labels = tf.convert_to_tensor(np.array([self.target_class], dtype=np.int64))

        # Foolbox 3 attacks are callables returning (raw, clipped, is_adversarial);
        # they need native framework tensors, not NumPy arrays.
        _, perturbed, _ = attack(
            self.fmodel,
            inputs,
            fb.criteria.Misclassification(labels),
            epsilons=self.epsilon * span,
        )

        defended = tf.cast(tf.convert_to_tensor(defense(perturbed)), tf.float32)
        if tuple(defended.shape) == tuple(inputs.shape):
            # Input-level defense: classify the transformed image.
            next_image = defended
            scores = self.fmodel(next_image)
        else:
            # The defense already produced class scores; keep the attacked
            # image as the next observation.
            next_image = perturbed
            scores = defended

        prediction = int(np.argmax(scores.numpy()))
        # NOTE(review): the reward rule is kept from the original (penalty when
        # the prediction equals `target_class`), while the attack criterion is
        # untargeted Misclassification -- confirm which semantics is intended.
        success = int(prediction == self.target_class)
        reward = -success  # Negative reward for successful attack

        observation = (next_image.numpy()[0] - lower) / span * 255.0
        # Keep the observation consistent with `observation_space`.
        self.state = np.clip(observation, 0.0, 255.0).astype(np.float32)

        self.steps_taken += 1
        done = self.steps_taken >= self.max_episode_steps

        return self.state, reward, done, {}

# Load a TensorFlow model instead of PyTorch
model = tf.keras.applications.ResNet50(weights='imagenet')
# Keras' ImageNet ResNet50 is channels-last and expects BGR, mean-centred
# pixels on a 0-255 scale; torch-style mean/std with axis=-3 does not match.
# This is the setup from the Foolbox TensorFlow ResNet50 example.
preprocessing = dict(flip_axis=-1, mean=[104.0, 116.0, 123.0])  # RGB to BGR
fmodel = fb.models.TensorFlowModel(model, bounds=(0, 255), preprocessing=preprocessing)
fmodel = fmodel.transform_bounds((0, 1))

# Rebuild the environment so the evaluation uses the class and model defined
# in this cell rather than a stale `env` left over from an earlier cell.
# NOTE(review): `adversarial_attacks`, `defense_mechanisms` and the trained
# `agent` are still taken from earlier cells -- confirm that is intended.
env = AdversarialEnvironment(fmodel, adversarial_attacks, defense_mechanisms)
# Start from fresh counters instead of accumulating onto an earlier run.
success_counters = np.zeros((len(adversarial_attacks), len(defense_mechanisms)))

num_episodes = 1000
# Hard cap per episode so the evaluation always terminates, even when the
# environment never reports done=True.
max_steps_per_episode = 10
for episode in range(num_episodes):
    obs = env.reset()
    for _step in range(max_steps_per_episode):
        action, _ = agent.predict(obs)
        action = int(action)  # predict() returns a NumPy scalar
        attack_idx, defense_idx = divmod(action, len(defense_mechanisms))
        obs, reward, done, _ = env.step(action)

        # If the attack was successful (reward < 0), increment the counter for this attack and defense
        if reward < 0:
            success_counters[attack_idx, defense_idx] += 1

        if done:
            break

# Analyze the results
worst_attack_idx = np.argmax(success_counters.sum(axis=1))
most_effective_defense_idx = np.argmin(success_counters.sum(axis=0))

print("Worst Adversarial Attack:", adversarial_attacks[worst_attack_idx].__class__.__name__)
print("Most Effective Defense:", defense_mechanisms[most_effective_defense_idx].__name__)
# Rest of the code ...


In [None]:
import gym
import numpy as np
import foolbox as fb
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from stable_baselines3 import PPO
from gym import spaces
from scipy.ndimage import gaussian_filter

class AdversarialEnvironment(gym.Env):
    """Gym environment in which every action selects one (attack, defense) pair.

    The flat discrete action ``a`` decodes to ``attack_idx = a // n_defenses``
    and ``defense_idx = a % n_defenses``. Observations are float32 HWC images
    in ``[0, 255]`` with shape ``input_shape``.

    Args:
        fmodel: Foolbox 3 ``TensorFlowModel`` (channels-last inputs).
        adversarial_attacks: Sequence of Foolbox attack instances.
        defense_mechanisms: Sequence of callables applied to the attacked
            batch. A callable may return a batch shaped like its input (an
            input transformation) or anything else, which is then treated as
            class scores (for example the output of a substitute model).
        epsilon: L-inf perturbation budget on a [0, 1] pixel scale.
        num_classes: Number of classes the reference label is sampled from.
        max_episode_steps: Number of steps after which ``step`` reports
            ``done=True`` so that ``while not done`` loops terminate.
    """

    def __init__(self, fmodel, adversarial_attacks, defense_mechanisms,
                 epsilon=0.03, num_classes=1000, max_episode_steps=10):
        super(AdversarialEnvironment, self).__init__()

        self.fmodel = fmodel
        self.adversarial_attacks = adversarial_attacks
        self.defense_mechanisms = defense_mechanisms
        self.epsilon = epsilon
        self.num_classes = num_classes
        self.max_episode_steps = max_episode_steps

        self.input_shape = (224, 224, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=self.input_shape, dtype=np.float32)
        self.action_space = spaces.Discrete(len(adversarial_attacks) * len(defense_mechanisms))

        self.state = None
        self.target_class = None
        self.steps_taken = 0

    def step(self, action):
        """Run the selected attack and defense once.

        Returns:
            ``(observation, reward, done, info)`` in the classic Gym format.

        Raises:
            RuntimeError: if called before ``reset``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        attack_idx, defense_idx = divmod(int(action), len(self.defense_mechanisms))
        attack = self.adversarial_attacks[attack_idx]
        defense = self.defense_mechanisms[defense_idx]

        lower, upper = self.fmodel.bounds
        span = upper - lower

        # HWC image in [0, 255] -> NHWC batch of one inside the model's bounds.
        image = np.asarray(self.state, dtype=np.float32).reshape(self.input_shape) / 255.0 * span + lower
        inputs = tf.convert_to_tensor(image[np.newaxis].astype(np.float32))
        labels = tf.convert_to_tensor(np.array([self.target_class], dtype=np.int64))

        # Foolbox 3 attacks are callables returning (raw, clipped, is_adversarial);
        # they need native framework tensors, not NumPy arrays.
        _, perturbed, _ = attack(
            self.fmodel,
            inputs,
            fb.criteria.Misclassification(labels),
            epsilons=self.epsilon * span,
        )

        defended = tf.cast(tf.convert_to_tensor(defense(perturbed)), tf.float32)
        if tuple(defended.shape) == tuple(inputs.shape):
            # Input-level defense: classify the transformed image.
            next_image = defended
            scores = self.fmodel(next_image)
        else:
            # The defense already produced class scores; keep the attacked
            # image as the next observation.
            next_image = perturbed
            scores = defended

        prediction = int(np.argmax(scores.numpy()))
        # NOTE(review): the reward rule is kept from the original (penalty when
        # the prediction equals `target_class`), while the attack criterion is
        # untargeted Misclassification -- confirm which semantics is intended.
        success = int(prediction == self.target_class)
        reward = -success  # Negative reward for successful attack

        observation = (next_image.numpy()[0] - lower) / span * 255.0
        # Keep the observation consistent with `observation_space`.
        self.state = np.clip(observation, 0.0, 255.0).astype(np.float32)

        self.steps_taken += 1
        done = self.steps_taken >= self.max_episode_steps

        return self.state, reward, done, {}

    def reset(self):
        """Start a new episode with a random image and a random reference class."""
        # Example: Random image from the dataset (replace with actual data loading)
        self.state = (np.random.rand(*self.input_shape) * 255).astype(np.float32)
        self.target_class = np.random.randint(0, self.num_classes)
        self.steps_taken = 0
        return self.state

# Load ResNet50 model using TensorFlow
model = ResNet50(weights='imagenet')
# Keras' ImageNet ResNet50 is channels-last and expects BGR, mean-centred
# pixels on a 0-255 scale; torch-style mean/std with axis=-3 does not match.
# This is the setup from the Foolbox TensorFlow ResNet50 example.
preprocessing = dict(flip_axis=-1, mean=[104.0, 116.0, 123.0])  # RGB to BGR
fmodel = fb.models.TensorFlowModel(model, bounds=(0, 255), preprocessing=preprocessing)
# Expose [0, 1] bounds so epsilons and inputs elsewhere stay on a unit scale.
fmodel = fmodel.transform_bounds((0, 1))

# Define adversarial attacks using Foolbox
adversarial_attacks = [
    fb.attacks.LinfPGD(),
    fb.attacks.LinfDeepFoolAttack(),
    fb.attacks.LinfBasicIterativeAttack()
]

# Define defense mechanisms
def input_transformation(x, sigma=1, spatial_axes=(-3, -2)):
    """Gaussian-blur an image or batch along its spatial axes only.

    A scalar ``sigma`` passed straight to ``gaussian_filter`` smooths every
    axis, which also mixes colour channels and batch entries. Here every axis
    that is not listed in ``spatial_axes`` gets sigma 0 and is left untouched.

    Args:
        x: Array-like image data (anything ``np.asarray`` accepts).
        sigma: Standard deviation of the blur along each spatial axis.
        spatial_axes: Axes to smooth. The default matches channels-last
            layouts ``(..., H, W, C)``.

    Returns:
        numpy.ndarray with the same shape as ``x``.
    """
    image = np.asarray(x)

    needed_dims = max(-axis if axis < 0 else axis + 1 for axis in spatial_axes)
    if image.ndim < needed_dims:
        # Too few dimensions to single out spatial axes: blur uniformly.
        return gaussian_filter(image, sigma=sigma)

    per_axis_sigma = [0.0] * image.ndim
    for axis in spatial_axes:
        per_axis_sigma[axis] = sigma
    return gaussian_filter(image, sigma=per_axis_sigma)

def adversarial_training(x):
    """Placeholder "robust model" defense.

    Forwards ``x`` through the module-level ``fmodel`` and returns whatever
    that call produces.
    """
    # Replace with actual robust model
    return fmodel(x)

from stable_baselines3.common.vec_env import DummyVecEnv


def no_defense(x):
    """Baseline defense: return the input unchanged."""
    return x

# A named identity function is used so the final report prints "no_defense"
# instead of "<lambda>".
defense_mechanisms = [
    no_defense,
    input_transformation,
    adversarial_training
]

# Build and train once; the duplicated second build/train only discarded the
# first agent after a full training run.
base_env = AdversarialEnvironment(fmodel, adversarial_attacks, defense_mechanisms)
env = DummyVecEnv([lambda: base_env])  # Wrap the environment

agent = PPO("MlpPolicy", env, verbose=1)
agent.learn(total_timesteps=100000)
success_counters = np.zeros((len(adversarial_attacks), len(defense_mechanisms)))

# Run the agent through multiple episodes to collect statistics
num_episodes = 1000
# Hard cap per episode so the evaluation always terminates, even when the
# environment never reports done=True.
max_steps_per_episode = 10
for episode in range(num_episodes):
    obs = env.reset()
    for _step in range(max_steps_per_episode):
        action, _ = agent.predict(obs)
        # The vectorised env wraps exactly one environment, so every returned
        # array has a single entry at index 0.
        attack_idx, defense_idx = divmod(int(action[0]), len(defense_mechanisms))
        obs, reward, done, _ = env.step(action)

        # If the attack was successful (reward < 0), increment the counter for this attack and defense
        if reward[0] < 0:
            success_counters[attack_idx, defense_idx] += 1

        if done[0]:
            break

# Analyze the results
worst_attack_idx = np.argmax(success_counters.sum(axis=1))
most_effective_defense_idx = np.argmin(success_counters.sum(axis=0))

print("Worst Adversarial Attack:", adversarial_attacks[worst_attack_idx].__class__.__name__)
print("Most Effective Defense:", defense_mechanisms[most_effective_defense_idx].__name__)

# Save the trained agent if needed
agent.save("path_to_saved_model")


In [None]:
import gym
import numpy as np
import foolbox as fb
import torch
import torchvision.models as models
from stable_baselines3 import PPO
from gym import spaces
from scipy.ndimage import gaussian_filter

class AdversarialEnvironment(gym.Env):
    """Gym environment in which every action selects one (attack, defense) pair.

    The flat discrete action ``a`` decodes to ``attack_idx = a // n_defenses``
    and ``defense_idx = a % n_defenses``. Observations are float32 HWC images
    in ``[0, 255]`` with shape ``input_shape``.

    Args:
        fmodel: Foolbox 3 ``PyTorchModel`` (channels-first inputs).
        adversarial_attacks: Sequence of Foolbox attack instances.
        defense_mechanisms: Sequence of callables applied to the attacked
            batch. A callable may return a batch shaped like its input (an
            input transformation) or anything else, which is then treated as
            class scores (for example the output of a substitute model).
        epsilon: L-inf perturbation budget on a [0, 1] pixel scale.
        num_classes: Number of classes the reference label is sampled from.
        max_episode_steps: Number of steps after which ``step`` reports
            ``done=True`` so that ``while not done`` loops terminate.
    """

    def __init__(self, fmodel, adversarial_attacks, defense_mechanisms,
                 epsilon=0.03, num_classes=1000, max_episode_steps=10):
        super(AdversarialEnvironment, self).__init__()

        self.fmodel = fmodel
        self.adversarial_attacks = adversarial_attacks
        self.defense_mechanisms = defense_mechanisms
        self.epsilon = epsilon
        self.num_classes = num_classes
        self.max_episode_steps = max_episode_steps

        self.input_shape = (224, 224, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=self.input_shape, dtype=np.float32)
        self.action_space = spaces.Discrete(len(adversarial_attacks) * len(defense_mechanisms))

        self.state = None
        self.target_class = None
        self.steps_taken = 0

    def step(self, action):
        """Run the selected attack and defense once.

        Returns:
            ``(observation, reward, done, info)`` in the classic Gym format.

        Raises:
            RuntimeError: if called before ``reset``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        attack_idx, defense_idx = divmod(int(action), len(self.defense_mechanisms))
        attack = self.adversarial_attacks[attack_idx]
        defense = self.defense_mechanisms[defense_idx]

        lower, upper = self.fmodel.bounds
        span = upper - lower
        device = getattr(self.fmodel, "device", None)

        # HWC image in [0, 255] -> NCHW batch of one inside the model's bounds.
        image = np.asarray(self.state, dtype=np.float32).reshape(self.input_shape) / 255.0 * span + lower
        inputs = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)[np.newaxis]),
            dtype=torch.float32,
            device=device,
        )
        labels = torch.as_tensor([self.target_class], dtype=torch.long, device=device)

        # Foolbox 3 attacks are callables returning (raw, clipped, is_adversarial);
        # they need native framework tensors, not NumPy arrays.
        _, perturbed, _ = attack(
            self.fmodel,
            inputs,
            fb.criteria.Misclassification(labels),
            epsilons=self.epsilon * span,
        )

        defended = defense(perturbed)
        if not torch.is_tensor(defended):
            defended = torch.as_tensor(np.asarray(defended), dtype=torch.float32, device=device)

        with torch.no_grad():
            if defended.shape == inputs.shape:
                # Input-level defense: classify the transformed image.
                next_image = defended
                scores = self.fmodel(next_image)
            else:
                # The defense already produced class scores; keep the attacked
                # image as the next observation.
                next_image = perturbed
                scores = defended

        prediction = int(torch.argmax(scores))
        # NOTE(review): the reward rule is kept from the original (penalty when
        # the prediction equals `target_class`), while the attack criterion is
        # untargeted Misclassification -- confirm which semantics is intended.
        success = int(prediction == self.target_class)
        reward = -success  # Negative reward for successful attack

        observation = next_image.detach().cpu().numpy()[0].transpose(1, 2, 0)
        observation = (observation - lower) / span * 255.0
        # Keep the observation consistent with `observation_space`.
        self.state = np.clip(observation, 0.0, 255.0).astype(np.float32)

        self.steps_taken += 1
        done = self.steps_taken >= self.max_episode_steps

        return self.state, reward, done, {}

    def reset(self):
        """Start a new episode with a random image and a random reference class."""
        # Example: Random image from the dataset (replace with actual data loading)
        self.state = (np.random.rand(*self.input_shape) * 255).astype(np.float32)
        self.target_class = np.random.randint(0, self.num_classes)
        self.steps_taken = 0
        return self.state

# Pretrained ImageNet ResNet50 in inference mode, wrapped as a Foolbox model.
model = models.resnet50(pretrained=True)
model.eval()
# Per-channel normalisation applied by Foolbox along the channel axis (-3).
preprocessing = {
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225],
    "axis": -3,
}
fmodel = fb.models.PyTorchModel(model, bounds=(0, 1), preprocessing=preprocessing)

# L-inf attacks the agent can choose from, each with default settings.
adversarial_attacks = [
    attack_cls()
    for attack_cls in (
        fb.attacks.LinfPGD,
        fb.attacks.LinfDeepFoolAttack,
        fb.attacks.LinfBasicIterativeAttack,
    )
]

# Define defense mechanisms
def input_transformation(x, sigma=1, spatial_axes=(-2, -1)):
    """Gaussian-blur an image or batch along its spatial axes only.

    A scalar ``sigma`` passed straight to ``gaussian_filter`` smooths every
    axis, which also mixes colour channels and batch entries. Here every axis
    that is not listed in ``spatial_axes`` gets sigma 0 and is left untouched.

    Args:
        x: Array-like image data; CPU/GPU torch tensors are accepted as well.
        sigma: Standard deviation of the blur along each spatial axis.
        spatial_axes: Axes to smooth. The default matches channels-first
            layouts ``(..., C, H, W)``.

    Returns:
        numpy.ndarray with the same shape as ``x``.
    """
    if hasattr(x, "detach"):
        # torch tensors must leave the autograd graph / GPU before NumPy sees them.
        x = x.detach().cpu()
    image = np.asarray(x)

    needed_dims = max(-axis if axis < 0 else axis + 1 for axis in spatial_axes)
    if image.ndim < needed_dims:
        # Too few dimensions to single out spatial axes: blur uniformly.
        return gaussian_filter(image, sigma=sigma)

    per_axis_sigma = [0.0] * image.ndim
    for axis in spatial_axes:
        per_axis_sigma[axis] = sigma
    return gaussian_filter(image, sigma=per_axis_sigma)

def adversarial_training(x):
    """Placeholder "robust model" defense.

    Forwards ``x`` through the module-level ``fmodel`` and returns whatever
    that call produces.
    """
    # Replace with actual robust model
    return fmodel(x)

def no_defense(x):
    """Baseline defense: return the input unchanged."""
    return x

# A named identity function is used so the final report prints "no_defense"
# instead of "<lambda>".
defense_mechanisms = [
    no_defense,
    input_transformation,
    adversarial_training
]
from stable_baselines3.common.vec_env import DummyVecEnv

# Create the AdversarialEnvironment and wrap it for Stable Baselines
base_env = AdversarialEnvironment(fmodel, adversarial_attacks, defense_mechanisms)
env = DummyVecEnv([lambda: base_env])  # Wrap the environment

# Train the RL agent using PPO from Stable Baselines
agent = PPO("MlpPolicy", env, verbose=1)
agent.learn(total_timesteps=100000)

success_counters = np.zeros((len(adversarial_attacks), len(defense_mechanisms)))

# Run the agent through multiple episodes to collect statistics
num_episodes = 1000
# Hard cap per episode so the evaluation always terminates, even when the
# environment never reports done=True.
max_steps_per_episode = 10
for episode in range(num_episodes):
    obs = env.reset()
    for _step in range(max_steps_per_episode):
        action, _ = agent.predict(obs)
        # The vectorised env wraps exactly one environment, so every returned
        # array has a single entry at index 0.
        attack_idx, defense_idx = divmod(int(action[0]), len(defense_mechanisms))
        obs, reward, done, _ = env.step(action)

        # If the attack was successful (reward < 0), increment the counter for this attack and defense
        if reward[0] < 0:
            success_counters[attack_idx, defense_idx] += 1

        if done[0]:
            break

# Analyze the results
worst_attack_idx = np.argmax(success_counters.sum(axis=1))
most_effective_defense_idx = np.argmin(success_counters.sum(axis=0))

print("Worst Adversarial Attack:", adversarial_attacks[worst_attack_idx].__class__.__name__)
print("Most Effective Defense:", defense_mechanisms[most_effective_defense_idx].__name__)

# Save the trained agent if needed
agent.save("path_to_saved_model")


In [None]:
pip install shimmy

Use any module except Foolbox, but write the code completely; don't think about complexity, just write it in full.

 Using PyTorch, take one image dataset and some model. Then use a list of adversarial attacks and defences and, based on the model, develop a reinforcement-learning approach. My aim is to find the worst-attack / best-defence combination. Think carefully about how the pipeline should look, develop a detailed procedure, and keep my main goal in view so that it is 100% satisfied.

Write it completely and don't leave any part out. Assume any model and a corresponding input; use a predefined model, a predefined input, and a list of adversarial attacks and defences.

Add a visualization part wherever it is required: plot the location accuracy, and update the complete code with the visualization.

In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np
import random
import matplotlib.pyplot as plt
# Pretrained ResNet-18 classifier, switched to inference mode.
# NOTE(review): the pretrained weights come from torchvision while the labels
# below come from CIFAR-10 -- confirm the model's output classes line up with
# the dataset labels before trusting the accuracy computed later.
model = models.resnet18(pretrained=True)
model.eval()

# CIFAR-10 training split, resized to 224 px and converted to tensors.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
])
train_dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)

# Define adversarial attack (FGSM)
def fgsm_attack(image, epsilon, data_grad):
    """Fast Gradient Sign Method step.

    Moves ``image`` by ``epsilon`` in the direction of the sign of
    ``data_grad`` and clamps the result to ``[0, 1]``.
    """
    step = epsilon * torch.sign(data_grad)
    return torch.clamp(image + step, min=0, max=1)

# Define adversarial defense (feature squeezing)
def feature_squeezing(image, bit_depth=5):
    """Quantise ``image`` to ``2 ** bit_depth`` evenly spaced levels.

    Values are scaled by ``2 ** bit_depth - 1``, rounded, and scaled back.
    """
    levels = (1 << bit_depth) - 1
    quantised = torch.round(levels * image)
    return quantised / levels

# Reinforcement Learning Environment
class AdversarialEnvironment:
    """One-step environment pairing a gradient-based attack with a defense.

    Args:
        model: Classifier called on a batch of one image.
        attacks: Sequence of callables ``attack(image, epsilon, data_grad)``.
        defenses: Sequence of callables ``defense(image)``.
    """

    def __init__(self, model, attacks, defenses):
        self.model = model
        self.attacks = attacks
        self.defenses = defenses
        self.state = None
        self.state_label = None

    def step(self, action):
        """Attack the current image, defend it, and score the result.

        Args:
            action: ``(attack_index, defense_index)`` pair.

        Returns:
            ``(state, reward, done)`` where ``state`` is the unmodified
            current image, ``reward`` is the negative cross-entropy of the
            defended image as a detached tensor, and ``done`` is always True.

        Raises:
            RuntimeError: if called before ``reset``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        attack_idx, defense_idx = action
        attack, defense = self.attacks[attack_idx], self.defenses[defense_idx]
        target = self.state_label.unsqueeze(0)

        # Compute the output and loss
        output = self.model(self.state)
        loss = torch.nn.functional.cross_entropy(output, target)

        # Gradient of the loss w.r.t. the input image only. autograd.grad
        # neither accumulates into `state.grad` across calls nor fills the
        # model parameters' .grad fields.
        data_grad, = torch.autograd.grad(loss, self.state)

        # Apply the attack and the defense on a graph-free copy of the image.
        perturbed_image = attack(self.state.detach(), 0.1, data_grad)
        defended_image = defense(perturbed_image)

        # The reward is a plain number for the agent: computing it without a
        # graph stops the agent's backward pass from running through the classifier.
        with torch.no_grad():
            output = self.model(defended_image)
            reward = -torch.nn.functional.cross_entropy(output, target)  # Negative loss as reward
        done = True
        return self.state, reward, done

    def reset(self, image, label):
        """Set the current image (a batch dimension is added) and its label."""
        self.state = image.unsqueeze(0).requires_grad_(True)  # Add batch dimension
        self.state_label = label
        return self.state

# DQN Agent
class DQNAgent:
    """Small fully connected Q-network with an epsilon-greedy policy.

    Args:
        state_dim: Length of the flattened state vector.
        action_dim: Number of discrete actions.
    """

    def __init__(self, state_dim, action_dim):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.model = nn.Sequential(
            nn.Linear(state_dim, 24),
            nn.ReLU(),
            nn.Linear(24, 24),
            nn.ReLU(),
            nn.Linear(24, action_dim)
        )
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        self.criterion = nn.MSELoss()

    def act(self, state, epsilon=0.1):
        """Return a random action with probability ``epsilon``, else the greedy one."""
        if random.random() < epsilon:
            return random.randint(0, self.action_dim - 1)
        # Action selection needs no gradients.
        with torch.no_grad():
            q_values = self.model(state)
        return torch.argmax(q_values).item()

    def train(self, state, action, reward, next_state, done, gamma=0.99):
        """Run one temporal-difference update on a single transition.

        Args:
            state: Flattened state tensor.
            action: Index of the action taken.
            reward: Scalar reward (number or 0-dim tensor).
            next_state: Flattened successor state tensor.
            done: Whether the episode ended with this transition.
            gamma: Discount factor.

        Returns:
            The loss value of this update as a float.
        """
        # States are data, not parameters: keep gradients from reaching them.
        state = state.detach()
        next_state = next_state.detach()

        current = self.model(state)[action]
        # The TD target is treated as a constant. Without this the backward
        # pass would also flow through the bootstrap term and through any
        # graph attached to `reward`.
        with torch.no_grad():
            reward_value = torch.as_tensor(reward, dtype=current.dtype, device=current.device)
            bootstrap = torch.max(self.model(next_state))
            target = reward_value + (1.0 - float(done)) * gamma * bootstrap

        loss = self.criterion(current, target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()




# Add a list to store accuracy for each epoch
accuracies = []

# Training loop
env = AdversarialEnvironment(model, [fgsm_attack], [feature_squeezing])
agent = DQNAgent(state_dim=3*224*224, action_dim=1)

for epoch in range(10):
    correct = 0
    total = 0
    for images, labels in train_loader:
        for i in range(images.size(0)): # Loop through the batch
            state = env.reset(images[i], labels[i])
            done = False
            while not done:
                action = agent.act(state.view(-1))
                next_state, reward, done = env.step((action, 0))
                agent.train(state.view(-1), action, reward, next_state.view(-1), done)
                state = next_state

                # Check if the prediction is correct. `state` requires grad, so
                # run the forward pass under no_grad to avoid building an
                # autograd graph through the classifier for every sample.
                with torch.no_grad():
                    pred = torch.argmax(env.model(state)).item()
                if pred == labels[i].item():
                    correct += 1
                total += 1

    # Compute accuracy for the epoch (guard against an empty loader)
    accuracy = correct / total if total else 0.0
    accuracies.append(accuracy)
    print(f'Epoch {epoch+1}, Accuracy: {accuracy*100:.2f}%')

# Plot the accuracy; the x-axis is 1-based to match the printed epoch numbers
fig, ax = plt.subplots()
ax.plot(range(1, len(accuracies) + 1), accuracies)
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Location Accuracy Over Epochs')
plt.show()



Files already downloaded and verified
