# Using only sensor data

## Configure the environment and the callback

In [None]:
# ENV CONF
import carla 
# client = carla.Client("localhost", 2000)
# world = client.load_world('Town01')
import gymnasium as gym
from carla_env import CarEnv
import numpy as np
import time
import matplotlib.pyplot as plt
from stable_baselines3.common.callbacks import BaseCallback
class CARLA_G(gym.Env):
    """Gymnasium adapter that exposes only the state vector of ``CarEnv``.

    ``CarEnv.step`` and ``CarEnv.reset`` are unpacked below as an
    ``(image, state)`` pair. This wrapper drops the image and returns the
    state cast to ``float32``.
    """

    def __init__(self):
        super().__init__()
        self.env = CarEnv()
        # Two continuous action components, each bounded to [-1, 1].
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
        # Unbounded 16-element observation vector.
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(16,), dtype=np.float32)

    def step(self, action):
        """Advance the wrapped environment by one step.

        Returns the Gymnasium 5-tuple ``(obs, reward, terminated,
        truncated, info)``. ``truncated`` is always ``False`` and ``info``
        is always an empty dict; the wrapped environment's own info value
        is discarded.
        """
        (_image, new_state), reward, done, _info = self.env.step(action)
        return new_state.astype(np.float32), reward, done, False, {}

    def reset(self, seed=None, options=None):
        """Reset the wrapped environment and return ``(obs, info)``.

        ``seed`` is forwarded to ``gym.Env.reset`` so that ``self.np_random``
        is seeded as the Gymnasium API expects. ``options`` is accepted for
        API compatibility and is not used (its default is ``None`` rather
        than a shared mutable ``{}``).
        """
        super().reset(seed=seed)
        _image, state = self.env.reset()
        return state.astype(np.float32), {}

    def render(self):
        """Rendering is not implemented by this wrapper."""
        pass


class MyCallback(BaseCallback):
    """Log running means of episode reward and length.

    The statistics of every finished episode are stored in
    ``episode_rewards`` / ``episode_lengths``. The mean over the most
    recent 100 entries is written to the logger under ``episode_reward``
    and ``episode_length``.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.episode_rewards = []
        self.episode_lengths = []

    def _on_step(self) -> bool:
        """Record episodes that finished on this step; never stops training.

        Only ``info`` dicts that carry an ``episode`` entry are counted
        (Stable-Baselines3's ``Monitor`` wrapper adds that entry on the
        step that ends an episode). Reading the tail of
        ``model.ep_info_buffer`` on every step instead would append the
        same episode once per environment step and skew the running mean.
        """
        episode_finished = False
        for info in self.locals.get("infos") or ():
            episode = info.get("episode")
            if episode is None:
                continue
            self.episode_rewards.append(episode['r'])
            self.episode_lengths.append(episode['l'])
            episode_finished = True

        if episode_finished:
            # Log the running means only when new data has arrived.
            self.logger.record('episode_reward', np.mean(self.episode_rewards[-100:]))
            self.logger.record('episode_length', np.mean(self.episode_lengths[-100:]))
            self.logger.dump(step=self.num_timesteps)

        return True


# Create the sensor-only environment, store the town name on the wrapped
# CarEnv, and perform an initial reset.
town_name = 'Town01'
env = CARLA_G()
env.env.town_name = town_name
env.reset()

## Train the model with MlpPolicy

In [None]:
from stable_baselines3 import SAC, PPO, DDPG
from stable_baselines3.common.env_checker import check_env
import os
from stable_baselines3.common.logger import configure
import traceback

log_dir = "./logs_sensor"
os.makedirs(log_dir, exist_ok=True)

# --- Model configuration: start from a new model ---
model = SAC("MlpPolicy", env, verbose=1, buffer_size=1000000, tensorboard_log=log_dir)

# --- Alternatively, load a saved model ---
# To continue learning from a saved model, uncomment these lines:
# model = SAC.load("model name", print_system_info=True)
# model2 = SAC("MlpPolicy", env, verbose=1)
# model.env = model2.env
# model.load_replay_buffer("./buffer_SAC_model_sesnor.pkl")

# Send training output to stdout and to TensorBoard files under log_dir.
new_logger = configure(log_dir, ["stdout", "tensorboard"])

# Instantiate the callback and attach the logger to the model.
my_callback = MyCallback()
model.set_logger(new_logger)

# Train in chunks of 5000 timesteps, checkpointing after every chunk.
# NOTE(review): each learn() call uses the default reset_num_timesteps, so
# the timestep counter restarts on every chunk; consider passing
# reset_num_timesteps=False after confirming the environment is still reset
# when a chunk is retried after an error.
for i in range(10000000):
    # The "sesnor" spelling is kept so existing checkpoint files still match.
    name = "SAC_model_sesnor_run_" + str(i + 1)
    print(name)
    try:
        model.learn(total_timesteps=5000, log_interval=4, callback=my_callback)
        model.save(name)
        # Save the replay buffer for the next learning process.
        model.save_replay_buffer("buffer_SAC_model_sesnor.pkl")
    except Exception:
        # Catch Exception only: a bare except would also swallow
        # KeyboardInterrupt, making this loop impossible to stop.
        print("error")
        traceback.print_exc()
        time.sleep(0.2)

# Using camera data

## Configure the environment and the callback

In [None]:
# ENV CONF
import carla 
# client = carla.Client("localhost", 2000)
# world = client.load_world('Town01')
import gymnasium as gym
from carla_env import CarEnv
import numpy as np
import time
import matplotlib.pyplot as plt
from stable_baselines3.common.callbacks import BaseCallback

class CARLA_G_camera(gym.Env):
    """Gymnasium adapter that exposes only the camera image of ``CarEnv``.

    ``CarEnv.step`` and ``CarEnv.reset`` are unpacked below as an
    ``(image, state)`` pair. This wrapper drops the state and returns the
    image with its last axis moved to the front, divided by 255 and cast
    to ``float32``.
    """

    def __init__(self):
        super().__init__()
        self.env = CarEnv()
        # Two continuous action components, each bounded to [-1, 1].
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
        # Declared image space: 3 x 60 x 160 with values in [0, 1].
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(3, 60, 160), dtype=np.float32)

    @staticmethod
    def _to_observation(image):
        """Move the last axis first, divide by 255 and cast to float32.

        Presumably ``image`` is a (60, 160, 3) array of 0-255 values, so
        the result matches ``observation_space`` -- confirm against CarEnv.
        """
        return (np.moveaxis(image, -1, 0) / 255).astype(np.float32)

    def step(self, action):
        """Advance the wrapped environment by one step.

        Returns the Gymnasium 5-tuple ``(obs, reward, terminated,
        truncated, info)``. ``truncated`` is always ``False`` and ``info``
        is always an empty dict; the wrapped environment's own info value
        is discarded.
        """
        (new_image, _state), reward, done, _info = self.env.step(action)
        return self._to_observation(new_image), reward, done, False, {}

    def reset(self, seed=None, options=None):
        """Reset the wrapped environment and return ``(obs, info)``.

        ``seed`` is forwarded to ``gym.Env.reset`` so that ``self.np_random``
        is seeded as the Gymnasium API expects. ``options`` is accepted for
        API compatibility and is not used (its default is ``None`` rather
        than a shared mutable ``{}``).
        """
        super().reset(seed=seed)
        image, _state = self.env.reset()
        return self._to_observation(image), {}

    def render(self):
        """Rendering is not implemented by this wrapper."""
        pass


class MyCallback(BaseCallback):
    """Log running means of episode reward and length.

    The statistics of every finished episode are stored in
    ``episode_rewards`` / ``episode_lengths``. The mean over the most
    recent 100 entries is written to the logger under ``episode_reward``
    and ``episode_length``.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.episode_rewards = []
        self.episode_lengths = []

    def _on_step(self) -> bool:
        """Record episodes that finished on this step; never stops training.

        Only ``info`` dicts that carry an ``episode`` entry are counted
        (Stable-Baselines3's ``Monitor`` wrapper adds that entry on the
        step that ends an episode). Reading the tail of
        ``model.ep_info_buffer`` on every step instead would append the
        same episode once per environment step and skew the running mean.
        """
        episode_finished = False
        for info in self.locals.get("infos") or ():
            episode = info.get("episode")
            if episode is None:
                continue
            self.episode_rewards.append(episode['r'])
            self.episode_lengths.append(episode['l'])
            episode_finished = True

        if episode_finished:
            # Log the running means only when new data has arrived.
            self.logger.record('episode_reward', np.mean(self.episode_rewards[-100:]))
            self.logger.record('episode_length', np.mean(self.episode_lengths[-100:]))
            self.logger.dump(step=self.num_timesteps)

        return True


# Create the camera-only environment, store the town name on the wrapped
# CarEnv, and perform an initial reset.
town_name = 'Town01'
env = CARLA_G_camera()
env.env.town_name = town_name
env.reset()

## Create the CustomSACPolicy and its policy_kwargs based on ResNet-18

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3.common.callbacks import BaseCallback
import pandas as pd
import gymnasium as gym
import torch.nn as nn
import torch
import torch.nn.functional as F
from stable_baselines3 import DQN, SAC
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
# from stable_baselines3.dqn.policies import DQNPolicy
from stable_baselines3.sac.policies import Actor, CnnPolicy, MlpPolicy, MultiInputPolicy, SACPolicy
import torch
import torch.nn as nn

class Block(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a skip connection.

    Args:
        in_channels: Channel count of the input tensor.
        out_channels: Channel count produced by both convolutions.
        identity_downsample: Optional module applied to the input before it
            is added to the convolution branch.
        stride: Stride of the first convolution.
    """

    def __init__(self, in_channels, out_channels, identity_downsample=None, stride=1):
        super().__init__()
        # Only the first convolution may change the spatial resolution.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Without a downsample module the input itself is the shortcut.
        shortcut = x if self.identity_downsample is None else self.identity_downsample(x)
        out += shortcut
        return self.relu(out)

class ResNet_18(nn.Module):
    """ResNet-18-style network built from ``Block``.

    A 7x7 convolutional stem with max pooling is followed by four stages of
    two blocks each (64, 128, 256 and 512 channels), global average pooling
    and a linear layer with ``num_classes`` outputs.
    """

    def __init__(self, image_channels, num_classes):
        super().__init__()
        self.in_channels = 64

        # Stem.
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; every stage after the first uses stride 2.
        self.layer1 = self.__make_layer(64, 64, stride=1)
        self.layer2 = self.__make_layer(64, 128, stride=2)
        self.layer3 = self.__make_layer(128, 256, stride=2)
        self.layer4 = self.__make_layer(256, 512, stride=2)

        # Head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def identity_downsample(self, in_channels, out_channels):
        """Build the stride-2 conv + batch-norm module used on the shortcut."""
        conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1)
        norm = nn.BatchNorm2d(out_channels)
        return nn.Sequential(conv, norm)

    def __make_layer(self, in_channels, out_channels, stride):
        """Build one stage made of two ``Block`` modules.

        The first block gets a shortcut downsample module whenever
        ``stride`` is not 1; the second block always keeps the shape.
        """
        shortcut = self.identity_downsample(in_channels, out_channels) if stride != 1 else None
        first = Block(in_channels, out_channels, identity_downsample=shortcut, stride=stride)
        second = Block(out_channels, out_channels)
        return nn.Sequential(first, second)

    def forward(self, x):
        """Run stem, stages, pooling and the final linear layer on ``x``."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        pooled = self.avgpool(x)
        # Collapse everything but the batch axis before the linear layer.
        flat = pooled.view(pooled.shape[0], -1)
        return self.fc(flat)
    
class CustomResNetFeaturesExtractor(BaseFeaturesExtractor):
    """Image feature extractor: ResNet-18 encoder followed by two linear layers.

    The observation is a single image ``Box``. It is encoded to 100 values
    by ``ResNet_18`` and then mapped 100 -> 32 -> 32, so the extractor
    always reports 32 output features.

    Args:
        observation_space: Image space; ``shape[0]`` is used as the number
            of input channels of the encoder.
        features_dim: Passed to the base-class constructor and kept for
            interface compatibility; the reported feature size is then
            fixed to 32 to match the output of ``fc2``.
    """

    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 100):
        super().__init__(observation_space, features_dim)

        # A Box space has no ``.spaces`` mapping, so the single encoder is
        # built directly. No device is hard-coded here: the module follows
        # whatever device its owner moves it to.
        image_channels = observation_space.shape[0]
        self.extractors = nn.ModuleDict(
            {"image": nn.Sequential(ResNet_18(image_channels, 100))}
        )
        print(self.extractors)

        # Size of the tensor returned by forward().
        self._features_dim = 32
        # NOTE(review): fc1 and fc2 are applied back to back without a
        # non-linearity in between -- confirm that this is intended.
        self.fc1 = nn.Linear(100, 32)
        self.fc2 = nn.Linear(32, 32)

    def forward(self, observations) -> torch.Tensor:
        """Encode ``observations`` and return a tensor with 32 features per sample."""
        encoded = [extractor(observations) for extractor in self.extractors.values()]
        x = torch.cat(encoded, dim=1)
        x = self.fc1(x)
        return self.fc2(x)
class CustomSACPolicy(SACPolicy):
    """``SACPolicy`` subclass that only forwards its constructor arguments."""

    def __init__(self, *args, **kwargs):
        super(CustomSACPolicy, self).__init__(*args, **kwargs)


obs_space = env.observation_space

# Policy keyword arguments: use the custom ResNet features extractor and
# pass an empty net_arch.
policy_kwargs = {
    "features_extractor_class": CustomResNetFeaturesExtractor,
    "features_extractor_kwargs": {"features_dim": 100},
    "net_arch": [],
}

## Train the model with CustomSACPolicy

In [None]:
from stable_baselines3 import SAC, PPO, DDPG
from stable_baselines3.common.env_checker import check_env
import os
from stable_baselines3.common.logger import configure
import traceback

log_dir = "./logs_camera"
os.makedirs(log_dir, exist_ok=True)

# --- Model configuration: start from a new model ---
model = SAC(
    policy=CustomSACPolicy,
    env=env,
    policy_kwargs=policy_kwargs,
    verbose=1,
    buffer_size=20000,
    learning_rate=3e-4,
    batch_size=128 * 4,
    gamma=0.99,
    train_freq=4,
    gradient_steps=1,
    target_update_interval=1000,
)

# --- Alternatively, load a saved model ---
# To continue learning from a saved model, uncomment these lines:
# model = SAC.load("model name", print_system_info=True)
# model2 = SAC(policy=CustomSACPolicy,env=env,policy_kwargs=policy_kwargs,verbose=1)
# model.env = model2.env
# model.load_replay_buffer("./buffer_SAC_model_camera.pkl")

# Send training output to stdout and to TensorBoard files under log_dir.
new_logger = configure(log_dir, ["stdout", "tensorboard"])

# Instantiate the callback and attach the logger to the model.
my_callback = MyCallback()
model.set_logger(new_logger)

# Train in chunks of 5000 timesteps, checkpointing after every chunk.
# NOTE(review): each learn() call uses the default reset_num_timesteps, so
# the timestep counter restarts on every chunk; consider passing
# reset_num_timesteps=False after confirming the environment is still reset
# when a chunk is retried after an error.
for i in range(10000000):
    name = "SAC_model_camera_run_" + str(i + 1)
    print(name)
    try:
        model.learn(total_timesteps=5000, log_interval=4, callback=my_callback)
        model.save(name)
        # Save the replay buffer for the next learning process.
        model.save_replay_buffer("buffer_SAC_model_camera.pkl")
    except Exception:
        # Catch Exception only: a bare except would also swallow
        # KeyboardInterrupt, making this loop impossible to stop.
        print("error")
        traceback.print_exc()
        time.sleep(0.2)

# Using sensors and camera data (fusion)

## Configure the environment and the callback

In [None]:
# ENV CONF
import carla 
# client = carla.Client("localhost", 2000)
# world = client.load_world('Town01')
import gymnasium as gym
from carla_env import CarEnv
import numpy as np
import time
import matplotlib.pyplot as plt
from stable_baselines3.common.callbacks import BaseCallback
class CARLA_G_fusion(gym.Env):
    """Gymnasium adapter that exposes both the image and the state of ``CarEnv``.

    ``CarEnv.step`` and ``CarEnv.reset`` are unpacked below as an
    ``(image, state)`` pair. Observations are dicts with two entries:
    ``"image"`` (last axis moved to the front, divided by 255, ``float32``)
    and ``"tracking"`` (the state cast to ``float32``).
    """

    def __init__(self):
        super().__init__()
        self.env = CarEnv()
        # Two continuous action components, each bounded to [-1, 1].
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
        self.observation_space = gym.spaces.Dict({
            # Declared image space: 3 x 60 x 160 with values in [0, 1].
            "image": gym.spaces.Box(low=0, high=1, shape=(3, 60, 160), dtype=np.float32),
            # Unbounded 16-element vector.
            "tracking": gym.spaces.Box(low=-np.inf, high=np.inf, shape=(16,), dtype=np.float32),
        })

    @staticmethod
    def _to_observation(image, state):
        """Build the observation dict from a raw ``(image, state)`` pair.

        Presumably ``image`` is a (60, 160, 3) array of 0-255 values and
        ``state`` has 16 elements, so the result matches
        ``observation_space`` -- confirm against CarEnv.
        """
        return {
            "image": (np.moveaxis(image, -1, 0) / 255).astype(np.float32),
            "tracking": state.astype(np.float32),
        }

    def step(self, action):
        """Advance the wrapped environment by one step.

        Returns the Gymnasium 5-tuple ``(obs, reward, terminated,
        truncated, info)``. ``truncated`` is always ``False`` and ``info``
        is always an empty dict; the wrapped environment's own info value
        is discarded.
        """
        (new_image, new_state), reward, done, _info = self.env.step(action)
        return self._to_observation(new_image, new_state), reward, done, False, {}

    def reset(self, seed=None, options=None):
        """Reset the wrapped environment and return ``(obs, info)``.

        ``seed`` is forwarded to ``gym.Env.reset`` so that ``self.np_random``
        is seeded as the Gymnasium API expects. ``options`` is accepted for
        API compatibility and is not used (its default is ``None`` rather
        than a shared mutable ``{}``).
        """
        super().reset(seed=seed)
        image, state = self.env.reset()
        return self._to_observation(image, state), {}

    def render(self):
        """Rendering is not implemented by this wrapper."""
        pass


class MyCallback(BaseCallback):
    """Log running means of episode reward and length.

    The statistics of every finished episode are stored in
    ``episode_rewards`` / ``episode_lengths``. The mean over the most
    recent 100 entries is written to the logger under ``episode_reward``
    and ``episode_length``.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.episode_rewards = []
        self.episode_lengths = []

    def _on_step(self) -> bool:
        """Record episodes that finished on this step; never stops training.

        Only ``info`` dicts that carry an ``episode`` entry are counted
        (Stable-Baselines3's ``Monitor`` wrapper adds that entry on the
        step that ends an episode). Reading the tail of
        ``model.ep_info_buffer`` on every step instead would append the
        same episode once per environment step and skew the running mean.
        """
        episode_finished = False
        for info in self.locals.get("infos") or ():
            episode = info.get("episode")
            if episode is None:
                continue
            self.episode_rewards.append(episode['r'])
            self.episode_lengths.append(episode['l'])
            episode_finished = True

        if episode_finished:
            # Log the running means only when new data has arrived.
            self.logger.record('episode_reward', np.mean(self.episode_rewards[-100:]))
            self.logger.record('episode_length', np.mean(self.episode_lengths[-100:]))
            self.logger.dump(step=self.num_timesteps)

        return True


# Create the fusion (image + tracking) environment, store the town name on
# the wrapped CarEnv, and perform an initial reset.
town_name = 'Town01'
env = CARLA_G_fusion()
env.env.town_name = town_name
env.reset()

## Create the CustomSACPolicy and its policy_kwargs based on ResNet-18

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3.common.callbacks import BaseCallback
import pandas as pd
import gymnasium as gym
import torch.nn as nn
import torch
import torch.nn.functional as F
from stable_baselines3 import DQN, SAC
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
# from stable_baselines3.dqn.policies import DQNPolicy
from stable_baselines3.sac.policies import Actor, CnnPolicy, MlpPolicy, MultiInputPolicy, SACPolicy
import torch
import torch.nn as nn

class Block(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a skip connection.

    Args:
        in_channels: Channel count of the input tensor.
        out_channels: Channel count produced by both convolutions.
        identity_downsample: Optional module applied to the input before it
            is added to the convolution branch.
        stride: Stride of the first convolution.
    """

    def __init__(self, in_channels, out_channels, identity_downsample=None, stride=1):
        super().__init__()
        # Only the first convolution may change the spatial resolution.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Without a downsample module the input itself is the shortcut.
        shortcut = x if self.identity_downsample is None else self.identity_downsample(x)
        out += shortcut
        return self.relu(out)

class ResNet_18(nn.Module):
    """ResNet-18-style network built from ``Block``.

    A 7x7 convolutional stem with max pooling is followed by four stages of
    two blocks each (64, 128, 256 and 512 channels), global average pooling
    and a linear layer with ``num_classes`` outputs.
    """

    def __init__(self, image_channels, num_classes):
        super().__init__()
        self.in_channels = 64

        # Stem.
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; every stage after the first uses stride 2.
        self.layer1 = self.__make_layer(64, 64, stride=1)
        self.layer2 = self.__make_layer(64, 128, stride=2)
        self.layer3 = self.__make_layer(128, 256, stride=2)
        self.layer4 = self.__make_layer(256, 512, stride=2)

        # Head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def identity_downsample(self, in_channels, out_channels):
        """Build the stride-2 conv + batch-norm module used on the shortcut."""
        conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1)
        norm = nn.BatchNorm2d(out_channels)
        return nn.Sequential(conv, norm)

    def __make_layer(self, in_channels, out_channels, stride):
        """Build one stage made of two ``Block`` modules.

        The first block gets a shortcut downsample module whenever
        ``stride`` is not 1; the second block always keeps the shape.
        """
        shortcut = self.identity_downsample(in_channels, out_channels) if stride != 1 else None
        first = Block(in_channels, out_channels, identity_downsample=shortcut, stride=stride)
        second = Block(out_channels, out_channels)
        return nn.Sequential(first, second)

    def forward(self, x):
        """Run stem, stages, pooling and the final linear layer on ``x``."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        pooled = self.avgpool(x)
        # Collapse everything but the batch axis before the linear layer.
        flat = pooled.view(pooled.shape[0], -1)
        return self.fc(flat)
    
class CustomResNetFeaturesExtractor(BaseFeaturesExtractor):
    """Fusion feature extractor for a dict observation.

    The ``"image"`` entry is encoded to 100 values by ``ResNet_18`` and the
    ``"tracking"`` entry to 16 values by a linear layer. The encodings are
    concatenated and mapped to 32 and then 32 features, so the extractor
    always reports 32 output features. Other keys are ignored.

    Args:
        observation_space: Dict space; only the ``"image"`` and
            ``"tracking"`` sub-spaces get an encoder.
        features_dim: Passed to the base-class constructor and kept for
            interface compatibility; the reported feature size is then
            fixed to 32 to match the output of ``fc2``.
    """

    def __init__(self, observation_space: gym.spaces.Dict, features_dim: int = 116):
        super().__init__(observation_space, features_dim)
        extractors = {}
        total_concat_size = 0

        # No device is hard-coded here: the module follows whatever device
        # its owner moves it to.
        for key, subspace in observation_space.spaces.items():
            if key == "image":
                extractors[key] = nn.Sequential(ResNet_18(subspace.shape[0], 100))
                total_concat_size += 100
            elif key == "tracking":
                extractors[key] = nn.Linear(subspace.shape[0], 16)
                total_concat_size += 16

        self.extractors = nn.ModuleDict(extractors)
        print(self.extractors)

        # Size of the tensor returned by forward().
        self._features_dim = 32
        # fc1 takes the concatenated encodings (100 + 16 = 116 when both
        # keys are present).
        # NOTE(review): fc1 and fc2 are applied back to back without a
        # non-linearity in between -- confirm that this is intended.
        self.fc1 = nn.Linear(total_concat_size, 32)
        self.fc2 = nn.Linear(32, 32)

    def forward(self, observations) -> torch.Tensor:
        """Encode each entry, concatenate, and return 32 features per sample."""
        encoded = [
            extractor(observations[key])
            for key, extractor in self.extractors.items()
        ]
        x = torch.cat(encoded, dim=1)
        x = self.fc1(x)
        return self.fc2(x)
class CustomSACPolicy(SACPolicy):
    """``SACPolicy`` subclass that only forwards its constructor arguments."""

    def __init__(self, *args, **kwargs):
        super(CustomSACPolicy, self).__init__(*args, **kwargs)


obs_space = env.observation_space

# Policy keyword arguments: use the custom ResNet features extractor and
# pass an empty net_arch.
policy_kwargs = {
    "features_extractor_class": CustomResNetFeaturesExtractor,
    "features_extractor_kwargs": {"features_dim": 116},
    "net_arch": [],
}

## Train the model with CustomSACPolicy

In [None]:
from stable_baselines3 import SAC, PPO, DDPG
from stable_baselines3.common.env_checker import check_env
import os
from stable_baselines3.common.logger import configure
import traceback

log_dir = "./logs_fusion"
os.makedirs(log_dir, exist_ok=True)

# --- Model configuration: start from a new model ---
model = SAC(
    policy=CustomSACPolicy,
    env=env,
    policy_kwargs=policy_kwargs,
    verbose=1,
    buffer_size=20000,
    learning_rate=3e-4,
    batch_size=128 * 4,
    gamma=0.99,
    train_freq=4,
    gradient_steps=1,
    target_update_interval=1000,
)

# --- Alternatively, load a saved model ---
# To continue learning from a saved model, uncomment these lines:
# model = SAC.load("model name", print_system_info=True)
# model2 = SAC(policy=CustomSACPolicy,env=env,policy_kwargs=policy_kwargs,verbose=1)
# model.env = model2.env
# model.load_replay_buffer("./buffer_SAC_model_fusion.pkl")

# Send training output to stdout and to TensorBoard files under log_dir.
new_logger = configure(log_dir, ["stdout", "tensorboard"])

# Instantiate the callback and attach the logger to the model.
my_callback = MyCallback()
model.set_logger(new_logger)

# Train in chunks of 5000 timesteps, checkpointing after every chunk.
# NOTE(review): each learn() call uses the default reset_num_timesteps, so
# the timestep counter restarts on every chunk; consider passing
# reset_num_timesteps=False after confirming the environment is still reset
# when a chunk is retried after an error.
for i in range(10000000):
    name = "SAC_model_fusion_run_" + str(i + 1)
    print(name)
    try:
        model.learn(total_timesteps=5000, log_interval=4, callback=my_callback)
        model.save(name)
        # Save the replay buffer for the next learning process.
        model.save_replay_buffer("buffer_SAC_model_fusion.pkl")
    except Exception:
        # Catch Exception only: a bare except would also swallow
        # KeyboardInterrupt, making this loop impossible to stop.
        print("error")
        traceback.print_exc()
        time.sleep(0.2)