In [1]:
from tqdm.notebook import tqdm
from typing import List

import numpy as np

import torch

from camera_transition import CameraTransition
from models import (
    ActorImprovedValue,
    CriticTD,
    ActorModel,
    CriticModel)

In [2]:
# --- Reproducibility --------------------------------------------------------
# Training batches are drawn with torch.rand, so the RNGs are seeded up front;
# without this every Restart & Run All produces a different run.
random_seed = 0
torch.manual_seed(random_seed)
np.random.seed(random_seed)

# --- Optimizers -------------------------------------------------------------
# Optimizer class and keyword arguments used to build the critic optimizer.
optimizer_critic_kind = torch.optim.Adam
optimizer_critic_parameters = {
    'lr': 5e-4,
    'weight_decay': 0.0,
}

# Optimizer class and keyword arguments used to build the actor optimizer.
optimizer_actor_kind = torch.optim.Adam
optimizer_actor_parameters = {
    'lr': 0.001,
    'weight_decay': 0.0,
}

# --- Device -----------------------------------------------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Training schedule ------------------------------------------------------
# Gradient steps per epoch and number of sampled states per step.
critic_iterations = 2000
critic_batch_size = 1000

actor_iterations = 400
actor_batch_size = 4000

# Number of alternating critic/actor training rounds.
epochs = 6

In [3]:
# Keyword arguments for CameraTransition, gathered in one place so each value
# can be read (and tuned) on its own line.
camera_transition_kwargs = dict(
    # Camera matrix.
    camera_mat=np.array([
        [633.09029639, 0., 629.06462963],
        [0., 638.7544391, 362.74013262],
        [0., 0., 1.],
    ]),
    # Distortion coefficients.
    dist_coeffs=np.array([
        [-0.04797802, 0.04744357, 0.00017416, 0.00067967, -0.00408397],
    ]),
    # Points on the camera surface.
    target_points=[
        [1280 // 2, 720 // 2],
        [1280 // 2 + 50, 720 // 2 + 50],
    ],
    # Coordinates of the points in space.
    points_env=np.array([[100, 0, 0], [0, 100, 0]], dtype=float),
    # Means of the normal distribution.
    # TODO: maybe drop this and use the plain norm instead, as was done in the
    # seminar. If it complains that SciPy does not work with tensors, remove
    # this parameter entirely.
    reward_scale=[0],
)
camera_transition = CameraTransition(device, **camera_transition_kwargs)

# Actor and critic networks.
# NOTE(review): neither model is moved to `device` here, while the training
# batches are; confirm the models/wrappers handle device placement themselves.
actor = ActorModel()
critic = CriticModel()

# Training objectives wrapping the actor, the critic and the camera transition.
critic_temporal_difference = CriticTD(actor, critic, camera_transition)
actor_improved_value = ActorImprovedValue(actor, critic, camera_transition)

In [4]:
def critic_epoch(optimizer: torch.optim.Optimizer,
                 model: "CriticTD",
                 iterations: int,
                 batch_size: int) -> List[float]:
    """Run one epoch of critic training on uniformly sampled states.

    Each iteration samples ``batch_size`` 6-component states uniformly from
    ``[-scales, scales)``, evaluates ``model`` on them to obtain a scalar
    loss, and takes one optimizer step on that loss.

    Args:
        optimizer: Optimizer that updates the parameters being trained.
        model: Callable mapping a ``(batch_size, 6)`` tensor to a scalar loss.
        iterations: Number of gradient steps to perform.
        batch_size: Number of states sampled per step.

    Returns:
        The loss of every iteration, in order, as Python floats.
    """
    # TODO: replace with our own position limits.
    # The bounds are loop-invariant, so build the tensor once.
    scales = torch.tensor([2.5, 2.5, 2.5, 0.15, 0.15, 0.15])
    losses: List[float] = []
    for _ in tqdm(range(iterations), "Critic epoch"):
        X = (torch.rand((batch_size, 6)) * scales * 2 - scales).to(device)
        optimizer.zero_grad()
        loss = model(X)
        loss.backward()
        optimizer.step()
        # .item() yields a plain float, matching the declared return type.
        losses.append(loss.item())
    if losses:
        # Compare the first and the last (up to) ten losses. The tail slice
        # must be [-10:]; [-10:-1] would drop the final loss and is empty
        # (mean -> nan) when only one iteration was run.
        print("Critic mean loss:", np.mean(losses), "[%f --> %f]" % (np.mean(losses[:10]), np.mean(losses[-10:])))
    return losses

def actor_epoch(optimizer: torch.optim.Optimizer,
                model: "ActorImprovedValue",
                iterations: int,
                batch_size: int) -> List[float]:
    """Run one epoch of actor training on uniformly sampled states.

    Each iteration samples ``batch_size`` 6-component states uniformly from
    ``[-scales, scales)``, evaluates ``model`` on them to obtain a scalar,
    back-propagates it and takes one optimizer step.

    Args:
        optimizer: Optimizer that updates the parameters being trained.
        model: Callable mapping a ``(batch_size, 6)`` tensor to a scalar.
        iterations: Number of gradient steps to perform.
        batch_size: Number of states sampled per step.

    Returns:
        The value returned by ``model`` at every iteration, in order, as
        Python floats.
    """
    # TODO: replace with our own position limits.
    # The bounds need one entry per sampled component: a 4-element tensor
    # cannot broadcast against the (batch_size, 6) sample and raises a
    # RuntimeError. Use the same six bounds as critic_epoch.
    # NOTE(review): confirm these are the intended limits for the actor.
    scales = torch.tensor([2.5, 2.5, 2.5, 0.15, 0.15, 0.15])
    values: List[float] = []
    for _ in tqdm(range(iterations), "Actor epoch"):
        X = (torch.rand((batch_size, 6)) * scales * 2 - scales).to(device)
        optimizer.zero_grad()
        improved_value = model(X)
        # NOTE(review): the optimizer step decreases the returned quantity;
        # confirm that the model returns something meant to be minimised.
        improved_value.backward()
        optimizer.step()
        # .item() yields a plain float, matching the declared return type.
        values.append(improved_value.item())
    if values:
        # Compare the first and the last (up to) ten values. The tail slice
        # must be [-10:]; [-10:-1] would drop the final value and is empty
        # (mean -> nan) when only one iteration was run.
        print("Actor mean value:", np.mean(values), "[%f --> %f]" % (np.mean(values[:10]), np.mean(values[-10:])))
    return values

In [5]:
# The actor optimizer is built once, outside the training loop, so its
# internal state carries over between epochs.
# NOTE(review): this optimizes everything returned by
# actor_improved_value.parameters(); confirm that excludes the critic weights.
optimizer_actor = optimizer_actor_kind(
    actor_improved_value.parameters(),
    **optimizer_actor_parameters,
)

In [6]:
# Alternate critic and actor training for `epochs` rounds.
for _ in tqdm(range(epochs), "Actor-Critic learning"):
    # It is important to reinitialize the critic optimizer every round to
    # erase irrelevant momenta and adaptations.
    # NOTE(review): this optimizes everything returned by
    # critic_temporal_difference.parameters(); confirm that excludes the
    # actor weights.
    optimizer_critic = optimizer_critic_kind(
        critic_temporal_difference.parameters(),
        **optimizer_critic_parameters,
    )
    # The critic must be trained with its own batch size; passing
    # actor_batch_size here left critic_batch_size unused.
    # `values` holds the critic losses of the most recent round.
    values = np.array(critic_epoch(optimizer_critic,
                                   critic_temporal_difference,
                                   critic_iterations,
                                   critic_batch_size))
    actor_epoch(optimizer_actor,
                actor_improved_value,
                actor_iterations,
                actor_batch_size)

Actor-Critic learning:   0%|          | 0/6 [00:00<?, ?it/s]

Critic epoch:   0%|          | 0/2000 [00:00<?, ?it/s]



point in _project_points [100.   0.   0.] shape (1, 3)


error: OpenCV(4.6.0) D:\a\opencv-python\opencv-python\opencv\modules\calib3d\src\calibration.cpp:625: error: (-5:Bad argument) Translation vector must be 1x3 or 3x1 floating-point vector in function 'cvProjectPoints2Internal'
