In [1]:
import numpy as np
import time
import gym
import torch
import torch.nn as nn
import random
import queue
import matplotlib.pyplot as plt
import math
from IPython.display import clear_output
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# Define Class

In [2]:
class Env():
    def __init__(self):
        self.env = gym.make('CarRacing-v0')
        self.reward_threshold = self.env.spec.reward_threshold

    def reset(self):
        self.counter = 0
        self.av_r = self.reward_memory()
        self.die = False
        img_rgb = self.env.reset()
        img_gray = self.rgb2gray(img_rgb)
        self.stack = [img_gray] * 4
        return np.array(self.stack)

    def step(self, action):
        total_reward = 0
        for i in range(8):
            img_rgb, reward, die, _ = self.env.step(action)
            # don't penalize "die state"
            if die:
                reward += 100
            # green penalty
            if np.mean(img_rgb[:, :, 1]) > 185.0:
                reward -= 0.05
            total_reward += reward
            # if no reward recently, end the episode
            done = True if self.av_r(reward) <= -0.1 else False
            if done or die:
                break
        img_gray = self.rgb2gray(img_rgb)
        self.stack.pop(0)
        self.stack.append(img_gray)
        assert len(self.stack) == 4
        return np.array(self.stack), total_reward, done, die

    def render(self):
        self.env.render()

    def close(self):
        self.env.close()
        
    @staticmethod
    def rgb2gray(rgb, norm=True):
        gray = np.dot(rgb[..., :], [0.299, 0.587, 0.114])
        if norm:
            # normalize
            gray = gray / 128. - 1.
        return gray

    @staticmethod
    def reward_memory():
        count = 0
        length = 100
        history = np.zeros(length)

        def memory(reward):
            nonlocal count
            history[count] = reward
            count = (count + 1) % length
            return np.mean(history)

        return memory


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.cnn_base = nn.Sequential(  # input shape (4, 96, 96)
            nn.Conv2d(4, 8, kernel_size=4, stride=2),
            nn.ReLU(),  # activation
            nn.Conv2d(8, 16, kernel_size=3, stride=2),  # (8, 47, 47)
            nn.ReLU(),  # activation
            nn.Conv2d(16, 32, kernel_size=3, stride=2),  # (16, 23, 23)
            nn.ReLU(),  # activation
            nn.Conv2d(32, 64, kernel_size=3, stride=2),  # (32, 11, 11)
            nn.ReLU(),  # activation
            nn.Conv2d(64, 128, kernel_size=3, stride=1),  # (64, 5, 5)
            nn.ReLU(),  # activation
            nn.Conv2d(128, 256, kernel_size=3, stride=1),  # (128, 3, 3)
            nn.ReLU(),  # activation
        )  # output shape (256, 1, 1)
        self.v = nn.Sequential(nn.Linear(256, 100), nn.ReLU(), nn.Linear(100, 1))
        self.fc = nn.Sequential(nn.Linear(256, 100), nn.ReLU())
        self.alpha_head = nn.Sequential(nn.Linear(100, 3), nn.Softplus())
        self.beta_head = nn.Sequential(nn.Linear(100, 3), nn.Softplus())
        self.apply(self._weights_init)

    @staticmethod
    def _weights_init(m):
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain('relu'))
            nn.init.constant_(m.bias, 0.1)

    def forward(self, x):
        x = self.cnn_base(x)
        x = x.view(-1, 256)
        v = self.v(x)
        x = self.fc(x)
        alpha = self.alpha_head(x) + 1
        beta = self.beta_head(x) + 1

        return (alpha, beta), v


class Agent():
    def __init__(self):
        self.net = Net().float().to(device)

    def select_action(self, state):
        state = torch.from_numpy(state).float().to(device).unsqueeze(0)
        with torch.no_grad():
            alpha, beta = self.net(state)[0]
        action = alpha / (alpha + beta)

        action = action.squeeze().cpu().numpy()
        return action

    def load_param(self):
        self.net.load_state_dict(torch.load('param/expert.pkl'))


# Getting new trajectory

In [3]:
     
class Classifier(nn.Module):
    def __init__(self,ch=2):
        super(Classifier,self).__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=4,out_channels=ch*8,kernel_size=7),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=ch*8,out_channels=ch*16,kernel_size=5,stride=2),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=ch*16,out_channels=ch*32,kernel_size=3,stride=2),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=ch*32,out_channels=ch*32,kernel_size=3,stride=2),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=ch*32,out_channels=ch*64,kernel_size=3,stride=2),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=ch*64,out_channels=ch*64,kernel_size=3,stride=2),
            torch.nn.ReLU()
        )
        self.v = torch.nn.Sequential(
            torch.nn.Linear(64*ch*1*1,256),
            torch.nn.ReLU()
        )
        self.fc = torch.nn.Linear(256,3)
        self.ch = ch
        
    def forward(self,x):
        x = self.layer1(x)
        x = x.view(x.size(0),-1)
        x = self.v(x)
        x = self.fc(x)
        
        x[:,0] = torch.tanh(x[:,0])
        x[:,1] = torch.sigmoid(x[:,1])
        x[:,2] = torch.sigmoid(x[:,2])
        return x

In [4]:
#model = torch.load('BC_model_copy.pt')
class ImgDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
    def __len__(self):
        return len(self.X)
    def __getitem__(self, index):
        x = self.X[index]
        y = self.Y[index]
        return x, y
model = Classifier().cuda()

In [None]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

agent = Agent()
agent.load_param()

#agent_env = Env()
env = Env()
agent_total_score = 0
total_score = 0
average_reward=0
reward_list = list()
agent_scores = []
scores = []

X = list()
Y = list()
#FirstIteration = 100
epoch = 1200
beta = 1
my_prob = list()
for i_ep in range(1,epoch+1):
    print(i_ep)
    #agent_score = 0
    score = 0
    #agent_state = agent_env.reset()
    state = env.reset()
    #action_take_turn = 0
    if i_ep == 1:
        beta = 1
        my_prob = ['expert']*math.floor((beta*100)) + ['dagger']*math.floor((1-beta)*100)
    if i_ep == 501:
        beta = 0.95
    if i_ep == 501 or i_ep == 1001 or i_ep == 1501:
        #steps_for_expert = math.floor(beta * 1000)
        my_prob = ['expert']*math.floor((beta*100)) + ['dagger']*math.floor((1-beta)*100)
        beta *= beta
    for t in range(1000):
        #   action = model(torch.tensor(state.reshape(1,4,96,96)).cuda().float())[0]
        #    action = action.cpu().detach().numpy()
        #elif action_take_turn > 0 or i_ep <= 100:
        #    action = agent.select_action(state)
        #    if action_take_turn > 0 :
        #        action_take_turn-=1
        choice = random.choice(my_prob)
        if choice == 'dagger':
            action = model(torch.tensor(state.reshape(1,4,96,96)).cuda().float())[0]
            action = action.cpu().detach().numpy()
        elif choice == 'expert':
            action = agent.select_action(state)
        state_, reward, done, die = env.step(action * np.array([2., 1., 1.]) + np.array([-1., 0., 0.]))
        
        #reward_list.append(reward)
        #if len(reward_list) == 10:
        #    tmp=0
        #    for i in reward_list:
        #        tmp+=i
        #    tmp/=10
        #    average_reward = tmp
        #    reward_list.pop(0)
        #print(average_reward)
        # time.sleep(0.05)
        env.render()
        score += reward
        state = state_
        
        #if average_reward < 10 and action_take_turn != 0:
        #        action_take_turn = 100
        X.append(state)
        Y.append(action)
        if done or die:
            break
            
    if i_ep == 500 or i_ep == 1000 or i: # Start training the dagger model
        env.close()
        train_x, val_x= X[:math.floor(len(X)*0.8)], X[math.floor(len(X)*0.8):]
        train_y, val_y= Y[:math.floor(len(Y)*0.8)], Y[math.floor(len(Y)*0.8):]
        batch_size = 128
        train_set = ImgDataset(train_x, train_y)
        val_set = ImgDataset(val_x, val_y)
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0)
        val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=0)

        device = torch.device('cuda')
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            
        loss = nn.MSELoss().cuda()
        optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-4)
        num_epoch = 30
        
    
        for epoch in range(num_epoch):
            epoch_start_time = time.time()
            train_loss = 0.0
            val_loss = 0.0

            model.train() # 確保 model 是在 train model (開啟 Dropout 等...)
            for i, data in enumerate(train_loader):
                optimizer.zero_grad() # 用 optimizer 將 model 參數的 gradient 歸零
                train_pred = model(data[0].cuda().float())
                batch_loss = loss(train_pred, data[1].cuda()).cuda() # 計算 loss （注意 prediction 跟 label 必須同時在 CPU 或是 GPU 上）
                batch_loss.backward() # 利用 back propagation 算出每個參數的 gradient
                optimizer.step() # 以 optimizer 用 gradient 更新參數值

                train_loss += batch_loss.item()

            model.eval()
            with torch.no_grad():
                for i, data in enumerate(val_loader):
                    val_pred = model(data[0].cuda().float())
                    batch_loss = loss(val_pred, data[1].cuda())
                    val_loss += batch_loss.item()

                #將結果 print 出來
                print('[%03d/%03d] %2.2f sec(s) Train Loss: %3.6f | Val loss: %3.6f' % \
                    (epoch + 1, num_epoch, time.time()-epoch_start_time, train_loss/train_set.__len__(), val_loss/val_set.__len__()))

            torch.save(model, 'dagger.pt')
            model = torch.load('dagger.pt')
    
    
    total_score += score
    scores.append(score)
    
    #clear_output(wait=True)

    print("Epoch: {}".format(i_ep))
    print('BC\tmean: {:.2f}\tvariance: {:.2f}'.format(np.array(scores).mean(), np.array(scores).std()))
    continue
    
    #plt.figure()
    #plt.hist(x=scores, bins='auto', color='#0504aa', alpha=0.7, rwidth=0.85)
    ##plt.grid(axis='y', alpha=0.75)
    #plt.xlim(200,1070)
    ##plt.xlabel('score')
    #plt.ylabel('count')
    #plt.title('BC model')
    
    #plt.show()    
env.close()
#agent_env.close()

1
Track generation: 1260..1579 -> 319-tiles track




Epoch: 1
BC	mean: 892.98	variance: 0.00
2
Track generation: 1156..1449 -> 293-tiles track
Epoch: 2
BC	mean: 959.94	variance: 66.96
3
Track generation: 1145..1431 -> 286-tiles track
Epoch: 3
BC	mean: 767.39	variance: 277.75
4
Track generation: 1037..1300 -> 263-tiles track
Epoch: 4
BC	mean: 835.91	variance: 268.23
5
Track generation: 1004..1259 -> 255-tiles track
Epoch: 5
BC	mean: 851.26	variance: 241.86
6
Track generation: 947..1195 -> 248-tiles track
Epoch: 6
BC	mean: 862.29	variance: 222.16
7
Track generation: 1047..1313 -> 266-tiles track
Epoch: 7
BC	mean: 869.37	variance: 206.41
8
Track generation: 1203..1516 -> 313-tiles track
Epoch: 8
BC	mean: 873.66	variance: 193.42
9
Track generation: 1097..1375 -> 278-tiles track
Epoch: 9
BC	mean: 891.67	variance: 189.33
10
Track generation: 1295..1623 -> 328-tiles track
Epoch: 10
BC	mean: 904.48	variance: 183.68
11
Track generation: 1143..1433 -> 290-tiles track
Epoch: 11
BC	mean: 889.64	variance: 181.31
12
Track generation: 1174..1472 -> 298

Epoch: 82
BC	mean: 928.69	variance: 106.21
83
Track generation: 1135..1426 -> 291-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1380..1728 -> 348-tiles track
Epoch: 83
BC	mean: 929.56	variance: 105.87
84
Track generation: 1303..1633 -> 330-tiles track
Epoch: 84
BC	mean: 930.62	variance: 105.67
85
Track generation: 1252..1569 -> 317-tiles track
Epoch: 85
BC	mean: 930.29	variance: 105.09
86
Track generation: 1048..1314 -> 266-tiles track
Epoch: 86
BC	mean: 931.50	variance: 105.08
87
Track generation: 1033..1301 -> 268-tiles track
Epoch: 87
BC	mean: 931.18	variance: 104.51
88
Track generation: 1165..1467 -> 302-tiles track
Epoch: 88
BC	mean: 932.21	variance: 104.37
89
Track generation: 1102..1381 -> 279-tiles track
Epoch: 89
BC	mean: 933.31	variance: 104.29
90
Track generation: 1117..1400 -> 283-tiles track
Epoch: 90
BC	mean: 934.40	variance: 104.21
91
Track generation: 1018..1276 -> 258-tiles track
Epoch: 91
BC	mean: 934.15	

Epoch: 163
BC	mean: 943.21	variance: 95.27
164
Track generation: 1049..1315 -> 266-tiles track
Epoch: 164
BC	mean: 943.80	variance: 95.27
165
Track generation: 1249..1573 -> 324-tiles track
Epoch: 165
BC	mean: 943.48	variance: 95.07
166
Track generation: 1146..1440 -> 294-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1387..1738 -> 351-tiles track
Epoch: 166
BC	mean: 942.43	variance: 95.74
167
Track generation: 1140..1429 -> 289-tiles track
Epoch: 167
BC	mean: 942.96	variance: 95.70
168
Track generation: 1105..1390 -> 285-tiles track
Epoch: 168
BC	mean: 943.49	variance: 95.65
169
Track generation: 1139..1428 -> 289-tiles track
Epoch: 169
BC	mean: 943.17	variance: 95.46
170
Track generation: 1213..1519 -> 306-tiles track
Epoch: 170
BC	mean: 942.87	variance: 95.26
171
Track generation: 1325..1659 -> 334-tiles track
Epoch: 171
BC	mean: 943.03	variance: 95.00
172
Track generation: 1224..1534 -> 310-tiles track
Epoch: 172
BC	mea

Epoch: 235
BC	mean: 948.72	variance: 88.15
236
Track generation: 1184..1484 -> 300-tiles track
Epoch: 236
BC	mean: 948.54	variance: 88.00
237
Track generation: 1286..1612 -> 326-tiles track
Epoch: 237
BC	mean: 948.35	variance: 87.86
238
Track generation: 1331..1668 -> 337-tiles track
Epoch: 238
BC	mean: 948.09	variance: 87.77
239
Track generation: 1145..1440 -> 295-tiles track
Epoch: 239
BC	mean: 948.41	variance: 87.72
240
Track generation: 1080..1354 -> 274-tiles track
Epoch: 240
BC	mean: 948.79	variance: 87.73
241
Track generation: 1209..1515 -> 306-tiles track
Epoch: 241
BC	mean: 947.86	variance: 88.72
242
Track generation: 1316..1650 -> 334-tiles track
Epoch: 242
BC	mean: 948.13	variance: 88.63
243
Track generation: 1125..1418 -> 293-tiles track
Epoch: 243
BC	mean: 947.97	variance: 88.49
244
Track generation: 1238..1552 -> 314-tiles track
Epoch: 244
BC	mean: 947.78	variance: 88.35
245
Track generation: 890..1120 -> 230-tiles track
retry to generate track (normal if there are not ma

Epoch: 312
BC	mean: 947.08	variance: 95.84
313
Track generation: 1171..1467 -> 296-tiles track
Epoch: 313
BC	mean: 947.34	variance: 95.79
314
Track generation: 1100..1379 -> 279-tiles track
Epoch: 314
BC	mean: 947.13	variance: 95.71
315
Track generation: 1389..1740 -> 351-tiles track
Epoch: 315
BC	mean: 947.32	variance: 95.62
316
Track generation: 1188..1489 -> 301-tiles track
Epoch: 316
BC	mean: 947.56	variance: 95.56
317
Track generation: 1197..1510 -> 313-tiles track
Epoch: 317
BC	mean: 947.77	variance: 95.48
318
Track generation: 1148..1439 -> 291-tiles track
Epoch: 318
BC	mean: 948.03	variance: 95.44
319
Track generation: 1205..1510 -> 305-tiles track
Epoch: 319
BC	mean: 947.31	variance: 96.15
320
Track generation: 1011..1268 -> 257-tiles track
Epoch: 320
BC	mean: 947.17	variance: 96.03
321
Track generation: 1004..1266 -> 262-tiles track
Epoch: 321
BC	mean: 947.44	variance: 96.01
322
Track generation: 1208..1514 -> 306-tiles track
Epoch: 322
BC	mean: 947.67	variance: 95.94
323
Tra

Epoch: 396
BC	mean: 949.13	variance: 91.63
397
Track generation: 1344..1684 -> 340-tiles track
Epoch: 397
BC	mean: 948.98	variance: 91.56
398
Track generation: 1048..1320 -> 272-tiles track
Epoch: 398
BC	mean: 949.19	variance: 91.53
399
Track generation: 1057..1326 -> 269-tiles track
Epoch: 399
BC	mean: 949.08	variance: 91.44
400
Track generation: 1245..1566 -> 321-tiles track
Epoch: 400
BC	mean: 949.24	variance: 91.39
401
Track generation: 1174..1471 -> 297-tiles track
Epoch: 401
BC	mean: 949.43	variance: 91.35
402
Track generation: 1357..1701 -> 344-tiles track
Epoch: 402
BC	mean: 949.28	variance: 91.29
403
Track generation: 1176..1474 -> 298-tiles track
Epoch: 403
BC	mean: 949.14	variance: 91.22
404
Track generation: 990..1250 -> 260-tiles track
Epoch: 404
BC	mean: 948.99	variance: 91.16
405
Track generation: 1333..1670 -> 337-tiles track
Epoch: 405
BC	mean: 948.74	variance: 91.19
406
Track generation: 1175..1483 -> 308-tiles track
Epoch: 406
BC	mean: 948.63	variance: 91.10
407
Trac

Epoch: 473
BC	mean: 948.79	variance: 89.94
474
Track generation: 1093..1377 -> 284-tiles track
Epoch: 474
BC	mean: 948.72	variance: 89.85
475
Track generation: 1066..1343 -> 277-tiles track
Epoch: 475
BC	mean: 948.89	variance: 89.84
476
Track generation: 977..1225 -> 248-tiles track
Epoch: 476
BC	mean: 948.81	variance: 89.77
477
Track generation: 1201..1505 -> 304-tiles track
Epoch: 477
BC	mean: 948.73	variance: 89.69
478
Track generation: 1111..1393 -> 282-tiles track
Epoch: 478
BC	mean: 948.65	variance: 89.61
479
Track generation: 1240..1554 -> 314-tiles track
Epoch: 479
BC	mean: 948.17	variance: 90.14
480
Track generation: 1127..1413 -> 286-tiles track
Epoch: 480
BC	mean: 948.06	variance: 90.08
481
Track generation: 1116..1409 -> 293-tiles track
Epoch: 481
BC	mean: 948.23	variance: 90.06
482
Track generation: 1141..1430 -> 289-tiles track
Epoch: 482
BC	mean: 948.15	variance: 89.98
483
Track generation: 1197..1500 -> 303-tiles track
Epoch: 483
BC	mean: 948.31	variance: 89.96
484
Trac

Epoch: 537
BC	mean: 945.09	variance: 95.20
538
Track generation: 986..1244 -> 258-tiles track
Epoch: 538
BC	mean: 945.23	variance: 95.17
539
Track generation: 1209..1515 -> 306-tiles track
Epoch: 539
BC	mean: 944.67	variance: 95.99
540
Track generation: 1336..1672 -> 336-tiles track
Epoch: 540
BC	mean: 943.70	variance: 98.50
541
Track generation: 1181..1480 -> 299-tiles track
Epoch: 541
BC	mean: 942.40	variance: 102.92
542
Track generation: 1151..1443 -> 292-tiles track
Epoch: 542
BC	mean: 942.34	variance: 102.84
543
Track generation: 1049..1318 -> 269-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1036..1306 -> 270-tiles track
Epoch: 543
BC	mean: 942.50	variance: 102.81
544
Track generation: 1167..1463 -> 296-tiles track
Epoch: 544
BC	mean: 942.36	variance: 102.77
545
Track generation: 1076..1349 -> 273-tiles track
Epoch: 545
BC	mean: 942.53	variance: 102.75
546
Track generation: 1145..1435 -> 290-tiles track
Epoch: 546
BC

Epoch: 611
BC	mean: 934.12	variance: 119.28
612
Track generation: 1124..1418 -> 294-tiles track
Epoch: 612
BC	mean: 934.02	variance: 119.21
613
Track generation: 1225..1535 -> 310-tiles track
Epoch: 613
BC	mean: 932.94	variance: 122.09
614
Track generation: 988..1239 -> 251-tiles track
Epoch: 614
BC	mean: 932.89	variance: 121.99
615
Track generation: 1092..1369 -> 277-tiles track
Epoch: 615
BC	mean: 932.79	variance: 121.92
616
Track generation: 1237..1550 -> 313-tiles track
Epoch: 616
BC	mean: 932.73	variance: 121.83
617
Track generation: 1133..1424 -> 291-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1115..1398 -> 283-tiles track
Epoch: 617
BC	mean: 932.68	variance: 121.74
618
Track generation: 1065..1336 -> 271-tiles track
Epoch: 618
BC	mean: 932.61	variance: 121.65
619
Track generation: 1172..1469 -> 297-tiles track
Epoch: 619
BC	mean: 932.56	variance: 121.56
620
Track generation: 1211..1522 -> 311-tiles track
Epoch: 62

Epoch: 688
BC	mean: 925.11	variance: 132.67
689
Track generation: 1193..1495 -> 302-tiles track
Epoch: 689
BC	mean: 925.04	variance: 132.59
690
Track generation: 1115..1398 -> 283-tiles track
Epoch: 690
BC	mean: 925.02	variance: 132.49
691
Track generation: 1268..1589 -> 321-tiles track
Epoch: 691
BC	mean: 924.32	variance: 133.66
692
Track generation: 1085..1360 -> 275-tiles track
Epoch: 692
BC	mean: 924.48	variance: 133.63
693
Track generation: 1192..1494 -> 302-tiles track
Epoch: 693
BC	mean: 924.62	variance: 133.59
694
Track generation: 1119..1412 -> 293-tiles track
Epoch: 694
BC	mean: 924.59	variance: 133.49
695
Track generation: 1023..1283 -> 260-tiles track
Epoch: 695
BC	mean: 923.83	variance: 134.88
696
Track generation: 1241..1561 -> 320-tiles track
Epoch: 696
BC	mean: 923.07	variance: 136.27
697
Track generation: 1308..1640 -> 332-tiles track
Epoch: 697
BC	mean: 923.12	variance: 136.18
698
Track generation: 1033..1297 -> 264-tiles track
Epoch: 698
BC	mean: 923.04	variance: 136

Epoch: 770
BC	mean: 917.56	variance: 138.76
771
Track generation: 1128..1415 -> 287-tiles track
Epoch: 771
BC	mean: 917.48	variance: 138.69
772
Track generation: 1240..1554 -> 314-tiles track
Epoch: 772
BC	mean: 916.79	variance: 139.92
773
Track generation: 1086..1362 -> 276-tiles track
Epoch: 773
BC	mean: 916.94	variance: 139.90
774
Track generation: 1192..1494 -> 302-tiles track
Epoch: 774
BC	mean: 916.48	variance: 140.38
775
Track generation: 1048..1314 -> 266-tiles track
Epoch: 775
BC	mean: 916.64	variance: 140.36
776
Track generation: 964..1209 -> 245-tiles track
Epoch: 776
BC	mean: 916.80	variance: 140.34
777
Track generation: 1148..1439 -> 291-tiles track
Epoch: 777
BC	mean: 916.77	variance: 140.25
778
Track generation: 1243..1558 -> 315-tiles track
Epoch: 778
BC	mean: 916.49	variance: 140.38
779
Track generation: 1133..1420 -> 287-tiles track
Epoch: 779
BC	mean: 916.41	variance: 140.31
780
Track generation: 1128..1414 -> 286-tiles track
Epoch: 780
BC	mean: 916.57	variance: 140.

Epoch: 849
BC	mean: 913.56	variance: 142.25
850
Track generation: 1149..1440 -> 291-tiles track
Epoch: 850
BC	mean: 913.53	variance: 142.16
851
Track generation: 1052..1321 -> 269-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1035..1298 -> 263-tiles track
Epoch: 851
BC	mean: 913.54	variance: 142.08
852
Track generation: 1261..1580 -> 319-tiles track
Epoch: 852
BC	mean: 913.66	variance: 142.04
853
Track generation: 1060..1329 -> 269-tiles track
Epoch: 853
BC	mean: 913.64	variance: 141.96
854
Track generation: 1343..1683 -> 340-tiles track
Epoch: 854
BC	mean: 913.72	variance: 141.90
855
Track generation: 887..1120 -> 233-tiles track
Epoch: 855
BC	mean: 913.69	variance: 141.82
856
Track generation: 1189..1490 -> 301-tiles track
Epoch: 856
BC	mean: 913.67	variance: 141.74
857
Track generation: 1144..1434 -> 290-tiles track
Epoch: 857
BC	mean: 913.79	variance: 141.70
858
Track generation: 1161..1455 -> 294-tiles track
Epoch: 85

Epoch: 923
BC	mean: 910.80	variance: 144.02
924
Track generation: 1002..1248 -> 246-tiles track
Epoch: 924
BC	mean: 910.94	variance: 144.00
925
Track generation: 1096..1376 -> 280-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1187..1488 -> 301-tiles track
Epoch: 925
BC	mean: 910.91	variance: 143.93
926
Track generation: 1123..1408 -> 285-tiles track
Epoch: 926
BC	mean: 910.89	variance: 143.85
927
Track generation: 1074..1355 -> 281-tiles track
Epoch: 927
BC	mean: 910.52	variance: 144.21
928
Track generation: 1096..1383 -> 287-tiles track
Epoch: 928
BC	mean: 910.64	variance: 144.18
929
Track generation: 1064..1334 -> 270-tiles track
Epoch: 929
BC	mean: 910.58	variance: 144.11
930
Track generation: 1276..1599 -> 323-tiles track
Epoch: 930
BC	mean: 910.53	variance: 144.05
931
Track generation: 1091..1368 -> 277-tiles track
Epoch: 931
BC	mean: 909.82	variance: 145.59
932
Track generation: 1184..1484 -> 300-tiles track
Epoch: 9

[004/030] 159.39 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[005/030] 166.81 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[006/030] 187.53 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[007/030] 189.11 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[008/030] 194.18 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[009/030] 184.89 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[010/030] 195.33 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[011/030] 191.48 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[012/030] 197.47 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[013/030] 196.31 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[014/030] 203.66 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[015/030] 210.49 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[016/030] 202.45 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[017/030] 203.17 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[018/030] 203.47 sec(s) Train Loss: 0.000024 | Val loss: 0.000029
[019/030] 

Epoch: 1059
BC	mean: 903.99	variance: 147.99
1060
Track generation: 1129..1417 -> 288-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1038..1303 -> 265-tiles track
Epoch: 1060
BC	mean: 903.98	variance: 147.92
1061
Track generation: 1067..1344 -> 277-tiles track
Epoch: 1061
BC	mean: 903.60	variance: 148.38
1062
Track generation: 1135..1423 -> 288-tiles track
Epoch: 1062
BC	mean: 903.58	variance: 148.31
1063
Track generation: 1257..1575 -> 318-tiles track
Epoch: 1063
BC	mean: 903.18	variance: 148.82
1064
Track generation: 1358..1702 -> 344-tiles track
Epoch: 1064
BC	mean: 903.03	variance: 148.84
1065
Track generation: 1180..1479 -> 299-tiles track
Epoch: 1065
BC	mean: 902.95	variance: 148.79
1066
Track generation: 946..1192 -> 246-tiles track
Epoch: 1066
BC	mean: 902.67	variance: 149.00
1067
Track generation: 1016..1283 -> 267-tiles track
Epoch: 1067
BC	mean: 902.67	variance: 148.93
1068
Track generation: 1124..1409 -> 285-til

Epoch: 1135
BC	mean: 894.86	variance: 156.34
1136
Track generation: 1070..1341 -> 271-tiles track
Epoch: 1136
BC	mean: 894.70	variance: 156.37
1137
Track generation: 1140..1430 -> 290-tiles track
Epoch: 1137
BC	mean: 894.66	variance: 156.31
1138
Track generation: 1308..1639 -> 331-tiles track
Epoch: 1138
BC	mean: 894.75	variance: 156.27
1139
Track generation: 1096..1374 -> 278-tiles track
Epoch: 1139
BC	mean: 894.86	variance: 156.24
1140
Track generation: 1171..1468 -> 297-tiles track
Epoch: 1140
BC	mean: 894.33	variance: 157.20
1141
Track generation: 1044..1309 -> 265-tiles track
Epoch: 1141
BC	mean: 894.35	variance: 157.13
1142
Track generation: 1183..1483 -> 300-tiles track
Epoch: 1142
BC	mean: 894.14	variance: 157.23
1143
Track generation: 1056..1324 -> 268-tiles track
Epoch: 1143
BC	mean: 894.12	variance: 157.16
1144
Track generation: 1189..1490 -> 301-tiles track
Epoch: 1144
BC	mean: 894.10	variance: 157.09
1145
Track generation: 1172..1465 -> 293-tiles track
Epoch: 1145
BC	mean:

Epoch: 1213
BC	mean: 887.73	variance: 161.86
1214
Track generation: 1176..1479 -> 303-tiles track
Epoch: 1214
BC	mean: 887.84	variance: 161.83
1215
Track generation: 1144..1437 -> 293-tiles track
Epoch: 1215
BC	mean: 887.83	variance: 161.77
1216
Track generation: 1151..1443 -> 292-tiles track
Epoch: 1216
BC	mean: 887.60	variance: 161.90
1217
Track generation: 1118..1409 -> 291-tiles track
Epoch: 1217
BC	mean: 887.70	variance: 161.87
1218
Track generation: 1124..1409 -> 285-tiles track
Epoch: 1218
BC	mean: 887.63	variance: 161.83
1219
Track generation: 971..1223 -> 252-tiles track
Epoch: 1219
BC	mean: 887.49	variance: 161.84
1220
Track generation: 1212..1519 -> 307-tiles track
Epoch: 1220
BC	mean: 887.47	variance: 161.77
1221
Track generation: 1411..1768 -> 357-tiles track
Epoch: 1221
BC	mean: 887.57	variance: 161.74
1222
Track generation: 1127..1413 -> 286-tiles track
Epoch: 1222
BC	mean: 887.55	variance: 161.67
1223
Track generation: 1105..1385 -> 280-tiles track
Epoch: 1223
BC	mean: 

Epoch: 1292
BC	mean: 885.74	variance: 161.65
1293
Track generation: 1336..1674 -> 338-tiles track
Epoch: 1293
BC	mean: 885.75	variance: 161.59
1294
Track generation: 1384..1734 -> 350-tiles track
Epoch: 1294
BC	mean: 885.22	variance: 162.63
1295
Track generation: 1186..1487 -> 301-tiles track
Epoch: 1295
BC	mean: 885.27	variance: 162.58
1296
Track generation: 1299..1628 -> 329-tiles track
Epoch: 1296
BC	mean: 885.33	variance: 162.53
1297
Track generation: 1198..1502 -> 304-tiles track
Epoch: 1297
BC	mean: 885.17	variance: 162.57
1298
Track generation: 981..1232 -> 251-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1099..1378 -> 279-tiles track
Epoch: 1298
BC	mean: 885.16	variance: 162.51
1299
Track generation: 1124..1409 -> 285-tiles track
Epoch: 1299
BC	mean: 885.20	variance: 162.45
1300
Track generation: 1065..1338 -> 273-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track gen

Epoch: 1370
BC	mean: 879.95	variance: 168.15
1371
Track generation: 1191..1493 -> 302-tiles track
Epoch: 1371
BC	mean: 879.95	variance: 168.09
1372
Track generation: 1120..1404 -> 284-tiles track
Epoch: 1372
BC	mean: 879.97	variance: 168.03
1373
Track generation: 990..1246 -> 256-tiles track
Epoch: 1373
BC	mean: 879.57	variance: 168.65
1374
Track generation: 1279..1612 -> 333-tiles track
Epoch: 1374
BC	mean: 879.48	variance: 168.61
1375
Track generation: 987..1239 -> 252-tiles track
Epoch: 1375
BC	mean: 879.50	variance: 168.55
1376
Track generation: 1128..1414 -> 286-tiles track
Epoch: 1376
BC	mean: 879.50	variance: 168.49
1377
Track generation: 1176..1474 -> 298-tiles track
Epoch: 1377
BC	mean: 879.61	variance: 168.48
1378
Track generation: 1315..1657 -> 342-tiles track
Epoch: 1378
BC	mean: 879.69	variance: 168.44
1379
Track generation: 1204..1508 -> 304-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1036..1303 -> 267-tile

Epoch: 1448
BC	mean: 877.61	variance: 170.72
1449
Track generation: 1209..1515 -> 306-tiles track
Epoch: 1449
BC	mean: 877.56	variance: 170.67
1450
Track generation: 1173..1471 -> 298-tiles track
Epoch: 1450
BC	mean: 877.39	variance: 170.73
1451
Track generation: 1132..1419 -> 287-tiles track
Epoch: 1451
BC	mean: 877.40	variance: 170.67
1452
Track generation: 1147..1438 -> 291-tiles track
Epoch: 1452
BC	mean: 877.41	variance: 170.61
1453
Track generation: 1147..1438 -> 291-tiles track
Epoch: 1453
BC	mean: 877.28	variance: 170.63
1454
Track generation: 1227..1538 -> 311-tiles track
Epoch: 1454
BC	mean: 877.28	variance: 170.57
1455
Track generation: 950..1195 -> 245-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1187..1488 -> 301-tiles track
Epoch: 1455
BC	mean: 877.10	variance: 170.64
1456
Track generation: 1111..1393 -> 282-tiles track
Epoch: 1456
BC	mean: 876.96	variance: 170.67
1457
Track generation: 1203..1516 -> 313-til

In [49]:
import math
import random
beta=1
my_prob = ['expert']*math.floor((beta*100)) + ['dagger']*math.floor((1-beta)*100)
random.choice(my_prob)

'expert'

In [None]:
env.close()