In [46]:
import torch
import torch.nn as nn
import random
import pickle
import numpy as np
from collections import namedtuple
import cv2
Transition = namedtuple('Transition',
                        ('obs', 'action', 'next_obs', 'reward', 'done'))


In [40]:
class ReplayMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        """Saves a transition."""
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

    def save_memory(self,savepath):
        with open(savepath,'wb') as f:
            pickle.dump(self.memory,f)

    def load_memory(self, loadpath, loadpath2 = None):
        with open(loadpath,'rb') as f:
            transitions = pickle.load(f)
            print(len(transitions))
        if loadpath2 != None:
            with open(loadpath2,'rb')as f:
                transitions2 = pickle.load(f)
                print(len(transitions2))
                print(transitions2[0])
            transitions = transitions+transitions2
            print(len(transitions))
        return transitions


In [41]:
memory = ReplayMemory(100000)
transitions = memory.load_memory('./mem7-3.pickle','./mem.pickle')

2261
2048
Transition(obs=tensor([[47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        ...,
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407]]), action=tensor([1]), next_obs=tensor([[47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        ...,
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407]]), reward=tensor([0.]), done=False)
4309


In [42]:
#print(len(transitions))
print(transitions[2500][0].numpy().shape)

(200, 200)


In [67]:
img = transitions[99][2].numpy()
print(torch.from_numpy(img).float())
#def rgb2gray(rgb):
#    return np.dot(rgb[:,:,:3], [0.2989, 0.5870, 0.1140])
#img = rgb2gray(img)
#print(img)
#cv2.imwrite('color_img.jpg', img)
#cv2.imshow("image", img)
#cv2.waitKey()


tensor([[47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        ...,
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407]])


In [44]:
img = transitions[99][2].numpy()
print(img)
#def rgb2gray(rgb):
#    return np.dot(rgb[:,:,:3], [0.2989, 0.5870, 0.1140])
#img = rgb2gray(img)
#print(img)

[[47.6407 47.6407 47.6407 ... 47.6407 47.6407 47.6407]
 [47.6407 47.6407 47.6407 ... 47.6407 47.6407 47.6407]
 [47.6407 47.6407 47.6407 ... 47.6407 47.6407 47.6407]
 ...
 [47.6407 47.6407 47.6407 ... 47.6407 47.6407 47.6407]
 [47.6407 47.6407 47.6407 ... 47.6407 47.6407 47.6407]
 [47.6407 47.6407 47.6407 ... 47.6407 47.6407 47.6407]]


In [None]:
import cv2
frame = cv2.cvtColor(np.uint8(transitions[0][0].numpy()), cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (235*1,
                    200*1),
                    interpolation=cv2.INTER_NEAREST)
cv2.imshow('Wimblepong', frame)
#cv2.waitKey(max(1000//30, 1))

In [None]:

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def rgb2gray(rgb):
    return np.dot(rgb[:,:,:3], [0.2989, 0.5870, 0.1140])

img = transitions[0][0]  
gray = rgb2gray(img)    
plt.imshow(img, cmap=plt.get_cmap('rgb'), vmin=0, vmax=1)
plt.show()

In [None]:
memory = ReplayMemory(100)
for i in range(100):
    ob1 = torch.rand(200,200,3)
    action = torch.tensor([0])
    ob1_next = torch.rand(200,200,3)
    reward = torch.tensor([0.])
    done = False
    memory.push(ob1,action,ob1_next,reward,done)
sample = memory.sample(3)    
memory.pickle_rick('./mem.pickle')
transitions = memory.load_pickle_rick('./mem.pickle')

In [60]:
class CNNDQN(nn.Module):
    def __init__(self,input_channels, num_actions):
        super(CNNDQN,self).__init__()
        self.Conv1 = nn.Conv2d(1, 16, 5, stride=1)# 200->198
        self.ReLU1 = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)# 198->99
        self.Conv2 = nn.Conv2d(16,32,5) #12->8 and second pool 8->4
        self.ReLU2 = nn.ReLU()
        
        self.Linear1 = nn.Linear(32*100*100,64)#eli input channelit * imagesize * imagesize
        self.ReLU3 = nn.ReLU()
        self.Linear2 = nn.Linear(64,16)
        self.ReLu4 = nn.ReLU()
        self.Linear3 = nn.Linear(16,3)

        
    def forward(self, x):
        # YOUR CODE HERE
        print(x.shape)
        x = self.Conv1(x)
        x = self.pool(self.ReLU1(x))
        x = self.pool(self.ReLU2(self.Conv2(x)))
        x = x.view(-1,self.num_flat_features(x))
        x = self.ReLU3(self.Linear1(x))
        x = self.ReLu4(self.Linear2(x))
        x = self.Linear3(x)
        return x

    #source 
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [62]:
device = 'cpu'
DQN = CNNDQN(1,3).to(device)
print(DQN)
state_batch = torch.stack(transitions[0]).to(self.train_device)
transition = transitions[0]
frame = transition[0].to(device)
print(frame)
DQN.forward(frame)

CNNDQN(
  (Conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (ReLU1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (ReLU2): ReLU()
  (Linear1): Linear(in_features=320000, out_features=64, bias=True)
  (ReLU3): ReLU()
  (Linear2): Linear(in_features=64, out_features=16, bias=True)
  (ReLu4): ReLU()
  (Linear3): Linear(in_features=16, out_features=3, bias=True)
)
tensor([[47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        ...,
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407],
        [47.6407, 47.6407, 47.6407,  ..., 47.6407, 47.6407, 47.6407]])
torch.Size([200, 200])


RuntimeError: Expected 4-dimensional input for 4-dimensional weight 16 1, but got 2-dimensional input of size [200, 200] instead

In [109]:
glie_a = 500
episodes = 10000

In [110]:
eps = []
for i in range(episodes):
    #if i%2==0:
    if i/episodes<0.5:
        eps.append((glie_a)/(glie_a+i*2))
    else:
        eps.append(0)

In [111]:
print(eps[4999])

0.04762811964183654
