<a href="https://colab.research.google.com/github/jakesnakelou/Computer-Vision/blob/convolution_attempt.ipynb/SnakeModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
data.keys()

In [1]:
pip install torchmetrics



In [2]:
from random import randint
import numpy as np
import pandas as pd
import json
import time
import torch
import torchmetrics
import pandas as pd
pd.set_option('display.float_format',lambda x: '%.5f' % x)
np.set_printoptions(threshold=np. inf,suppress=True, precision=4)

In [3]:
'''Here we create a dataset object that allows to access the different elements of our data
In: Arrays: image, DirectionPointing, compassMoves,FirstPersonMoves, Reward
Out: [All inputs in a Class that is callable]
'''

class Dataset(torch.utils.data.Dataset):
    """Index-aligned container for the five per-frame arrays.

    Every array passed in is converted with ``astype('float')`` and stored
    under an attribute of the same name as the constructor argument
    (``y`` holds the target).  Item ``i`` is the tuple
    ``(image[i], DirectionPointing[i], CompassMoves[i], FirstPersonMoves[i], y[i])``.
    The length of the dataset is the length of ``y``.
    """

    def __init__(self, image, DirectionPointing, CompassMoves, FirstPersonMoves, y):
        def to_float(array):
            return array.astype('float')

        self.image = to_float(image)
        self.DirectionPointing = to_float(DirectionPointing)
        self.CompassMoves = to_float(CompassMoves)
        self.FirstPersonMoves = to_float(FirstPersonMoves)
        self.y = to_float(y)

    def __getitem__(self, index):
        # Same order as the constructor arguments; consumers unpack positionally.
        columns = (
            self.image,
            self.DirectionPointing,
            self.CompassMoves,
            self.FirstPersonMoves,
            self.y,
        )
        return tuple(column[index] for column in columns)

    def __len__(self):
        return len(self.y)

In [4]:
'''
Take our Dataset, split the data, and put them into dataloaders
In: Dataset class, Train split, val split, batchsize
Out: [Dictionary 1: train dataset, val dataset, test dataset
      Dictionary 2: train dataloader, val dataloader, test dataloader 
    ]
'''


def DatasetsAndDataloaders(dataset, train_perc, val_perc, batch_size):
    """Randomly split ``dataset`` into train/val/test and wrap each in a DataLoader.

    In:  dataset, train fraction, validation fraction, batch size.
         The two fractions must sum to less than 1; whatever is left after
         the train and validation subsets becomes the test subset.
    Out: (datasets, dataloaders) -- two dicts keyed 'train', 'val', 'test'.
    """
    assert train_perc + val_perc < 1, 'val and train percent should add up to <1'

    total = len(dataset)
    n_train = int(train_perc * total)
    n_holdout = int(total - n_train)   # everything that is not training data
    n_val = int(val_perc * total)
    n_test = int(n_holdout - n_val)

    # First split: train vs. hold-out.  Second split: hold-out -> val + test.
    train_part, holdout_part = torch.utils.data.random_split(
        dataset, [n_train, n_holdout])
    val_part, test_part = torch.utils.data.random_split(
        holdout_part, [n_val, n_test])

    datasets = {'train': train_part, 'val': val_part, 'test': test_part}
    dataloaders = {
        split: torch.utils.data.DataLoader(subset, batch_size=batch_size)
        for split, subset in datasets.items()
    }
    return datasets, dataloaders

In [5]:
'''Set of functions that convert the snake data into usable arrays

In: Dictionary -----> data, ArrayMax(estimate_of_size), Reward parameters. 
Out: Image, DirectionPointing, Compassmoves, FirstPersonMoves, reward ---> Arrays
'''

######  Master Function that calls all functions below

def CreateAllXYInputs(data, ArrayMax, EatReward, EatDecayRate, EatDecayPeriod, DieReward, FutureReward):
    """Build every model input/target array from the raw game log.

    In:  data (the loaded log dict), ArrayMax (rows pre-allocated for the
         image and direction arrays) and the reward parameters, which are
         forwarded unchanged to CreateBigArrayofRewardFromManyGames.
    Out: Images, DirectionPointing, CompassMoves, FirstPersonMoves, reward,
         each cut down to the number of frames reported by CreateImagesAllGames.
    """
    Images, frames = CreateImagesAllGames(ArrayMax, data)

    # (log key, number of options) for each one-hot direction array.
    FirstPersonMoves = CreateDirectionArrayManyGames(data, ArrayMax, 'FaceDirectionChoice', 3)
    CompassMoves = CreateDirectionArrayManyGames(data, ArrayMax, 'CompassDirectionChoice', 4)
    DirectionPointing = CreateDirectionArrayManyGames(data, ArrayMax, 'StartingDirection', 4)

    reward = CreateBigArrayofRewardFromManyGames(
        data, EatReward, EatDecayRate, EatDecayPeriod, DieReward, FutureReward)

    # Drop the unused, still-zero tail of the pre-allocated arrays.
    used = slice(None, frames)
    return (
        Images[used],
        DirectionPointing[used],
        CompassMoves[used],
        FirstPersonMoves[used],
        reward[used],
    )


###Helper: The Dictionary keys that list the moves made are strings. To look at them sequentially we need to them to be integers
def TurnDictKeysToInt(data,instance,dic):
    """Return a copy of ``data[instance][dic]`` whose keys are converted with ``int``.

    Values and iteration order are left untouched.
    """
    converted = {}
    for key, value in data[instance][dic].items():
        converted[int(key)] = value
    return converted

################################################################################################################ 
##
##  Function that takes the body and food values for all games, and turns them into a 20x20x3 array 
# #

def CreateImagesAllGames(ArrayMax, data):
    """Rasterise every frame of every game into one (ArrayMax, 3, 20, 20) array.

    In:  ArrayMax -- number of frames to pre-allocate.
         data     -- log dict; data[game]['body'] and data[game]['FoodPos']
                     map a frame key to the body points / food point.
    Out: (AllGamesImage, frames) where ``frames`` is how many leading rows of
         AllGamesImage were filled.  A game that would overflow ArrayMax is
         not stored and ends the scan; empty ``data`` yields ``frames == 0``.
    """

    def CreateImageForOneGame(instance, data):
        """Return a (plays, 3, 20, 20) array for one game."""
        body_by_frame = TurnDictKeysToInt(data, instance, 'body')
        food_by_frame = TurnDictKeysToInt(data, instance, 'FoodPos')
        image = np.zeros((len(body_by_frame), 3, 20, 20))

        # Channel 0 = tail, channel 1 = head, channel 2 = food.
        for frame, segments in body_by_frame.items():
            for idx, point in enumerate(segments):
                y = 20 - point[0]   # flip so the board reads top-down like snake.py
                x = point[1] - 1    # assumes the log uses 1..20 coordinates -- TODO confirm
                channel = 1 if idx == 0 else 0   # the first body point is the head
                image[frame, channel, y, x] = 1

        for frame, food in food_by_frame.items():
            image[frame, 2, 20 - food[0], food[1] - 1] = 1

        return image

    AllGamesImage = np.zeros((ArrayMax, 3, 20, 20))
    frames_start = 0   # number of rows filled so far
    for game in data.keys():
        tempArray = CreateImageForOneGame(game, data)
        frames_end = frames_start + len(tempArray)
        # Stop before writing a game that does not fit in the pre-allocated array.
        if frames_end > ArrayMax:
            return AllGamesImage, frames_start
        AllGamesImage[frames_start:frames_end] = tempArray
        frames_start = frames_end
    # frames_start equals the last frames_end, and is 0 (instead of an
    # unbound name) when ``data`` has no games.
    return AllGamesImage, frames_start

################################################################################################################ 
##
##  Functions that take Current Direction Faced, Compass Choice and turn them into one arrays with dummy variables
# #

####Helper function that turns integer based direction to dummy(Hot) variables
def TurnDirectionsToHot(HotDict,Options):
    """Dummy-encode the integer directions stored as the values of ``HotDict``.

    Returns a (len(HotDict), Options-1) array of zeros in which row ``i`` has
    a 1 at column ``move-1`` when the i-th value ``move`` is non-zero.
    Direction 0 is the reference level and stays an all-zero row.
    """
    moves = np.array(list(HotDict.values()))
    HotMoves = np.zeros((len(moves), Options - 1))
    for row, move in enumerate(moves):
        if move == 0:
            continue   # reference direction: leave the row at zero
        HotMoves[row, move - 1] = 1
    return HotMoves

def CreateDirectionArrayManyGames(data, ArrayMax, OrigDictKey, NoOfOptions):
    """Stack the dummy-encoded directions of every game into one array.

    In:  data, ArrayMax (rows to pre-allocate), OrigDictKey (which per-game
         dict to read) and NoOfOptions (number of distinct directions).
    Out: an (ArrayMax, NoOfOptions-1) array.  Games are written back to back
         from row 0; a game that would overflow ArrayMax is not written and
         ends the scan.  Unfilled rows stay zero.
    """
    AllGamesDirections = np.zeros((ArrayMax, NoOfOptions - 1))
    write_from = 0
    for game in data.keys():
        encoded = TurnDirectionsToHot(
            TurnDictKeysToInt(data, game, OrigDictKey), NoOfOptions)
        write_to = write_from + len(encoded)
        if write_to > ArrayMax:
            break   # out of pre-allocated rows
        AllGamesDirections[write_from:write_to] = encoded
        write_from = write_to
    return AllGamesDirections

################################################################################################################ 
##
##  Function that converts the list of 'current score' into a reward function
# #

def CreateBigArrayofRewardFromManyGames(data, EatReward, EatDecayRate, EatDecayPeriod, DieReward, FutureReward):
    '''
    Turn each game's 'CurrentScore' sequence into per-frame rewards and stack all games.

    Eat Reward: The reward for eating food on a particular turn
    EatDecayRate: The decay rate of reward to previous values
    EatDecayPeriod: How long to assign reward historically (previous steps before the step of actually eating)
    Die Reward: The reward(negative value) for dying on a particular turn.
    Future Reward: currently redundant

    Returns a float array of shape (total_frames, 1); (0, 1) when data has no games.
    '''
    ####Helper function to calculate reward for one game.
    def reward(instance,data, EatReward,EatDecayRate, EatDecayPeriod, DieReward, FutureReward):
        IntegerKeysDictReward = TurnDictKeysToInt(data,instance,'CurrentScore')
        CurrentScore = np.array(list(IntegerKeysDictReward.values())).astype('float')

        # The score change between consecutive frames marks the move that ate
        # food (the score is captured in the following game state).
        MovesThatCaptureFood = (CurrentScore[1:] - CurrentScore[:-1])*EatReward
        n_moves = len(MovesThatCaptureFood)

        RewardTotal = MovesThatCaptureFood.copy()
        # Credit a move with the decayed reward of food eaten ``lag`` moves
        # later.  Lags are capped at the game length: the previous
        # fixed-length construction raised a shape-mismatch ValueError for
        # games with fewer moves than EatDecayPeriod.
        for lag in range(1, min(EatDecayPeriod, n_moves - 1) + 1):
            RewardTotal[:n_moves - lag] += MovesThatCaptureFood[lag:] * EatDecayRate ** lag

        # One extra slot for the final frame, which carries the death reward.
        RewardArray = np.append(RewardTotal, 0.0)
        RewardArray[-1] = DieReward
        return RewardArray

    # Collect per-game arrays and concatenate once (np.append inside the loop
    # re-copied the whole accumulated array for every game).
    per_game = [
        reward(game, data, EatReward, EatDecayRate, EatDecayPeriod, DieReward, FutureReward)
        for game in data.keys()
    ]
    if not per_game:
        return np.zeros((0, 1))
    return np.concatenate(per_game).reshape(-1, 1)

In [58]:
 ####### Upload external file #######

# Colab-only helper module; this cell will not run outside Google Colab.
from google.colab import files
# NOTE(review): presumably opens the browser upload dialog and returns the
# uploaded files; `uploaded` is not read by any later cell visible here.
uploaded = files.upload()

Saving snakeLogs.json to snakeLogs (1).json


In [6]:
#we have a 20 by 20 matrix, with a body and a head covering 3+ squares. We can represent the head,tail and food in a 20x20x3(channels) array
# Load the recorded games.  The context manager closes the file handle as soon
# as parsing finishes (it was previously left open for the whole session).
with open('snakeLogs (1).json') as j:
    data = json.load(j)

In [71]:
# Number of frames pre-allocated for the image/direction arrays; games that
# would overflow this are dropped by the Create*ManyGames helpers.
ArrayMax = 200000
# Reward assigned to the move that eats food.
EatReward = 3
# Reward written on the last frame of every game.
DieReward = -1
# Passed through but documented as "currently redundant" by the reward builder.
FutureReward = 0
# Moves up to EatDecayPeriod steps before a meal get EatReward * EatDecayRate**steps.
EatDecayRate, EatDecayPeriod = 0.25, 4
Images, DirectionPointing, CompassMoves, FirstPersonMoves, reward = CreateAllXYInputs(data, ArrayMax, EatReward, EatDecayRate, EatDecayPeriod, DieReward, FutureReward)

In [72]:
Images.shape,FirstPersonMoves.shape,reward.shape,DirectionPointing.shape,CompassMoves.shape

((199998, 3, 20, 20), (199998, 2), (199998, 1), (199998, 3), (199998, 3))

In [73]:
# # ### Function to check things are working

# # # #0 = up, 1 = right, 2 = left
# n,k = 190,196
# print('reward',reward[n:k])
# print('FirstPersonMove',FirstPersonMoves[n:k])
# print('CompassMove',CompassMoves[n:k])
# print('DirectionPointing',DirectionPointing[n:k])
# for i in Images[n:k]:
#   ConvertImageToOrigView(i)
# print(Images[n:k]) #,(0,1,3,2))) #this puts y (which we put first, second in the printing)

In [74]:
print(np.mean(reward))

0.024777259491344912


#### Overview of Variables
----
Trainable X (State): Body, Direction Pointing

Non-trainable X (Decision): Moves

Target Y: reward

In [75]:
# Wrap the five aligned arrays; argument order matches Dataset.__init__
# (image, DirectionPointing, CompassMoves, FirstPersonMoves, y).
dataset = Dataset(Images, DirectionPointing, CompassMoves, FirstPersonMoves,reward)

In [76]:
#clearing the RAM
# `dataset` holds its own astype('float') versions of these arrays, so the
# notebook-level names can be dropped.  `reward` is kept: a later cell prints its mean.
del Images
del DirectionPointing
del CompassMoves
del FirstPersonMoves

In [77]:
#note, the very large batch size here may have helped?
BatchSize = 2000
# 70% train, 20% validation; the remaining frames become the test split.
datasets, dataloaders = DatasetsAndDataloaders(dataset, 0.7,0.2,BatchSize)

In [78]:
'''Testing to see if we get good shapes'''
# Iterates the un-batched train subset directly (not the dataloader), so each
# printed shape is for a single example.  Only the first two items are printed.
counter = 0
for image, starting_direction, compass_move, first_person_move, y in datasets['train']:
    counter +=1
    if counter > 2:
        break
    print(image.shape,starting_direction.shape,compass_move.shape,first_person_move.shape,y.shape)

(3, 20, 20) (3,) (3,) (2,) (1,)
(3, 20, 20) (3,) (3,) (2,) (1,)


In [144]:
'''
In: Input, Compass Move

+Image
-----> (a 1x1 conv layer, conv0, is defined but not applied in forward)
-----> 4 parallel conv layers with kernel sizes 2, 4, 6 and 8 (4 channels each), concatenated
-----> Dense Layer with dropout -> 17 features

+CompassMove (3 features)
-----> Dense layer with dropout ->40 features
-----> Final dense layer outputting predicted reward.
'''

class SnakeModel(torch.nn.Module):
    """Predict the reward of taking ``move`` in the board state ``image``.

    The image goes through four parallel conv branches (kernel sizes 2, 4, 6
    and 8 with paddings 0, 1, 2, 3; 4 channels each).  For a 20x20 input every
    branch yields a 19x19 map, so the concatenation flattens to
    16 * 19 * 19 = 5776 features.  These pass through dropout, a dense layer
    of ``DenseFinalLayerSize`` units and tanh, are concatenated with the
    3-value move encoding, and go through a 40-unit dense layer (dropout,
    ReLU) to a single output.

    ``conv0`` and ``DenseLayer2`` are created and initialised but never used
    in ``forward``.
    """

    def __init__(self,DenseFinalLayerSize):
        super().__init__()
        nn = torch.nn

        def xavier(layer):
            # Re-initialise the weight in place and hand the layer back.
            nn.init.xavier_uniform_(layer.weight)
            return layer

        # Shared activations.
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()

        # Convolutional layers (3 input channels: tail, head, food).
        self.conv0 = xavier(nn.Conv2d(3, 3, kernel_size=1, stride=1, padding=(0, 0)))
        self.conv1 = xavier(nn.Conv2d(3, 4, kernel_size=2, stride=1, padding=(0, 0)))
        self.conv2 = xavier(nn.Conv2d(3, 4, kernel_size=4, stride=1, padding=(1, 1)))
        self.conv3 = xavier(nn.Conv2d(3, 4, kernel_size=6, stride=1, padding=(2, 2)))
        self.conv4 = xavier(nn.Conv2d(3, 4, kernel_size=8, stride=1, padding=(3, 3)))

        self.flatten = nn.Flatten()

        # Dense layers.
        self.DenseLayer1 = xavier(nn.Linear(5776, DenseFinalLayerSize))
        self.dropout1 = nn.Dropout(0.25)
        self.DenseLayer2 = xavier(nn.Linear(400, DenseFinalLayerSize))
        self.FinalDenseLayer1 = xavier(nn.Linear(DenseFinalLayerSize + 3, 40))
        self.dropout2 = nn.Dropout(0.2)
        self.FinalDenseLayer3 = xavier(nn.Linear(40, 1))

    def forward(self,image,move): #direction removed
        # Run the four conv branches on the raw image and stack their channels.
        branches = [
            self.relu(conv(image))
            for conv in (self.conv1, self.conv2, self.conv3, self.conv4)
        ]
        features = torch.cat(branches, 1)

        # Flatten -> dropout -> dense -> tanh.
        features = self.tanh(self.DenseLayer1(self.dropout1(self.flatten(features))))

        # Append the candidate move and regress the reward.
        joined = torch.cat((features, move), 1)
        hidden = self.relu(self.dropout2(self.FinalDenseLayer1(joined)))
        return self.FinalDenseLayer3(hidden)

In [145]:
# Width of the dense layer that follows the conv branches (SnakeModel's DenseFinalLayerSize).
FeaturesFromConvLayer = 17
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
print(f'Device: {device}')

#Create Model
Model = SnakeModel(FeaturesFromConvLayer)
#Move the model to the selected device (GPU or CPU)
Model = Model.to(device)
#create optimizer (Adam with its default settings)
optimizer = torch.optim.Adam(Model.parameters())
#create the training loss (mean squared error)
criterion = torch.nn.MSELoss()
#create metrics(MSE & MAE); the 'MSE' entry is the value ValEpoch returns
metrics = {}
metrics['MSE'], metrics['MAE'] = torchmetrics.MeanSquaredError().to(device), torchmetrics.MeanAbsoluteError().to(device)

Device: cuda


In [138]:
# #######Testing we get the right shape from the dataloader
# testimage, _, _, _, testy = next(iter(dataloaders['val']))
# print(testimage.shape, testy.shape)#,testdirection.shape,testcompmove.shape
# del testimage
# del testy

torch.Size([2000, 3, 20, 20]) torch.Size([2000, 1])


In [139]:
#we can rearrange the order of the trainepoch to put the loss and optimizer last, then create a helper function so that train and eval share things.
def ModelOutputAndMetrics(batch, model, metrics, device, TTV='train'):
    '''
    In: one batch (image, starting_direction, compass_move, first_person_move, y),
        the model, the metrics dict, the device & a train/val label (TTV, not used here)
    Out: (output, y) -- the model prediction for (image, compass_move) and the
        target, both float tensors on `device`. Every metric is updated with them.
    '''
    image, starting_direction, compass_move, first_person_move, y = batch

    def on_device_as_float(tensor):
        return tensor.to(device).float()

    image = on_device_as_float(image)
    starting_direction = on_device_as_float(starting_direction)   # moved but not fed to the model
    compass_move = on_device_as_float(compass_move)
    y = on_device_as_float(y)

    output = model(image, compass_move)
    for update_metric in metrics.values():
        update_metric(output, y)
    return output, y

def TrainEpoch(dataloaders, model, optimizer, criterion, metrics,device):
    '''
    Run one optimisation pass over dataloaders['train'].

    Each batch is scored with ModelOutputAndMetrics, the criterion is
    back-propagated and the optimizer stepped.  After the pass every metric
    is computed, printed and reset.  Returns nothing.
    '''
    model.train()
    for batch in dataloaders['train']:
        predictions, targets = ModelOutputAndMetrics(batch, model, metrics, device, 'train')
        criterion(predictions, targets).backward()
        optimizer.step()
        optimizer.zero_grad()   # clear gradients for the next batch

    for MetricName in metrics:
        epoch_value = metrics[MetricName].compute()
        print(MetricName,': ', epoch_value.item())
        metrics[MetricName].reset()

def ValEpoch(dataloaders, model, optimizer, criterion, metrics,device):
    '''
    Evaluate the model on dataloaders['val'] without gradient tracking.

    In: dataloaders, the model, the metrics dict (must contain 'MSE') & the device.
        optimizer and criterion are accepted for signature parity with
        TrainEpoch and are not used.
    Out: the computed 'MSE' metric value. Every metric is printed and reset.
    Raises: KeyError if metrics has no 'MSE' entry (this previously surfaced
        as an UnboundLocalError only after the whole pass had run).
    '''
    if 'MSE' not in metrics:
        raise KeyError("metrics must contain an 'MSE' entry; ValEpoch returns its value")

    model.eval()
    with torch.no_grad():
        for batch in dataloaders['val']:
            # Label the pass as validation (it was copy-pasted as 'train').
            ModelOutputAndMetrics(batch, model, metrics, device, 'val')

        MSE = None
        for MetricName, metric in metrics.items():
            met = metric.compute()
            if MetricName == 'MSE':
                MSE = met
            print(MetricName,': ', met.item())
            metric.reset()

    return MSE

In [146]:
# Best validation MSE seen so far; starts at infinity so the first check always improves.
MSE = float('inf')
# Number of consecutive validation checks without improvement.
patience =0
epochs =15
for epoch in range(epochs):
  print(f'Epoch {epoch} Train Metrics: ')
  TrainEpoch(dataloaders, Model, optimizer, criterion, metrics,device)
  # Validate only on every third epoch (0, 3, 6, ...).
  if epoch % 3 == 0:
    print(f'Epoch {epoch} Val Metrics: ')
    trial_MSE = ValEpoch(dataloaders,Model,optimizer,criterion,metrics,device)
    if trial_MSE < MSE:
      # Improvement: remember it and reset the patience counter.
      MSE = trial_MSE
      patience=0
      print(patience)
    else:
      patience+=1
      print(patience)
      # Stop after two non-improving validation checks in a row.
      if patience > 1 and epoch > 2 :
        print('early stop')
        break

Epoch 0 Train Metrics: 
MSE :  0.10682223737239838
MAE :  0.10790999233722687
Epoch 0 Val Metrics: 
MSE :  0.09964696317911148
MAE :  0.07506545633077621
0
Epoch 1 Train Metrics: 
MSE :  0.09836263209581375
MAE :  0.08174990117549896
Epoch 2 Train Metrics: 
MSE :  0.0951405018568039
MAE :  0.08489764481782913
Epoch 3 Train Metrics: 
MSE :  0.08896751701831818
MAE :  0.09209485352039337
Epoch 3 Val Metrics: 
MSE :  0.08182702958583832
MAE :  0.0858367532491684
0
Epoch 4 Train Metrics: 
MSE :  0.07448562234640121
MAE :  0.09418434649705887
Epoch 5 Train Metrics: 
MSE :  0.05555934086441994
MAE :  0.07335248589515686
Epoch 6 Train Metrics: 
MSE :  0.04617375507950783
MAE :  0.06039992719888687
Epoch 6 Val Metrics: 
MSE :  0.04959045350551605
MAE :  0.05534686893224716
0
Epoch 7 Train Metrics: 
MSE :  0.04047350212931633
MAE :  0.054643817245960236
Epoch 8 Train Metrics: 
MSE :  0.03814012184739113
MAE :  0.051216401159763336
Epoch 9 Train Metrics: 
MSE :  0.03419393673539162
MAE :  0.0476

In [128]:
##############################################
#
#
##### Functions to help us check the model is working

def return_available_directions(current_dir_array):
  '''
  In: current direction, either
        * the 3-value dummy encoding used in the dataset (all zeros = 0/up,
          otherwise argmax+1), or
        * a plain integer 0 (up), 1 (right), 2 (down) or 3 (left), which is
          what CreateOneMoveModelInputs passes.
  Out: Available directions for the snake to go next (every direction except
       the one opposite to the current direction).
  Raises: ValueError for an integer outside 0..3.
  '''
  if np.ndim(current_dir_array) == 0:
    # Scalar direction: use it as is.  Running argmax on a scalar always
    # gives 0, which used to turn directions 2 and 3 into direction 1.
    current_dir = int(current_dir_array)
    if current_dir not in (0, 1, 2, 3):
      raise ValueError(f'direction must be 0, 1, 2 or 3, got {current_dir}')
  elif np.any(current_dir_array):
    ###extract current direction in 0,1,2,3 dialect
    current_dir = int(np.argmax(current_dir_array)) + 1
  else:
    current_dir = 0

  if current_dir == 0:
    return [0,1,3]
  elif current_dir == 1:
    return [0,1,2]
  elif current_dir == 2:
    return [1,2,3]
  else:
    return [0,2,3]

##Checking function by inputting "Up"
print(f'Up returns: {return_available_directions(np.array([0,0,0]))}\nDown Returns:{return_available_directions(np.array([0,1,0]))}')


def ConvertImageToOrigView(image):
  '''
  In: Image in 3x20x20 form (channel 0 = tail, 1 = head, 2 = food)
  Output: nothing is returned; the [y, x] game coordinates of every cell equal
          to 1 are printed for tail, head and food.
  '''

  def coordinates_in_channel(channel):
    # Undo the array layout: row r is board y = 20 - r, column c is board x = c + 1.
    return [
      [20 - rowidx, colidx + 1]
      for rowidx, row in enumerate(image[channel, :, :])
      for colidx, cell in enumerate(row)
      if cell == 1
    ]

  TailList = coordinates_in_channel(0)
  HeadList = coordinates_in_channel(1)
  FoodList = coordinates_in_channel(2)
  print(f'Tail: {TailList},Head: {HeadList},Food: {FoodList}')
  return

####Testing this function
# NOTE: `images` is not created in this cell; it comes from the
# `ChoiceValues, images = MakePrediction(...)` cell, which must be run first.
print(f'Test for image conversion:')
ConvertImageToOrigView(images[0])

Up returns: [0, 1, 3]
Down Returns:[1, 2, 3]
Test for image conversion:
Tail: [[10, 14], [10, 15]],Head: [[9, 15]],Food: [[7, 14]]


In [129]:
########
#
#Take one batch of the data and test three different direction choices

@torch.no_grad()
def MakePrediction(dataloaders, model, TTV):
  '''
  In: dataloaders, which split to use ('train'/'val'/'test') and the model
  Out: (ArrayToStoreChoices, images) for the first batch of that split.
       ArrayToStoreChoices has one row per example and one column per compass
       direction holding the predicted reward; directions ruled out by the
       starting direction keep the 1e-9 fill value. images is the batch of
       images, as float tensors on the notebook-level `device`.
  Side effect: the model is switched to eval mode.
  '''
  model.eval()

  # Only the images and starting directions of the first batch are needed.
  first_batch = next(iter(dataloaders[TTV]))
  images, starting_directions, compass_move, first_person_move, reward = first_batch
  images = images.to(device).float()
  starting_directions = starting_directions.to(device).float()

  # One column per direction, pre-filled with a tiny placeholder value.
  ArrayToStoreChoices = np.zeros((len(starting_directions),4))+1e-9

  for idx, (image, starting_direction) in enumerate(zip(images, starting_directions)):
    # Directions worth testing, given where the snake is currently heading.
    available_choices = return_available_directions(starting_direction.cpu().numpy())

    # Add a leading batch dimension of 1 so the shapes match the model input.
    image = torch.reshape(image,(1,3,20,20)).to(device)
    starting_direction = torch.reshape(starting_direction,(1,3)).to(device)

    for move in available_choices:
      # Dummy-encode the move: direction 0 stays all zeros.
      test_move = torch.zeros(1,3).to(device)
      if move!=0:
        test_move[0,move-1] = 1
      # Off the GPU, detached, as numpy; keep the single scalar.
      ArrayToStoreChoices[idx,move] = model(image, test_move).cpu().detach().numpy()[0][0]

  return ArrayToStoreChoices, images

def GetSummaryStats(ChoiceValues):
  '''
  In: 2-D array of predicted rewards, one row per example and one column per
      direction (Up, Right, Down, Left)
  Out: nothing is returned; prints the per-direction mean, the average
       max-min spread per example, and how often each direction has the
       highest value.
  '''
  ChoiceValues = np.asarray(ChoiceValues)
  print(f'Means: Up, Right, Down, Left: {np.mean(ChoiceValues,axis=0)}')
  print(f'Average Range: {np.mean(np.max(ChoiceValues,axis=1)-np.min(ChoiceValues,axis=1))}')
  Picks = np.argmax(ChoiceValues,axis=-1)
  # bincount with minlength keeps one slot per direction even when a direction
  # never wins; np.unique dropped such directions, so the printed counts no
  # longer lined up with the "Up, Right, Down, Left" labels.
  wins = np.bincount(Picks, minlength=ChoiceValues.shape[-1])
  print(f'Number of wins: Up, Right, Down, Left: {wins}')

In [147]:
ChoiceValues, images = MakePrediction(dataloaders,Model,'val')

In [148]:
GetSummaryStats(ChoiceValues)

Means: Up, Right, Down, Left: [0.0166 0.0097 0.0108 0.0151]
Average Range: 0.08040565894304127
Number of wins: Up, Right, Down, Left: [1091   56  599  254]


In [149]:
'''This returns the output of the choice values for values with a high threshold, and the place the snake was at the time'''

#create df
DFChoiceValues = pd.DataFrame(ChoiceValues)
DFChoiceValues['max'] = DFChoiceValues.max(axis=1)
# 'min' is taken after 'max' was added as a column; the row minimum is
# unaffected because 'max' is never smaller than any direction value.
DFChoiceValues['min'] = DFChoiceValues.min(axis=1)

#get mean and std over all direction values (placeholder entries included)
ChoiceValFlat = ChoiceValues.flatten()
mean, std = np.mean(ChoiceValFlat), np.std(ChoiceValFlat)
upperThresh = mean+2.2*std
lowerThresh= mean - 2.2*std
#just get high values (the low-value filter is commented out, so lowerThresh is unused)
HighLowValRows = DFChoiceValues[(DFChoiceValues['max'] > upperThresh)] #| (DFChoiceValues['min'] < lowerThresh)]
RowsToLookAt = HighLowValRows.index.tolist()
# Show the first ten selected rows next to the board position they came from.
for row in RowsToLookAt[:10]:
  display(pd.DataFrame(DFChoiceValues.loc[row]).T.rename(columns={0:"Up",1:"Right",2:"Down",3:"Left"}))
  # ConvertImageToOrigView prints the coordinates itself and returns None,
  # so this outer print adds a "None" line to the output.
  print(ConvertImageToOrigView(images[row]))

Unnamed: 0,Up,Right,Down,Left,max,min
35,1.21314,0.95328,1.72631,0.0,1.72631,0.0


Tail: [[10, 11], [9, 11]],Head: [[9, 12]],Food: [[8, 12]]
None


Unnamed: 0,Up,Right,Down,Left,max,min
138,0.18928,0.20135,0.4566,0.0,0.4566,0.0


Tail: [[11, 11], [10, 11]],Head: [[11, 12]],Food: [[9, 12]]
None


Unnamed: 0,Up,Right,Down,Left,max,min
155,0.0,0.80545,1.29466,0.89007,1.29466,0.0


Tail: [[4, 3], [3, 3]],Head: [[2, 3]],Food: [[2, 2]]
None


Unnamed: 0,Up,Right,Down,Left,max,min
186,0.13871,0.0,0.5287,0.07346,0.5287,0.0


Tail: [[4, 9], [3, 9]],Head: [[3, 8]],Food: [[2, 9]]
None


Unnamed: 0,Up,Right,Down,Left,max,min
214,0.98582,0.93462,0.0,0.53159,0.98582,0.0


Tail: [[10, 12], [10, 13]],Head: [[11, 13]],Food: [[11, 14]]
None


Unnamed: 0,Up,Right,Down,Left,max,min
322,1.66172,1.98835,0.47579,0.0,1.98835,0.0


Tail: [[7, 11], [6, 11]],Head: [[6, 12]],Food: [[6, 13]]
None


Unnamed: 0,Up,Right,Down,Left,max,min
348,0.0,1.62125,1.56404,1.73885,1.73885,0.0


Tail: [[11, 16], [10, 16]],Head: [[9, 16]],Food: [[8, 16]]
None


Unnamed: 0,Up,Right,Down,Left,max,min
402,1.13388,0.60886,0.0,0.9507,1.13388,0.0


Tail: [[7, 7], [7, 8]],Head: [[8, 8]],Food: [[9, 8]]
None


Unnamed: 0,Up,Right,Down,Left,max,min
428,0.0,1.61904,1.19948,2.08447,2.08447,0.0


Tail: [[4, 15], [3, 15]],Head: [[2, 15]],Food: [[2, 14]]
None


Unnamed: 0,Up,Right,Down,Left,max,min
462,0.0,1.72798,1.05263,2.1736,2.1736,0.0


Tail: [[12, 14], [11, 14]],Head: [[10, 14]],Food: [[10, 13]]
None


In [113]:
###################################################################
#
#
#
######For use in .py file: one set of data in format given by .py file.
'''
in: list for tail (N x 2), list for head(1x2), list for food(1x2), value for starting direction(integer)
function: input this to model, and output rewards
out: using either tiny epsilon or softmax, make choice.
'''
#Turn tail, head, food into 1,3,20,20 array
def CreateOneMoveModelInputs(tail, head, food, starting_direction):
  '''
  In: tail (list of [y, x] points), head ([y, x]), food ([y, x]) and the
      starting direction, which is handed unchanged to return_available_directions
  Out: (one_im_array, available_choices) -- a 1x3x20x20 tensor on the
       notebook-level `device` (channel 0 = tail, 1 = head, 2 = food) and the
       list of directions the snake may take next.
  '''
  one_im_array = torch.zeros((1,3,20,20)).to(device)

  def mark(channel, point):
    # Row is 20 - y so the board reads top-down like snake.py; column is
    # x - 1 because the game counts from 1 while the array counts from 0.
    one_im_array[0, channel, 20 - point[0], point[1] - 1] = 1

  for segment in tail:
    mark(0, segment)
  mark(1, head)
  mark(2, food)

  return one_im_array, return_available_directions(starting_direction)

def CreateRewardEst(image, available_choices,model):
  '''
  In: image (1x3x20x20 tensor), the list of directions to evaluate, the model
  Out: (ArrayToStoreEsts, Probabilities), both of shape (1, 4).
       ArrayToStoreEsts holds the predicted reward per direction, with -1e9
       for directions that were not evaluated; Probabilities is its softmax,
       so those directions get probability ~0. With no available choices all
       four entries are -1e9 and the probabilities are uniform.
  Note: the model's train/eval mode is not changed here; call model.eval()
        first if dropout should be disabled.
  '''
  ArrayToStoreEsts = torch.zeros((1,4))-1e9
  # Inference only: do not build an autograd graph for the predictions.
  with torch.no_grad():
    for move in available_choices:
      # Dummy-encode the move on the same device as the image (rather than
      # relying on a notebook-level `device` variable); direction 0 is all zeros.
      test_move = torch.zeros(1,3, device=image.device)
      if move!=0:
        test_move[0,move-1] = 1
      #run the move and get a predicted reward
      ArrayToStoreEsts[0,move] = model(image, test_move)[0]
    # Computed once after all estimates are in; it used to be recomputed on
    # every iteration and was undefined for an empty choice list.
    Probabilities = torch.nn.Softmax(dim=1)(ArrayToStoreEsts)
  return ArrayToStoreEsts, Probabilities

In [157]:
##SampleInputelements
# Points are [y, x] in the game's own coordinates (see CreateOneMoveModelInputs).
tail = [[4,10],[3,10]]
head = [5,10]
food = [20,11]
# 0 is "up" in the 0,1,2,3 direction dialect used by return_available_directions.
starting_direction = 0

timage, tavailablechoices = CreateOneMoveModelInputs(tail, head, food, starting_direction)
logits, probas = CreateRewardEst(timage, tavailablechoices,Model)
print(logits)
print(probas)

tensor([[ 3.8191e-01,  6.9806e-01, -1.0000e+09,  8.5331e-01]],
       grad_fn=<CopySlices>)
tensor([[0.2516, 0.3452, 0.0000, 0.4032]], grad_fn=<SoftmaxBackward>)


In [151]:
pro

NameError: ignored

Old Code

In [None]:
def CreateImagesAllGamesONEFRAME(array_max, data):
    """Single-channel variant of the image builder (kept under "Old Code").

    In:  array_max -- number of frames to pre-allocate; data -- the log dict.
    Out: (AllGamesImage, frames) where AllGamesImage is (array_max, 1, 20, 20)
         with tail = -1, head = 1 and food = 10 in the one channel, and
         ``frames`` is the number of leading rows that were filled.  A game
         that would overflow array_max is not stored and ends the scan.
    """

    def CreateImageForOneGameOneFrame(array_max, instance, data):
        """Return a (plays, 1, 20, 20) array for one game."""
        ThisDictBody = TurnDictKeysToInt(data, instance, 'body')
        ThisDictFood = TurnDictKeysToInt(data, instance, 'FoodPos')
        image = np.zeros((len(ThisDictBody), 1, 20, 20))

        for key, value in ThisDictBody.items():
            for idx, point in enumerate(value):
                y = 20 - point[0]   # flip vertically so the top row comes first
                x = point[1] - 1    # stored coordinates start at 1, array indices at 0
                # The first body point is the head (1); the rest is tail (-1).
                image[int(key), 0, y, x] = 1 if idx == 0 else -1

        for key, value in ThisDictFood.items():
            image[int(key), 0, 20 - value[0], value[1] - 1] = 10

        return image

    AllGamesImage = np.zeros((array_max, 1, 20, 20))
    frames_start = 0
    for game in data.keys():
        tempArray = CreateImageForOneGameOneFrame(array_max, game, data)
        frames_end = frames_start + len(tempArray)
        # Too many frames for the pre-allocated array: stop here.  (An
        # unreachable print that followed this return was removed.)
        if frames_end > array_max:
            return AllGamesImage, frames_start
        AllGamesImage[frames_start:frames_end] = tempArray
        frames_start = frames_end
    print(AllGamesImage.shape,'imageshapewhenreturning')
    # frames_start equals the last frames_end and is 0 (instead of an unbound
    # name) when ``data`` has no games.
    print(frames_start)
    return AllGamesImage, frames_start

In [None]:
# EatReward = 3
# DieReward = -1
# FutureReward = 0
# DecayRate, period = 0.25, 3

# TimerStart = time.time()

# Images,frames =  CreateImagesAllGamesMANYFRAMES(150000,data)

# FirstPersonMoves = CreateDirectionArrayManyGames(data,'FaceDirectionChoice',3)
# CompassMoves = CreateDirectionArrayManyGames(data,'CompassDirectionChoice',4)
# DirectionPointing = CreateDirectionArrayManyGames(data,'StartingDirection',4)

# reward = CreateBigArrayofRewardFromManyGames(data,EatReward,DieReward, FutureReward, DecayRate, period)
# Images, FirstPersonMoves, reward, DirectionPointing, CompassMoves =Images[:frames], FirstPersonMoves[:frames], reward[:frames], DirectionPointing[:frames], CompassMoves[:frames]
# print(f'Time Taken: {int(time.time()-TimerStart)}')