<h3>Readme</h3>
<ol>
<li>Check if the following libraries are installed before running the code. You can check this step by running the first cell below.</li>
    <ul>
     <li>PIL</li>
     <li>sklearn</li>
     <li>moviepy</li>
     <li>scipy</li>
    </ul>
<li>check if your data is arranged as follows</li>
    <ul>
<li>All the directories should be relative to the root path of your web server where the index.html file is located. If your are not running Jupyter Notebook from that root path, restart your Notebook from that path, and then continue.</li>
<li>Your game data folder should come in as:
<pre>
- game
    - expert
      - screenshots
      - embeddings
    - human
      - screenshots
      - embeddings
</pre>
   'expert' and 'human' are just example names for your corpora, you can choose whatever you want as long as you can recognize. These corpora will be displayed as separate buttons when you toggle on the 'corpora' button on the tsnemap page. </li>
    </ul>
<li>Go to the last cell, set the parameters as described.</li>
<li>Run all cells below.</li>
</ol>


In [1]:
%pylab inline
import math
import json
import os
from PIL import Image
from glob import glob
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

from moviepy.editor import *

Populating the interactive namespace from numpy and matplotlib


In [2]:
def resize_to_power_of_two(nSize):
    pos =  math.ceil(math.log2(nSize))  #(ceiling of log n with base 2)
    p   = pow(2, pos) 
    return p

def generate_spritesheet(photoSize, imageDir, outDir, game, corpus, jsonFile): 

    images = glob(imageDir + '/*.png')
    if len(images) == 0:
        images = glob(imageDir + '/*.jpg')
        
    images = sorted(images, key=lambda x: int(''.join(filter(str.isdigit, x))))   
    
    spritesheet_name = outDir + game + '/' + corpus
    print(spritesheet_name)
    
    # Read current json
    f = open(jsonFile, 'r+')
    corporaDict = json.load(f)
    if corpus not in corporaDict:
        corporaDict[corpus] = {}
    
    # Calculate the size of the output image, based on the photo thumb sizes
    
    totalNum = len(images)
    print(len(images))
    
    ncols = 64
    nrows = math.ceil(totalNum/ncols)
    
    photow = photoSize[0]
    photoh = photoSize[1]
   
    
    imgWidth = ncols*photow
    imgHeight = nrows*photoh
    
    imgWidth_resized = resize_to_power_of_two(imgWidth)
    imgHeight_resized = resize_to_power_of_two(imgHeight)
    
    isize = (imgWidth_resized,imgHeight_resized) # adapt to THREE
    print('imageSize=',isize, "photow", photow, "photoh", photoh, "")

    # Create the new image. 
    white = (0,0,0)
    inew = Image.new('RGB',isize,white)
    
    # Create JSON file to store the UV position of each image
    spriteDict = {}
    spriteDict['rows'] = nrows
    spriteDict['columns'] = ncols
    spriteDict['totalCount'] = totalNum
    spriteDict['spriteWidth'] = photow
    spriteDict['spriteHeight'] = photoh 
    spriteDict['spritesheet'] = {} 
    
    count = 0
    # Insert each thumb:
    for irow in range(nrows):
        for icol in range(ncols):
            left = icol*(photow)
            right = left + photow
            upper = irow*(photoh)
            lower = upper + photoh
            bbox = (left,upper,right,lower)
            try:
                # Read in an image and resize appropriately
                img = Image.open(images[count]).resize((photow,photoh))
            except:
                break
            inew.paste(img,bbox)
            if count not in spriteDict['spritesheet'].keys():
                spriteDict['spritesheet'][count] = {}
            spriteDict['spritesheet'][count]['filename'] = os.path.basename(images[count])
            spriteDict['spritesheet'][count]['uvOffset_u'] = left/imgWidth
            spriteDict['spritesheet'][count]['uvOffset_v'] = 1.0-lower/imgHeight_resized 
            spriteDict['spritesheet'][count]['uvRepeat_u'] = photow/imgWidth_resized
            spriteDict['spritesheet'][count]['uvRepeat_v'] = photoh/imgHeight_resized
            count += 1 
    
    final_filename = spritesheet_name + '.png'
    inew.save(final_filename)
    
    #add new content
    corporaDict[corpus] = spriteDict
    js = json.dumps(corporaDict, indent=2)
    f.seek(0) 
    f.write(js)
    f.close()
    
    return inew, final_filename

In [7]:
def get_embeddings(embeddingDir, game, corpus):
    embeddings = glob(embeddingDir + '/*.npy')
    embeddings = sorted(embeddings, key=lambda x: int(''.join(filter(str.isdigit, x))))
    emb_lst = []
    for embed in embeddings:
        np_emb = load(embed)
        emb_lst.append(np_emb)
    states = np.concatenate(emb_lst) 
    return states

#Call this if several corpora are expected
def get_positions(gameDir, outputDir, dimensions, perplexity, game, corpora, jsonFile):
    state_lst = []
    position_dict = {}
   
    for corpus, value in corpora.items():  
        embeddingDir = gameDir + corpus + '/' + dir_embeddings
        states = get_embeddings(embeddingDir, game, corpus)

        print(states.shape, 'from', embeddingDir, game, corpus)

        state_lst.append(states)

    all_states = np.concatenate(state_lst) 
    if len(all_states) != 0:
        print(f'all_states.shape={all_states.shape}')
    else:
        print('Reading npy file failed! Check the file path.')
     
    # PCA
    new_states = all_states
    if runPCA:
        pca_n = PCA(n_components= pca_component, svd_solver='full')
        new_states = pca_n.fit_transform(all_states)
    
    # TSNE
    tsne = TSNE(n_components = dimensions, perplexity = perplexity)
    all_positions = tsne.fit_transform(new_states)
    # noise = np.random.normal(0, 0.1, all_positions.shape)
    # all_positions += noise
    
    if all_positions is not None:
        tempName = outDir + game + '/positions_' + str(dimensions) + 'D_temp.json'
        print('position file:', tempName)
        all_positions.tofile(tempName,sep=" ", format="%s")
    
    with open(jsonFile, 'r+') as f:
        corporaDict = json.load(f)
        
        binfile_dict = {}
        positionbuffer_name = 'None'
        if all_positions is not None:
            file_idx = 0
            for corpus, value in corpora.items():
                length = corporaDict[corpus]['totalCount']
                print(f'totalCount in corpus: {length}')
                positionbuffer_name = outDir + game + '/' + corpus + '_' + str(dimensions) + 'D.bin'
                all_positions[file_idx : file_idx + length].tofile(positionbuffer_name) #default is binary write
                binfile_dict[corpus] = positionbuffer_name
                file_idx = length
        else:
            dimensions = 0

        for corpus, value in corpora.items():
            if corpus not in corporaDict:
                corporaDict[corpus] = {}
            posDict = {}
            posDict['dimensions'] = dimensions
            posDict['perplexity'] = perplexity
            posDict['bin'] = binfile_dict[corpus]

            corporaDict[corpus]['positions'] = posDict
            corporaDict[corpus]['spriteSheetPath'] = spritesheet_list[corpus]

        js = json.dumps(corporaDict, indent=2) 
        f.seek(0) 
        f.write(js)
        f.close()
    

    

In [4]:
def get_screenshots(screenshotDir, game, corpus):
    screenshots = glob(screenshotDir + '/*.png')
    screenshots = sorted(screenshots, key=lambda x: int(os.path.basename(x)[:-4]))
    return screenshots

def generate_video(fps, gamedir, outDir, dir_screenshots, game, corpora, jsonFile):
    imageList = []
    for corpus, value in corpora.items():  
        screenshotDir = gamedir + corpus + '/' + dir_screenshots
        print(screenshotDir)
        screenshots = get_screenshots(screenshotDir, game, corpus)
        imageList.append(screenshots)
    
    # print(state_lst)
    all_images = numpy.concatenate( imageList, axis=0 ) 
        
    #Save video
    videoFile = outDir + game + '/movie.mp4'
    clips = [ImageClip(m).set_duration(1/fps)
             for m in all_images]
    concat_clip = concatenate_videoclips(clips, method="compose")
    concat_clip.write_videofile(videoFile, fps=fps)
    
    # Read current json
    f = open(jsonFile, 'r+')
    corporaDict = json.load(f)
    if 'video' not in corporaDict: 
        corporaDict['video'] = {}     
    corporaDict['video']['source'] = videoFile
    corporaDict['video']['fps'] = fps
    corporaDict['video']['total'] = len(all_images)
    
    js = json.dumps(corporaDict, indent=2)
    f.seek(0) 
    f.write(js)
    f.close()
    
        
       

In [9]:
%%time
######Parameters Begin#########################################################################################################################
#Directory settings
game = 'The Last Of Us' # Distinctive name for your game. It will be displayed in the dropdown list on the main page
gamedir = '../data source/The Last Of Us/output/' # The root path of your game data 
dir_screenshots = 'screenshots' # the folder name of your screenshots
dir_embeddings = 'embeddings_ma5' # the folder name of your embeddings
outDir = './pic/' # the path to output the generated results
#Spritesheet settings
photow,photoh = 64,56 # width and height: resize for your high-resolution screenshots 
#t-SNE settings
#For detailed explanation, please refer to: http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html. 
runTSNE = True # True if you want to run tsne 
dimension = 3 # dimensionality used for tsne mapping
perplexity = 30 # perplexity used for tsne mapping
runPCA = False
pca_component = 100
#video settings
runVideo = False # True if you want to generate a video for all your screenshots, you can then use the animation feature on the website. 
fps=24 # Frame per second.
########Parameters End#########################################################################################################################

runSpritesheet = True
photoSize = (80,45)
#Find corpora
corpora = {}
corpora['dir'] = gamedir
corpora['game'] = game
corpora['screenshots_folder'] = dir_screenshots
corpora['embeddings_folder'] = dir_embeddings
corpora['corpus'] = {}

folders = next(os.walk(gamedir))[1]
for i in range(len(folders)):
    folder = os.path.join(gamedir, folders[i])
    idx = folder.rfind('/')
    corpus = folder[idx+1:]
    corpus = corpus.replace(' ', '_')
    corpora['corpus'][corpus] = folder
    
#Create output dir
destFolder = outDir + game
if not os.path.exists(destFolder):
    os.makedirs(destFolder)

#Create json file
jsonFile = outDir + game + '_' + str(dimension) + 'D.json'
f = open(jsonFile, 'w+')
js = json.dumps(corpora, indent=2)
f.write(js)
f.close()

#Add Gameinfo
gameinfo_file = 'gameinfo.json' 
with open(gameinfo_file, 'r+') as f:
    data = f.read()
    if len(data) > 0:
        gameinfo = json.loads(data)
    else:
        gameinfo = {}
        
    if game not in gameinfo:
        gameinfo[game] = {}
        
    gameinfo[game][str(dimension)] = jsonFile
    print(gameinfo)
    js = json.dumps(gameinfo, indent=2) 
    f.seek(0) 
    f.write(js)
    f.close()

spritesheet_list = {}

if runSpritesheet:
    for corpus, value in corpora['corpus'].items():
        imageDir = value + '/'+ dir_screenshots
        inew, filename = generate_spritesheet(photoSize, imageDir, outDir, game, corpus, jsonFile)
        spritesheet_list[corpus] = filename
        
if runTSNE:
    get_positions(gamedir, outDir, dimension, perplexity, game, corpora['corpus'], jsonFile)
        
if runVideo:
    generate_video(fps, gamedir, outDir, dir_screenshots, game, corpora['corpus'], jsonFile)       

{'Life is Strange 1': {'3': './pic/Life is Strange 1_3D.json', '2': './pic/Life is Strange 1_2D.json'}, 'The Last Of Us': {'2': './pic/The Last Of Us_2D.json', '3': './pic/The Last Of Us_3D.json'}}
./pic/The Last Of Us/The_Last_Of_Us
7066
imageSize= (8192, 8192) photow 80 photoh 45 
(7067, 300) from ../data source/The Last Of Us/output/The_Last_Of_Us/embeddings_ma5 The Last Of Us The_Last_Of_Us
all_states.shape=(7067, 300)
position file: ./pic/The Last Of Us/positions_3D_temp.json
totalCount in corpus: 7066
Wall time: 7min 35s
