In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import seaborn as sns

# Apply seaborn's default theme so later matplotlib/seaborn figures share one look.
sns.set()

In [2]:
# Atari game names. Each name is used both as a key of the `results` dict and as
# the name of the per-game sub-directory inside every result folder.
# NOTE: the order is kept exactly as-is ("Seaquest" intentionally stays last).
atari_games_list = [
    "Alien", "Amidar", "Assault", "Asterix",
    "BankHeist", "BattleZone", "Boxing", "Breakout",
    "ChopperCommand", "CrazyClimber", "DemonAttack", "Freeway",
    "Gopher", "Hero", "Kangaroo", "Krull",
    "KungFuMaster", "MsPacman", "Pong", "PrivateEye",
    "Qbert", "RoadRunner", "YarsRevenge", "Seaquest",
]

In [3]:
def extract_avg(f):
    """Return the number stored at the end of the first line of a result file.

    Parameters
    ----------
    f : str or path-like
        Path of the result file to read.

    Returns
    -------
    float
        The last whitespace-separated token of the file's first line.

    Raises
    ------
    ValueError
        If the first line is empty/blank, or its last token is not a number.
    """
    # Keep the handle under its own name so the path argument is not shadowed
    # and can still be reported in the error message below.
    with open(f) as result_fh:
        first_line = result_fh.readline()

    # split() without arguments splits on any run of whitespace and ignores the
    # trailing newline, so trailing spaces or tab separators do not break parsing.
    tokens = first_line.split()
    if not tokens:
        raise ValueError("No average found on the first line of " + repr(f))
    return float(tokens[-1])

In [4]:
## Collect, for every game, one average per result directory.
## results[game] is a list whose i-th entry belongs to DIRS[i] / aliases[i].
results = {}
DIRS = ["./BASE_CPU/", "./REUSE_CPU/", "./BASE_GPU/"]
aliases = ["base_cpu", "reuse_cpu", "base_gpu"]

## Game names for which a result was missing (one entry per missing game/directory pair)
games_results_not_complete = []

for game in atari_games_list:
    results[game] = []

## Iterate over the known games rather than over the directory listing:
##  * every game receives exactly one value per directory, so the position inside
##    results[game] always lines up with `aliases` (a missing game directory can no
##    longer shift the remaining values);
##  * stray files or unknown folders in a result directory are simply ignored
##    instead of raising KeyError / NotADirectoryError.
for DIR, alias in zip(DIRS, aliases):
    for game in atari_games_list:  ## Each game is a dir name
        full_path = os.path.join(DIR, game)

        ## A missing game directory is handled like an empty one.
        ## Sorting makes the choice of result file deterministic
        ## (os.listdir returns entries in arbitrary order).
        if os.path.isdir(full_path):
            result_files = sorted(os.listdir(full_path))
        else:
            result_files = []

        ## Detect missing results and keep the slot with NaN so the lists stay aligned
        if not result_files:
            print("No results for: " + game + " ("+DIR+")")
            games_results_not_complete.append(game)
            results[game].append(np.nan)
            continue

        ## Result of a particular game using a particular method/hardware
        results[game].append(extract_avg(os.path.join(full_path, result_files[0])))

In [5]:
## Averages over 10 runs. First element is from base_cpu, second from reuse_cpu, third from base_gpu.
## Things to notice:
## (1) Each run has high variance because episode lengths can vary greatly,
##     which means there can be a variable number of backpropagations.
## (2) The worst performance of reuse comes from games with large areas of pixel change
##     (BattleZone, ChopperCommand, Freeway, Krull, MsPacman, YarsRevenge).
## (3) Reuse gets a very big performance boost on games that have little pixel change from frame to frame
##     (Amidar, Breakout, CrazyClimber, Gopher, Hero, Kangaroo, KungFuMaster,
##      Pong, PrivateEye, Qbert).
## (4) There were games where the CPU version of reuse was faster than the GPU base version.
## (5) It is not immediately obvious why reuse performed much more poorly than base CPU on RoadRunner and Seaquest.


results

{'Alien': [10.56, 8.86, 4.62],
 'Amidar': [9.59, 6.01, 4.63],
 'Assault': [6.96, 5.66, 3.54],
 'Asterix': [7.46, 5.9, 3.85],
 'BankHeist': [7.5, 5.45, 3.88],
 'BattleZone': [8.35, 7.71, 3.69],
 'Boxing': [9.47, 7.82, 4.4],
 'Breakout': [3.99, 1.7, 3.07],
 'ChopperCommand': [9.41, 9.11, 4.34],
 'CrazyClimber': [3.59, 2.16, 2.87],
 'DemonAttack': [3.64, 3.4, 2.61],
 'Freeway': [6.65, 7.26, 4.29],
 'Gopher': [6.75, 3.71, 4.29],
 'Hero': [5.35, 3.17, 3.47],
 'Kangaroo': [9.82, 6.81, 4.31],
 'Krull': [6.11, 7.2, 3.85],
 'KungFuMaster': [9.14, 6.33, 4.46],
 'MsPacman': [7.61, 6.72, 4.22],
 'Pong': [5.69, 3.7, 3.63],
 'PrivateEye': [3.99, 3.07, 3.05],
 'Qbert': [6.74, 3.29, 3.97],
 'RoadRunner': [9.16, 7.32, 4.25],
 'Seaquest': [5.29, 3.97, 3.32],
 'YarsRevenge': [7.81, 7.29, 4.1]}

In [8]:
# Tabulate the collected averages: one column per game (the keys of `results`),
# one row per configuration, labelled with the entries of `aliases`.
df = pd.DataFrame(data=results, index=aliases)

In [9]:
#df = pd.DataFrame(results)


In [10]:
# Show the table built above (bare last expression -> rich DataFrame display).
df

Unnamed: 0,Alien,Amidar,Assault,Asterix,BankHeist,BattleZone,Boxing,Breakout,ChopperCommand,CrazyClimber,...,Kangaroo,Krull,KungFuMaster,MsPacman,Pong,PrivateEye,Qbert,RoadRunner,Seaquest,YarsRevenge
base_cpu,10.56,9.59,6.96,7.46,7.5,8.35,9.47,3.99,9.41,3.59,...,9.82,6.11,9.14,7.61,5.69,3.99,6.74,9.16,5.29,7.81
reuse_cpu,9.46,6.21,6.86,6.69,10.75,95.24,8.67,1.52,9.93,1.85,...,7.32,10.35,6.36,7.46,4.51,2.52,2.79,5.71,3.31,4.87
base_gpu,4.62,4.63,3.54,3.85,3.88,3.69,4.4,3.07,4.34,2.87,...,4.31,3.85,4.46,4.22,3.63,3.05,3.97,4.25,3.32,4.1


In [11]:
# Export the table as HTML; the relative path resolves against the current working directory.
html_table_path = 'training_table_by_episode_20_40_3000_steps.html'
df.to_html(html_table_path)