In [1]:
import pandas as pd  
import matplotlib.pyplot as plt  
from scipy.stats import sem  
  
# This function takes an array of numbers and smoothes them out.  
# Smoothing is useful for making plots a little easier to read.  
def sliding_mean(data_array, window=5):  
    data_array = array(data_array)  
    new_list = []  
    for i in range(len(data_array)):  
        indices = range(max(i - window + 1, 0),  
                        min(i + window + 1, len(data_array)))  
        avg = 0  
        for j in indices:  
            avg += data_array[j]  
        avg /= float(len(indices))  
        new_list.append(avg)  
          
    return array(new_list)  
  
# Due to an agreement with the ChessGames.com admin, I cannot make the data  
# for this plot publicly available. This function reads in and parses the  
# chess data set into a tabulated pandas DataFrame.  
chess_data = read_chess_data()
  
# These variables are where we put the years (x-axis), means (y-axis), and error bar values.  
# We could just as easily replace the means with medians,  
# and standard errors (SEMs) with standard deviations (STDs).  
years = chess_data.groupby("Year").PlyCount.mean().keys()  
mean_PlyCount = sliding_mean(chess_data.groupby("Year").PlyCount.mean().values,  
                             window=10)  
sem_PlyCount = sliding_mean(chess_data.groupby("Year").PlyCount.apply(sem).mul(1.96).values,  
                            window=10)  
  
# You typically want your plot to be ~1.33x wider than tall.  
# Common sizes: (10, 7.5) and (12, 9)  
plt.figure(figsize=(12, 9))  
  
# Remove the plot frame lines. They are unnecessary chartjunk.  
ax = plt.subplot(111)  
ax.spines["top"].set_visible(False)  
ax.spines["right"].set_visible(False)  
  
# Ensure that the axis ticks only show up on the bottom and left of the plot.  
# Ticks on the right and top of the plot are generally unnecessary chartjunk.  
ax.get_xaxis().tick_bottom()  
ax.get_yaxis().tick_left()  
  
# Limit the range of the plot to only where the data is.  
# Avoid unnecessary whitespace.  
plt.ylim(63, 85)  
  
# Make sure your axis ticks are large enough to be easily read.  
# You don't want your viewers squinting to read your plot.  
plt.xticks(range(1850, 2011, 20), fontsize=14)  
plt.yticks(range(65, 86, 5), fontsize=14)  
  
# Along the same vein, make sure your axis labels are large  
# enough to be easily read as well. Make them slightly larger  
# than your axis tick labels so they stand out.  
plt.ylabel("Ply per Game", fontsize=16)  
  
# Use matplotlib's fill_between() call to create error bars.  
# Use the dark blue "#3F5D7D" as a nice fill color.  
plt.fill_between(years, mean_PlyCount - sem_PlyCount,  
                 mean_PlyCount + sem_PlyCount, color="#3F5D7D")  
  
# Plot the means as a white line in between the error bars.   
# White stands out best against the dark blue.  
plt.plot(years, mean_PlyCount, color="white", lw=2)
  
# Make the title big enough so it spans the entire plot, but don't make it  
# so big that it requires two lines to show.  
plt.title("Chess games are getting longer", fontsize=22)
  
# Always include your data source(s) and copyright notice! And for your  
# data sources, tell your viewers exactly where the data came from,  
# preferably with a direct link to the data. Just telling your viewers  
# that you used data from the "U.S. Census Bureau" is completely useless:  
# the U.S. Census Bureau provides all kinds of data, so how are your  
# viewers supposed to know which data set you used?  
plt.xlabel("\nData source: www.ChessGames.com | "  
           "Author: Randy Olson (randalolson.com / @randal_olson)", fontsize=10)

# Finally, save the figure as a PNG.  
# You can also save it as a PDF, JPEG, etc.  
# Just change the file extension in this call.  
# bbox_inches="tight" removes all the extra whitespace on the edges of your plot.  
plt.savefig("chess-number-ply-over-time.png", bbox_inches="tight")

NameError: name 'read_chess_data' is not defined

In [4]:
from plots import parse_results

print parse_results.parse_results_file('./plots/data/hadooqn_toy_mr_sectors_fake.txt')

    reward states     step
0     0.00    4\n   255451
1     0.00    6\n   509098
2     0.00    6\n   759123
3     0.00    6\n  1001760
4     0.00    6\n  1259450
5     0.00    6\n  1500343
6     0.00    6\n  1751858
7     0.00    6\n  2000037
8     0.00    6\n  2250062
9     0.00    6\n  2501554
10    0.00    6\n  2750794
11    0.00    6\n  3000819
12    0.00    6\n  3250844
13    0.00    6\n  3502986
14    0.00    6\n  3755919
15    0.00    6\n  4000122
16    0.00    6\n  4257860
17    0.00    6\n  3755919
18    0.00    6\n  4000122
19    0.00    6\n  4257860
20    0.00    6\n  3755919
21    0.00    6\n  4000122
22    0.00    6\n  4257860
23    0.00    6\n  3755919
24    0.00    6\n  4000122
25    0.00    6\n  4257860
26    0.00    6\n  3755919
27    0.00    6\n  4000122
28    0.00    6\n  4257860
29    0.00    6\n  3755919
..     ...    ...      ...
170   1.00    6\n  4000122
171   1.00    6\n  4257860
172   1.00    6\n  3755919
173   1.00    6\n  4000122
174   1.00    6\n  4257860
1

In [4]:
from plots import plot_data
from plots import parse_results
import numpy as np

data_1 = parse_results.parse_results_file('./plots/data/hadooqn_toy_mr_sectors_fake.txt')['reward'].values.astype(np.float)
data_2 = parse_results.parse_results_file('./plots/data/cts_toy_mr_fake.txt')['reward'].values.astype(np.float)
data_3 = parse_results.parse_results_file('./plots/data/double_dqn_toy_mr_fake.txt')['reward'].values.astype(np.float)

labels = ['Hadooqn', 'Intrinsic', 'Double DQN']

plot_data.plot_data(range(0, 200), [data_1, data_2, data_3], 'Reward', 'Average Test Reward', 'Millions of Frames', labels=labels, ylim=[-.1, 1.1], yticks=[0, 1], save_file='./plots/figures/toy_mr_reward.png')

In [3]:
data = parse_results.parse_results_file('./plots/data/hadooqn_toy_mr_sectors_fake.txt')

print data['reward'].values

plt.plot(data['reward'].keys(), data['reward'].values.astype(np.float), color='#3F5D7D', lw=2)

['0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00'
 '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00'
 '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00'
 '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00'
 '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00'
 '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00'
 '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00'
 '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00' '0.00'
 '0.00' '0.00' '0.00' '1.00' '1.00' '0.00' '0.00' '1.00' '1.00' '0.00'
 '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00'
 '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00'
 '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00'
 '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00'
 '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00' '1.00'
 '1.00

NameError: name 'plt' is not defined