Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance log analysis #32

Open
wants to merge 30 commits into
base: master
from
Open
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
9796ab9
A bit of a cleanup in log-analysis file
Jun 22, 2019
4f9fcd4
Add normalize_reward to log_analysis, a bit of cleanup, added colour …
Jun 22, 2019
e71484a
Add track_utils file
Jun 22, 2019
861a998
Avoid downloading of an already existent file, but allow forcing it
Jun 24, 2019
2f0e773
Improve plotting of grid world for evaluation (to work with negative …
Jun 30, 2019
546ad3c
Move plotting of top laps from the notebook
Jun 30, 2019
8d96466
Add functions to aggregate training data
Jun 30, 2019
3ccd777
Add functions to load and analyse evaluations in bulk
Jun 30, 2019
62b356b
readme, gitignore
Jun 30, 2019
5bb33a2
Add evaluation analysis
Jul 1, 2019
df39701
small fixes, store timestamp as decimal to make it readable
Jul 1, 2019
209ac8e
add code for reward recalculation
Jul 2, 2019
9730bb6
Add reward calculation, fix reward, Provide a Training_analysis.ipynb
Jul 2, 2019
6005efa
Make episodes per iteration configurable when parsing logs
Jul 2, 2019
8545d7f
Add action breakdown and small improvements all around
Jul 2, 2019
ba2fd9a
Add evaluation analysis notebook
Jul 2, 2019
3e86859
Improve the readme
Jul 2, 2019
07cc2a2
Improve the readme
Jul 2, 2019
b62a2a3
Add graphs per starting point to aggregates
Jul 12, 2019
dc5a48a
Set x axis in progress of buckets to (0,100)
Jul 12, 2019
e974b51
Add minimum and maximum complete times per iteration
Jul 12, 2019
79b1851
New York tracks for analysis
Jul 13, 2019
5e0c209
Fix minimum and maximum time plot
Jul 13, 2019
92f276f
Small updates and cleanups, reruns of updated functions
Jul 13, 2019
46e15f5
Clean up aggregate graphs for evaluation and fix timestamp issue in no…
Jul 17, 2019
5f5399c
Preserve shape of top laps graphs
Jul 17, 2019
bd61533
Speed up analysis of training progress and reorganise plotting in tra…
Jul 17, 2019
cc95464
Speed up plotting of evaluation laps
Jul 18, 2019
18deb7f
Updated notebooks with fixes, fine-tune log_analysis, fix some memory i…
Jul 20, 2019
4c69b6f
Added China training track npy
breadcentric Aug 3, 2019
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.

Always

Just for now

Speed up analysis of training progress and reorganise plotting in tra…

…ining analysis
  • Loading branch information...
Tomasz Ptak
Tomasz Ptak committed Jul 17, 2019
commit bd615331f5326909b4fb1e1c92920ccfebf8b5ca

Large diffs are not rendered by default.

@@ -408,137 +408,56 @@ def analyze_categories(panda, category='quintile', groupcount=5, title=None):
row += 1


def avg_and_dev(values, episodes_per_iteration):
average_val_per_iteration = list()
deviation_val_per_iteration = list()
def analyze_training_progress(aggregates, title=None):
aggregates['complete'] = np.where(aggregates['progress'] == 100, 1, 0)

buffer_val = list()
for val in values:
buffer_val.append(val)
grouped = aggregates.groupby('iteration')

if len(buffer_val) == episodes_per_iteration:
average_val_per_iteration.append(np.mean(buffer_val))
deviation_val_per_iteration.append(np.std(buffer_val))
# reset
buffer_val = list()
reward_per_iteration = grouped['reward'].agg([np.mean, np.std]).reset_index()
time_per_iteration = grouped['time'].agg([np.mean, np.std]).reset_index()
progress_per_iteration = grouped['progress'].agg([np.mean, np.std]).reset_index()

return average_val_per_iteration, deviation_val_per_iteration
complete_laps = aggregates[aggregates['progress'] == 100.0]
complete_grouped = complete_laps.groupby('iteration')

complete_times = complete_grouped['time'].agg([np.mean, np.min, np.max]).reset_index()

def plot(ax, values, xlabel, ylabel, title=None, red_above=None, min_values = None, max_values = None):
ax.plot(np.arange(len(values)), values, '.')
if(min_values):
ax.plot(np.arange(len(min_values)), min_values, 'r+')
if(max_values):
ax.plot(np.arange(len(max_values)), max_values, 'g+')
total_completion_rate = complete_laps.shape[0] / aggregates.shape[0]

if title:
ax.set_title(title)
ax.set_ylabel(ylabel)
ax.set_xlabel(xlabel)

if red_above:
for rr in range(len(values)):
if values[rr] >= red_above:
ax.plot(rr, values[rr], 'r.')
complete_per_iteration = grouped['complete'].agg([np.mean]).reset_index()

plt.grid(True)


def completion_rate(progresses):
completes = [progress for progress in progresses if progress == 100.0]
return len(completes) / len(progresses)


def analyze_training_progress(panda, episodes_per_iteration):
# reward graph per episode
min_episodes = np.min(panda['episode'])
max_episodes = np.max(panda['episode'])
print('Number of episodes = ', max_episodes)

total_reward_per_episode = list()
time_per_episode = list()
completed_time_per_episode = list()
progress_per_episode = list()
for epi in range(min_episodes, max_episodes):
df_slice = panda[panda['episode'] == epi]
total_reward_per_episode.append(np.sum(df_slice['reward']))
time_per_episode.append(np.ptp(df_slice['timestamp']))
progress_per_episode.append(np.max(df_slice['progress']))
completed_time_per_episode.append(time_per_episode[-1] if progress_per_episode[-1] == 100.0 else 0)

average_reward_per_iteration, deviation_reward_per_iteration = avg_and_dev(total_reward_per_episode,
episodes_per_iteration)
average_time_per_iteration, deviation_time_per_iteration = avg_and_dev(time_per_episode, episodes_per_iteration)
average_progress_per_iteration, deviation_progress_per_iteration = avg_and_dev(progress_per_episode,
episodes_per_iteration)

completion_rate_per_iteration = list()

total_completion_rate = completion_rate(progress_per_episode)

buffer_val = list()
iter_count = 0
for val in progress_per_episode:
buffer_val.append(val)

if len(buffer_val) == episodes_per_iteration:
completion_rate_for_iteration = completion_rate(buffer_val)
completion_rate_per_iteration.append(completion_rate_for_iteration)
buffer_val = list()
iter_count += 1

completed_time_per_iteration = list()
min_completed_time_per_iteration = list()
max_completed_time_per_iteration = list()
buffer_val = list()
for val in completed_time_per_episode:
buffer_val.append(val)

if len(buffer_val) == episodes_per_iteration:
complete_times = [t for t in buffer_val if t != 0]
buffer_val = list()
if len(complete_times) > 0:
completed_time_per_iteration.append(np.mean(complete_times))
min_completed_time_per_iteration.append(np.min(complete_times))
max_completed_time_per_iteration.append(np.max(complete_times))
else:
completed_time_per_iteration.append(0)
min_completed_time_per_iteration.append(0)
max_completed_time_per_iteration.append(0)

print('Number of iterations = ', iter_count)
print('Number of episodes = ', np.max(aggregates['episode']))
print('Number of iterations = ', np.max(aggregates['iteration']))

fig, axes = plt.subplots(nrows=3, ncols=3, figsize=[15, 15])

ax = axes[0, 0]
plot(ax, average_reward_per_iteration, 'Iteration', 'Mean reward', 'Rewards per Iteration')

ax = axes[1, 0]
plot(ax, deviation_reward_per_iteration, 'Iteration', 'Dev of reward')

ax = axes[2, 0]
plot(ax, total_reward_per_episode, 'Episode', 'Total reward')
if title:
fig.suptitle(title)

ax = axes[0, 1]
plot(ax, average_time_per_iteration, 'Iteration', 'Mean time', 'Times per Iteration')
plot(axes[0, 0], reward_per_iteration, 'iteration', 'Iteration', 'mean', 'Mean reward', 'Rewards per Iteration')
plot(axes[1, 0], reward_per_iteration, 'iteration', 'Iteration', 'std', 'Std dev of reward', 'Dev of reward')
plot(axes[2, 0], aggregates, 'episode', 'Episode', 'reward', 'Total reward')

ax = axes[1, 1]
plot(ax, deviation_time_per_iteration, 'Iteration', 'Dev of time')
plot(axes[0, 1], time_per_iteration, 'iteration', 'Iteration', 'mean', 'Mean time', 'Times per Iteration')
plot(axes[1, 1], time_per_iteration, 'iteration', 'Iteration', 'std', 'Std dev of time', 'Dev of time')
if complete_times.shape[0] > 0:
plot(axes[2, 1], complete_times, 'iteration', 'Iteration', 'mean', 'Time', 'Mean completed laps time')

ax = axes[2, 1]
plot(ax, completed_time_per_iteration, 'Iteration', 'Mean completed laps time', 'Mean completed time',
min_values=min_completed_time_per_iteration, max_values=max_completed_time_per_iteration)
plot(axes[0, 2], progress_per_iteration, 'iteration', 'Iteration', 'mean', 'Mean progress',
'Progress per Iteration')
plot(axes[1, 2], progress_per_iteration, 'iteration', 'Iteration', 'std', 'Std dev of progress', 'Dev of progress')
plot(axes[2, 2], complete_per_iteration, 'iteration', 'Iteration', 'mean', 'Completion rate',
'Completion rate (avg: %s)' % total_completion_rate)

ax = axes[0, 2]
plot(ax, average_progress_per_iteration, 'Iteration', 'Mean progress', 'Progress per Iteration')

ax = axes[1, 2]
plot(ax, deviation_progress_per_iteration, 'Iteration', 'Dev of progress')
def plot(ax, df, xval, xlabel, yval, ylabel, title=None):
df.plot.scatter(xval, yval, ax=ax, s=5, alpha=0.7)
if title:
ax.set_title(title)
ax.set_ylabel(ylabel)
ax.set_xlabel(xlabel)

ax = axes[2, 2]
plot(ax, completion_rate_per_iteration, 'Iteration', 'Completion rate',
'Completion rate (avg: %s)' % total_completion_rate)
plt.grid(True)


def load_eval_data(eval_fname):
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.