# Parse each model's README.md and performance.csv, and add one row per model to a spreadsheet summarizing all of our models

In [105]:
import numpy as np
import os
import csv

In [47]:
# Lazily walk the saved-models directory tree, parents before children.
saved_dir = os.path.expanduser('~/Dropbox/deep-retina/saved')
walker = os.walk(saved_dir, topdown=True)

In [48]:
# Materialize the walk as a list of [directory, subdirectories, files] entries
# so it can be indexed and iterated more than once.
full_paths = [
    [directory, subdirs, files]
    for directory, subdirs, files in walker
]

In [49]:
# Inspect the first walk entry: [directory, subdirectories, files] for the
# root of the walk.
full_paths[0]

['/Users/lmcintosh/Dropbox/deep-retina/saved',
 ['glm',
  'lane-desktop.lane',
  'lane.local.lmcintosh',
  'lenna.nirum',
  'lenna.salamander',
  'mbp.nirum'],
 ['.DS_Store']]

In [54]:
# Spot-check the file list of one walked directory. The index 6 depends on
# the walk order of this particular directory tree.
full_paths[6][2]

['architecture.json', 'performance.csv', 'README.md']

In [53]:
# Confirm that a membership test on an entry's file list detects the README.
'README.md' in full_paths[6][2]

True

In [164]:
# Build one summary record per saved model by combining its README.md
# (model description) with its performance.csv (per-epoch statistics).
all_models = []
readme_name = 'README.md'
performance_name = 'performance.csv'

# os.walk(topdown=True) reports the walked root first, so the first entry's
# directory is the base that the machine/folder names are measured from.
# This replaces fixed absolute-path indices, which only worked at one
# specific directory depth.
walk_root = full_paths[0][0] if full_paths else ''

for path in full_paths:
    # Each entry is [directory, subdirectories, files].
    # A model needs both files; without performance.csv there is nothing
    # to summarize, so skip the directory instead of failing on open().
    if readme_name not in path[2] or performance_name not in path[2]:
        continue

    # Parse README.md
    readme_path = os.path.join(path[0], readme_name)
    with open(readme_path, 'r') as f:
        description = f.readlines()
    if len(description) < 2:
        # The first two lines supply the model type and date.
        continue

    # Path of the README relative to the walked root; its first two
    # components are used as the machine and folder names.
    relative_parts = os.path.relpath(readme_path, walk_root).split(os.sep)
    if len(relative_parts) < 2:
        # README sits directly in the root: no machine component exists.
        continue

    # Reset per model so a README missing these lines cannot silently
    # inherit the values parsed from the previous model.
    cells = experiment = stimulus = ''
    for idd, d in enumerate(description):
        if 'Cell' in d:
            # Strip only the line ending; slicing off the last character
            # would eat real text on a final line without a newline.
            cells = d.rstrip('\r\n')
        elif 'Stimulus' in d and idd + 2 < len(description):
            # The two lines following the 'Stimulus' line are read as the
            # experiment and the stimulus, in that order.
            experiment = description[idd + 1].rstrip('\r\n')
            stimulus = description[idd + 2].rstrip('\r\n')

    # Parse performance.csv (first row is the header); blank rows are
    # dropped so they neither count as epochs nor break the float cast.
    performance_path = os.path.join(path[0], performance_name)
    with open(performance_path, 'r') as h:
        all_rows = [row for row in csv.reader(h) if row]
    num_epochs = len(all_rows) - 1  # -1 for the header

    # skip models for which we have no performance data
    if num_epochs < 1:
        continue

    just_numbers = np.array(all_rows[1:]).astype('float')
    # Columns 2 and 3 are reported as the train and test correlation
    # coefficients -- NOTE(review): confirm against the CSV header.
    train_cc = just_numbers[:, 2]
    test_cc = just_numbers[:, 3]

    model = {
        # The first README line minus its 2-character prefix, and the
        # second minus its 3-character prefix (presumably markdown
        # heading markers -- verify against a README).
        'type': description[0][2:].rstrip('\r\n'),
        'date': description[1][3:].rstrip('\r\n'),
        'machine': relative_parts[0],
        'folder': relative_parts[1],
        'stimulus': stimulus,
        'experiment': experiment,
        'cells': cells,
        'epochs': num_epochs,
        'max_train_cc': np.max(train_cc),
        'mean_train_cc': np.mean(train_cc),
        'max_test_cc': np.max(test_cc),
        'mean_test_cc': np.mean(test_cc),
    }
    all_models.append(model)
        

In [165]:
# Number of models that had a README and at least one epoch of performance data.
len(all_models)

55

In [166]:
# Inspect one parsed record as a sanity check.
all_models[40]

{'cells': 'Cell #[0, 1, 2, 3, 4]',
 'date': 'January 20, 2016',
 'epochs': 50,
 'experiment': 'Experiment 10-07-15',
 'folder': '2016-01-20 11.57.50 fixedlstm',
 'machine': 'lenna.salamander',
 'max_test_cc': 0.16915949846600001,
 'max_train_cc': 0.15430736180999999,
 'mean_test_cc': 0.059871836500338213,
 'mean_train_cc': 0.052905355619362646,
 'stimulus': 'naturalscene',
 'type': 'fixedlstm'}

In [167]:
# Header row for the spreadsheet: each column name maps to itself so the
# header can be written through the same dict-based writer as the records.
header = {column: column for column in sorted(all_models[0])}

In [168]:
# Put the header row first so it is written as the first line of the CSV.
# NOTE: this mutates all_models in place; re-running this cell inserts the
# header again.
all_models.insert(0, header)

In [169]:
# Write every record (the first one being the header row) to modelzoo.csv.
# The with-block guarantees the file is closed even if a row fails to write,
# e.g. because a record carries a key that is not among the field names.
# NOTE(review): on Python 3 the csv docs recommend open(..., newline='') to
# avoid blank lines between rows on Windows -- add it if this runs on Python 3.
with open('modelzoo.csv', 'w') as g:
    w = csv.DictWriter(g, list(all_models[0]))
    w.writerows(all_models)