# Ten intensities fixer

The original N&W data set contained a large number of values > 20 s; the data must have been generated in a strange way. It's confusing to have students look at a data set with lots of invalid data points in it, so here we reduce the data set to only valid values (spike times <= 20) and re-save the file.

In [53]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

First we load the data. The syntax here is different from what we've used to read CSV files into pandas, but it works for pickles.

In [9]:
# Pickles are binary, so the file is opened in 'rb' mode.
# NOTE(review): encoding='latin1' is presumably needed because the file was
# pickled under Python 2 -- confirm against the original data source.
with open('ten_intensities.pkl', 'rb') as pkl_file:
    ten_intensities = pickle.load(pkl_file, encoding='latin1')

In [10]:
# Display the loaded dictionary (intensity label -> list of per-trial spike-time lists)
ten_intensities

{'4_intensity': [[11.0, 10.0, 33.0, 32.0],
  [4.0, 30.0, 32.0, 35.0],
  [17.0, 11.0, 30.0, 20.0, 34.0],
  [22.0, 34.0],
  [7.0, 20.0, 27.0, 32.0],
  [25.0, 29.0, 13.0, 35.0, 33.0],
  [32.0, 20.0, 24.0, 30.0],
  [31.0, 19.0, 29.0, 33.0],
  [15.0, 13.0, 35.0, 38.0],
  [25.0, 24.0, 31.0, 31.0]],
 '2_intensity': [[29.0, 29.0, 29.0, 24.0],
  [43.0, 48.0, 18.0, 46.0],
  [44.0, 24.0, 38.0, 22.0],
  [53.0, 37.0, 25.0, 64.0],
  [52.0, 41.0, 49.0, 45.0, 50.0, 17.0, 60.0],
  [16.0, 58.0, 15.0, 37.0],
  [39.0, 50.0],
  [31.0, 40.0, 47.0, 66.0],
  [35.0, 42.0, 35.0, 45.0, 33.0, 60.0],
  [20.0, 30.0, 47.0, 17.0, 52.0, 39.0, 21.0]],
 '8_intensity': [[6.0, 9.0, 11.0, 17.0, 18.0],
  [8.0, 9.0, 11.0, 18.0],
  [1.0, 10.0, 13.0, 19.0],
  [8.0, 9.0, 12.0, 13.0, 16.0, 17.0],
  [8.0, 9.0, 11.0, 18.0],
  [0.0, 10.0, 11.0, 16.0],
  [8.0, 9.0, 11.0, 18.0],
  [9.0, 11.0, 13.0, 17.0],
  [8.0, 9.0, 12.0, 13.0, 16.0, 17.0],
  [8.0, 9.0, 10.0, 13.0, 14.0, 17.0, 18.0]],
 '0_intensity': [[83.0, 41.0],
  [14.0, 87.0, 1

In [11]:
# Drop every spike time above 20, trial by trial. Each trial's list is
# replaced inside the existing per-intensity list, so the dictionary is
# updated in place (trials with no valid spikes become empty lists).
for trials in ten_intensities.values():
    for idx, spikes in enumerate(trials):
        trials[idx] = [t for t in spikes if t <= 20]

In [12]:
# Display the dictionary again to confirm that values > 20 were removed
ten_intensities

{'4_intensity': [[11.0, 10.0],
  [4.0],
  [17.0, 11.0, 20.0],
  [],
  [7.0, 20.0],
  [13.0],
  [20.0],
  [19.0],
  [15.0, 13.0],
  []],
 '2_intensity': [[],
  [18.0],
  [],
  [],
  [17.0],
  [16.0, 15.0],
  [],
  [],
  [],
  [20.0, 17.0]],
 '8_intensity': [[6.0, 9.0, 11.0, 17.0, 18.0],
  [8.0, 9.0, 11.0, 18.0],
  [1.0, 10.0, 13.0, 19.0],
  [8.0, 9.0, 12.0, 13.0, 16.0, 17.0],
  [8.0, 9.0, 11.0, 18.0],
  [0.0, 10.0, 11.0, 16.0],
  [8.0, 9.0, 11.0, 18.0],
  [9.0, 11.0, 13.0, 17.0],
  [8.0, 9.0, 12.0, 13.0, 16.0, 17.0],
  [8.0, 9.0, 10.0, 13.0, 14.0, 17.0, 18.0]],
 '0_intensity': [[],
  [14.0, 18.0],
  [],
  [14.0, 20.0],
  [14.0],
  [18.0],
  [13.0],
  [],
  [],
  []],
 '7_intensity': [[10.0, 10.0, 12.0, 14.0, 16.0, 17.0],
  [9.0, 11.0, 14.0, 17.0],
  [8.0, 9.0, 10.0, 14.0, 16.0, 17.0, 19.0],
  [6.0, 11.0, 13.0, 19.0, 20.0],
  [3.0, 10.0, 14.0],
  [7.0, 9.0, 13.0, 17.0, 20.0],
  [7.0, 11.0, 13.0, 19.0, 20.0],
  [10.0, 11.0, 14.0, 17.0],
  [10.0, 10.0, 11.0, 19.0],
  [10.0, 14.0]],
 '5_int

## Write pickle

In [14]:
# Save the filtered data to a new pickle file. The with-block guarantees the
# file is flushed and closed even if pickle.dump() raises.
with open('ten_intensities_data.pkl', 'wb') as outfile:
    pickle.dump(ten_intensities, outfile)

## Confirm it worked

In [15]:
# Read back the file that was just written so its contents can be inspected
with open('ten_intensities_data.pkl', 'rb') as saved_file:
    check_data = pickle.load(saved_file)

In [16]:
# Display the re-loaded data; it should match the filtered dictionary shown earlier
check_data

{'4_intensity': [[11.0, 10.0],
  [4.0],
  [17.0, 11.0, 20.0],
  [],
  [7.0, 20.0],
  [13.0],
  [20.0],
  [19.0],
  [15.0, 13.0],
  []],
 '2_intensity': [[],
  [18.0],
  [],
  [],
  [17.0],
  [16.0, 15.0],
  [],
  [],
  [],
  [20.0, 17.0]],
 '8_intensity': [[6.0, 9.0, 11.0, 17.0, 18.0],
  [8.0, 9.0, 11.0, 18.0],
  [1.0, 10.0, 13.0, 19.0],
  [8.0, 9.0, 12.0, 13.0, 16.0, 17.0],
  [8.0, 9.0, 11.0, 18.0],
  [0.0, 10.0, 11.0, 16.0],
  [8.0, 9.0, 11.0, 18.0],
  [9.0, 11.0, 13.0, 17.0],
  [8.0, 9.0, 12.0, 13.0, 16.0, 17.0],
  [8.0, 9.0, 10.0, 13.0, 14.0, 17.0, 18.0]],
 '0_intensity': [[],
  [14.0, 18.0],
  [],
  [14.0, 20.0],
  [14.0],
  [18.0],
  [13.0],
  [],
  [],
  []],
 '7_intensity': [[10.0, 10.0, 12.0, 14.0, 16.0, 17.0],
  [9.0, 11.0, 14.0, 17.0],
  [8.0, 9.0, 10.0, 14.0, 16.0, 17.0, 19.0],
  [6.0, 11.0, 13.0, 19.0, 20.0],
  [3.0, 10.0, 14.0],
  [7.0, 9.0, 13.0, 17.0, 20.0],
  [7.0, 11.0, 13.0, 19.0, 20.0],
  [10.0, 11.0, 14.0, 17.0],
  [10.0, 10.0, 11.0, 19.0],
  [10.0, 14.0]],
 '5_int

In [93]:
# NOTE(review): the result is not assigned anywhere -- looks like a leftover scratch cell
np.array([0, 1])
            

array([0, 1])

## Convert to DataFrame of spike times

In [116]:
num_trials = 10     # number of trials at each intensity
num_timepoints = 21 # time points for each trial

# Build a "long" table with one row per spike: (Intensity, Trial, SpikeTime).
# Trials without any spikes contribute no rows.
rows = []

# Keys look like '4_intensity'. The number is parsed from everything before
# the underscore (not just the first character) and the keys are ordered
# numerically, so multi-digit intensity labels would also be handled.
for int_lev in sorted(ten_intensities, key=lambda k: int(k.split('_')[0])):
    intensity = int(int_lev.split('_')[0])
    for trial, spike_times in enumerate(ten_intensities[int_lev]):
        rows.extend((intensity, trial, t) for t in spike_times)

# dtype=float keeps every column as float in `df`; this also yields an empty
# frame with the right columns (rather than an error) if there are no spikes.
df = pd.DataFrame(rows,
                  columns=['Intensity', 'Trial', 'SpikeTime'],
                  dtype=float
                 )

# The saved CSV uses integer values; `df` itself is left unchanged
df.astype('int').to_csv('ten_intensities.csv', index=False)

In [117]:
# int_lev still holds the last key visited by the loop in the previous cell;
# this shows how many trials are stored under that key
len(ten_intensities[int_lev])

10

In [118]:
# Display the spike-time DataFrame (one row per spike)
df

Unnamed: 0,Intensity,Trial,SpikeTime
0,0.0,1.0,14.0
1,0.0,1.0,18.0
2,0.0,3.0,14.0
3,0.0,3.0,20.0
4,0.0,4.0,14.0
...,...,...,...
226,9.0,9.0,9.0
227,9.0,9.0,12.0
228,9.0,9.0,13.0
229,9.0,9.0,16.0


## Convert to DataFrame - binary data

In [72]:
num_trials = 10     # number of trials at each intensity
num_timepoints = 21 # time points for each trial

# Column labels are identical for every intensity level, so build them once:
# 'Intensity', 'Trial', then one column per time point (0 .. num_timepoints-1)
cols = ['Intensity', 'Trial'] + list(range(num_timepoints))

df_list = []

for int_lev in ten_intensities:
    # One row per trial, one column per time point; 1 marks a spike at that time
    mat = np.zeros([num_trials, num_timepoints], dtype=int)
    for trial, spike_times in enumerate(ten_intensities[int_lev]):
        for val in spike_times:
            mat[trial, int(val)] = 1

    # columns for intensity and trial num
    # Keys look like '4_intensity'; parse everything before the underscore
    # rather than only the first character.
    lev_vec = np.repeat(int(int_lev.split('_')[0]), num_trials)
    trials = np.arange(num_trials)

    df_list.append(pd.DataFrame(np.c_[lev_vec, trials, mat],
                                columns=cols)
                  )

# Stack the per-intensity tables in dictionary order. The index is not reset,
# so each intensity's rows keep their own 0 .. num_trials-1 labels.
df = pd.concat(df_list).astype('int')

In [73]:
# Display the binary DataFrame (one row per trial, one column per time point)
df

Unnamed: 0,Intensity,Trial,0,1,2,3,4,5,6,7,...,11,12,13,14,15,16,17,18,19,20
0,4,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,4,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,2,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,1
3,4,3,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,4,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,1,5,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,1,6,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
7,1,7,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,1,8,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [91]:
# Save the binary table; index=False omits the row index from the CSV
df.to_csv('ten_intensities_bin.csv', index=False)