#### Review: 2D arrays

* boolean indexing
* other ways to create 2d array

In [21]:
ages = np.array([22, 10, 34, 71, 14, 50, 61, 15]) # age in years
heights = np.array([1.7, 1.0, 1.8, 1.9, 0.9, 1.5, 1.5, 1.0]) # height in m

# How can we get: 
# the average height of people older than 20 ?

greater_than_20 = ages > 20

# this works, by "filtering out" the people younger than 20, and leaving
# zeros in their place
# but taking the average of these wouldn't work, because the zeros would mess it up
filtered = heights * greater_than_20

# to get the average from this:
number_of_people_older_than_20 = np.sum(greater_than_20)
mean_height_of_people_over_20 = np.sum(filtered) / number_of_people_older_than_20

In [22]:
print(greater_than_20)

[ True False  True  True False  True  True False]


In [11]:
# how can we ignore the zeros in filtered array?

ages = np.array([22, 10, 34, 71, 14, 50, 61, 15]) # age in years
heights = np.array([1.7, 1.0, 1.8, 1.9, 0.9, 1.5, 1.5, 1.0]) # height in m
greater_than_20 = ages > 20

filtered = heights * greater_than_20

for i in range(len(filtered)):
    if filtered[i] == 0:
        filtered[i] = None
print(filtered)

# nan : "not a number"
print(np.mean(filtered))

# to get around that:
print(np.nanmean(filtered))

[1.7 nan 1.8 1.9 nan 1.5 1.5 nan]
nan
1.6800000000000002


In [13]:
# an easier approach:

ages = np.array([22, 10, 34, 71, 14, 50, 61, 15]) # age in years
heights = np.array([1.7, 1.0, 1.8, 1.9, 0.9, 1.5, 1.5, 1.0]) # height in m
greater_than_20 = ages > 20

heights[greater_than_20]
# normally we would index with numbers inside
# here are doing something new: indexing with an array of True/False's

array([1.7, 1.8, 1.9, 1.5, 1.5])

In [14]:
bools = np.array([True, True, False])

heights[bools]

IndexError: boolean index did not match indexed array along dimension 0; dimension is 8 but corresponding boolean dimension is 3

In [18]:
heights[ages>20] # the "ages>20" part of this creates a boolean array
# we then use that boolean array to index the elements of "heights"

heights_over_1point5 = heights[heights > 1.5]

print(heights)
print(heights_over_1point5)

[1.7 1.  1.8 1.9 0.9 1.5 1.5 1. ]
[1.7 1.8 1.9]


In [49]:
# how about 2d arrays and boolean indexing?

arr = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])

# how can we select the 0th, 1st, and 3rd rows at once 
indexer = np.array([True, True, False, True])

arr[indexer]
arr[indexer,:] # colon means all items in that axis

#print(arr[0])
print(arr[indexer])

[[ 1  2  3]
 [ 4  5  6]
 [10 11 12]]


In [37]:
# can we nest a boolean index inside our regular number-based indexing?

# for example, can we index the first row, only where it is greater than 4?

print(arr[1, arr[1]>4])

# but you would normally do this in 2 steps to be clearer:
row1 = arr[1]
#print(row1)
answer = row1[row1>4]
print(answer)

[5 6]
[5 6]


In [51]:
# we didn't talk about this approach explicitly
arr = np.arange(200).reshape([20,10])
print(arr)

[[  0   1   2   3   4   5   6   7   8   9]
 [ 10  11  12  13  14  15  16  17  18  19]
 [ 20  21  22  23  24  25  26  27  28  29]
 [ 30  31  32  33  34  35  36  37  38  39]
 [ 40  41  42  43  44  45  46  47  48  49]
 [ 50  51  52  53  54  55  56  57  58  59]
 [ 60  61  62  63  64  65  66  67  68  69]
 [ 70  71  72  73  74  75  76  77  78  79]
 [ 80  81  82  83  84  85  86  87  88  89]
 [ 90  91  92  93  94  95  96  97  98  99]
 [100 101 102 103 104 105 106 107 108 109]
 [110 111 112 113 114 115 116 117 118 119]
 [120 121 122 123 124 125 126 127 128 129]
 [130 131 132 133 134 135 136 137 138 139]
 [140 141 142 143 144 145 146 147 148 149]
 [150 151 152 153 154 155 156 157 158 159]
 [160 161 162 163 164 165 166 167 168 169]
 [170 171 172 173 174 175 176 177 178 179]
 [180 181 182 183 184 185 186 187 188 189]
 [190 191 192 193 194 195 196 197 198 199]]


In [43]:
arr[arr>84]

# notice that this resulted in a 1D array of all the values over 84
# which makes sense because 85 was in the middle of a row,
# so we (and numpy) would not know how to perserve the 2D nature of it
# 
# because this scenario can happen, numpy will always flatten
# the result when indexing like this

array([ 85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
        98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
       111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
       124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136,
       137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
       150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162,
       163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
       176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
       189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199])

In [45]:
arr[arr>=100]

array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
       113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
       126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
       139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151,
       152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
       165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177,
       178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
       191, 192, 193, 194, 195, 196, 197, 198, 199])

In [46]:
print(arr)

[[  0   1   2   3   4   5   6   7   8   9]
 [ 10  11  12  13  14  15  16  17  18  19]
 [ 20  21  22  23  24  25  26  27  28  29]
 [ 30  31  32  33  34  35  36  37  38  39]
 [ 40  41  42  43  44  45  46  47  48  49]
 [ 50  51  52  53  54  55  56  57  58  59]
 [ 60  61  62  63  64  65  66  67  68  69]
 [ 70  71  72  73  74  75  76  77  78  79]
 [ 80  81  82  83  84  85  86  87  88  89]
 [ 90  91  92  93  94  95  96  97  98  99]
 [100 101 102 103 104 105 106 107 108 109]
 [110 111 112 113 114 115 116 117 118 119]
 [120 121 122 123 124 125 126 127 128 129]
 [130 131 132 133 134 135 136 137 138 139]
 [140 141 142 143 144 145 146 147 148 149]
 [150 151 152 153 154 155 156 157 158 159]
 [160 161 162 163 164 165 166 167 168 169]
 [170 171 172 173 174 175 176 177 178 179]
 [180 181 182 183 184 185 186 187 188 189]
 [190 191 192 193 194 195 196 197 198 199]]


In [47]:
x = np.array([1,2,3,4,5])

x[2] = 0

print(x)

[1 2 0 4 5]


In [48]:
arr[arr>=100] = 0

print(arr)

[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36 37 38 39]
 [40 41 42 43 44 45 46 47 48 49]
 [50 51 52 53 54 55 56 57 58 59]
 [60 61 62 63 64 65 66 67 68 69]
 [70 71 72 73 74 75 76 77 78 79]
 [80 81 82 83 84 85 86 87 88 89]
 [90 91 92 93 94 95 96 97 98 99]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]]


In [55]:
arr[arr<50] = arr[arr<50] * 2

print(arr)

[[  0   4   8  12  16  20  24  28  32  36]
 [ 40  44  48  52  56  60  64  68  72  76]
 [ 80  84  88  92  96  50  52  54  56  58]
 [ 60  62  64  66  68  70  72  74  76  78]
 [ 80  82  84  86  88  90  92  94  96  98]
 [ 50  51  52  53  54  55  56  57  58  59]
 [ 60  61  62  63  64  65  66  67  68  69]
 [ 70  71  72  73  74  75  76  77  78  79]
 [ 80  81  82  83  84  85  86  87  88  89]
 [ 90  91  92  93  94  95  96  97  98  99]
 [100 101 102 103 104 105 106 107 108 109]
 [110 111 112 113 114 115 116 117 118 119]
 [120 121 122 123 124 125 126 127 128 129]
 [130 131 132 133 134 135 136 137 138 139]
 [140 141 142 143 144 145 146 147 148 149]
 [150 151 152 153 154 155 156 157 158 159]
 [160 161 162 163 164 165 166 167 168 169]
 [170 171 172 173 174 175 176 177 178 179]
 [180 181 182 183 184 185 186 187 188 189]
 [190 191 192 193 194 195 196 197 198 199]]


<hr/>
### matplotlib

In [4]:

# if any of this is confusing, just ignore it for now:
import matplotlib # not needed for the line below with pyplot to work
# this is not a true python command and is just telling python
# how to show our graphs/plots when we make them today
%matplotlib tk
##

# this is the important line for giving us the module and code we need today
import matplotlib.pyplot as pl
# a lot of people use `plt` instead of `pl`
# so you might see that in online help

In [87]:
# subplots

# this is new: when a function returns 2 variables
# you can collect them like this
fig,ax = pl.subplots()

In [88]:
# first plot, interactive

x = [0,1,2]
y = [1,3,2]

ax.plot(x, y)

[<matplotlib.lines.Line2D at 0x1c2a45ef60>]

In [89]:
ax.plot(x, y, linewidth=3, color='red')

[<matplotlib.lines.Line2D at 0x1c2bb9b048>]

In [86]:
ax.set_yticks([0, 2, 4])

ax.set_ylabel('My y axis here', labelpad=30, fontsize=40)
# labelpad determine how far from the axis the label shows up

Text(80.4444,0.5,'My y axis here')

In [71]:
x = [0,1,2]
y = [3,1,4]

ax.plot(x, y, linewidth=3)

[<matplotlib.lines.Line2D at 0x1c29fdccc0>]

In [95]:
# if we want another new figure to work on

fig2,ax2 = pl.subplots()

# to plot on that one, use ax2.plot...

In [None]:
# line

In [90]:
# lims, ticks, labels

ax.set_ylim([1,2])

(1, 2)

In [None]:
# scatter

In [3]:
# histogram

In [None]:
# errorbar

In [None]:
# bar

In [2]:
# matplotlib gallery

In [None]:
# imshow

In [94]:
lines = ax.plot([1,3,2])

print(lines)
line = lines[0]

line.set_linewidth(100)

[<matplotlib.lines.Line2D object at 0x1c305bb518>]


In [None]:
# leaving this here to ponder
# we did not teach it

my_settings = dict(linewidth=5, color='cornflowerblue')

ax.plot(x, y, **my_settings)

In [101]:
img = pl.imread('/Users/ben/Desktop/cat.jpg')

np.shape(img)
#img.shape # same thing as np.shape(img)

fig,ax = pl.subplots()

ax.imshow(img)

<matplotlib.image.AxesImage at 0x1c29eb66d8>

<hr/>
## Questions, review, and exercises

<hr/>
#### Problem 1

__(a)__ Load in the image called `'sunset.jpg'` using the `pl.imread` function.

In [2]:
sunset = pl.imread('sunset.jpg')

__(b)__ Inspect the shape of the image data. What does each axis of the array correspond to?

__(c)__ Use `matplotlib` to display the image on some axes in a figure.

In [3]:
sunset.shape

(1080, 1920)

In [4]:
fig,ax = pl.subplots()

sunset_upsidedown = sunset[::-1]

sunset_flipped = sunset[:, ::-1]

ax.imshow(sunset_flipped)

<matplotlib.image.AxesImage at 0x1c217e0e48>

__(d)__ Which is brighter on average: the top half or bottom half of the image? Answer this quantitatively using `numpy`.

*Hint: in most images, small values correspond to darkness, and large values correspond to brightness.*

In [8]:
fig,ax = pl.subplots()

ax.imshow(sunset, cmap='gist_gray')

<matplotlib.image.AxesImage at 0x1c25150748>

* y axis ?
* flip
* black and white ?

In [9]:
pl.imshow?

<hr/>
#### Problem 2

__(a)__ Load in the dataset called `neural_data.npy`.

In [10]:
data = np.load('neural_data.npy')

__(b)__ These data come from some neurons recorded in the lab. Every neuron was recorded for 30 seconds.

The values represent the instantaneous firing rate of the recorded neurons, in units of Hz. (You can think of this as a measure of how active the neuron is.)

Each row of the array contains the data from one recorded neuron.

Each column contains a time point from the 30 seconds of the recording.

How many neurons did we record?

In [13]:
n_neurons = data.shape[0]

print(n_neurons)

7


__(c)__ Given that each recording was exactly 30 seconds long, what is the sampling rate of the recording? In other words, how many samples did we record in each second?

In [16]:
data.shape

n_samples = data.shape[1]
duration = 30 # seconds

samples_per_second = n_samples / duration

print(samples_per_second) # aka sampling rate, `fs`
# units = Hz = samples per second

100.0


__(d)__ Plot the firing rate of each neuron, with each neuron being represented by a single trace with a different color.

In [24]:
data.shape

(7, 3000)

In [38]:
fig,ax = pl.subplots()

n_datapoints = data.shape[1]

time = np.linspace(0,30,n_datapoints)
# we know that our data consist of 30 seconds of equally spaced
# samples, and there are n_datapoints samples

# using a for loop on a 2d array
# what do I get in each iteration?
for neuron in data:
    ax.plot(time, neuron)
# answer: each item in a 2d array is a row
# for example, my_array[2] is the third row
# therefore, each iteration of this for loop
# gives us one row of the array
# which in this case, corresponds to the data from one neuron
    
ax.set_xlabel('Time (s)', fontsize='large')
ax.set_ylabel('Instantaneous firing rate (Hz)', fontsize='large')

Text(0,0.5,'Instantaneous firing rate (Hz)')

In [36]:
my_list = [1,2,3,4,5]

for item in my_list:
    print(item)

1
2
3
4
5


In [37]:
my_list = [1,2,3,4,5]

i = 0
while i<len(my_list):
    print(my_list[i])
    i = i + 1


1
2
3
4
5


In [None]:
ax.plot

__(e)__ Plot the average firing rate across neurons. 

(This should be a 30-second-long trace representing the average firing rate at each time, where you averaged over all the neurons.)

In [21]:
fig,ax = pl.subplots()

average_over_neurons = np.mean(data, axis=0)

ax.plot(average_over_neurons)

[<matplotlib.lines.Line2D at 0x1a1ac0e550>]

__(f)__ At some unknown point, a stimulus was applied to the neurons. Looking at your plot, can you guess when that was?

__(g)__ Compute the highest firing rate observed for each neuron.

In [42]:

highest_rate_per_neuron = np.max(data, axis=1)

print(highest_rate_per_neuron)

[110.76484451 158.83009506 140.68861449  95.67503032  62.71591795
  68.61306165 105.65384201]


Does your answer match what you observe in your plot?

__(h)__ For each neuron, determine the time at which the highest firing rate was observed.

In [49]:
index_of_max_value = np.argmax(data, axis=1)

print(index_of_max_value)

time_of_max_value = index_of_max_value / 100

print(time_of_max_value)

# or, another approach:
time = np.linspace(0,30,n_datapoints)

first_neuron_index_max = index_of_max_value[0]

time_of_max_value = time[index_of_max_value]
print(time_of_max_value)

[2992  583 1147 2489  295  225 2795]
[29.92  5.83 11.47 24.89  2.95  2.25 27.95]
29.929976658886293


In [51]:
index_of_max_value = np.argmax(data, axis=1)
print(index_of_max_value)

time = np.linspace(0,30,n_datapoints)

time_of_max_value = time[index_of_max_value]

print(time_of_max_value)

[2992  583 1147 2489  295  225 2795]
[29.92997666  5.83194398 11.47382461 24.89829943  2.95098366  2.25075025
 27.95931977]


In [57]:
# types of indexing in an array

my_array = np.array(['a','b','c','d','e','f','g','h','i'])

# using a single integer
print(my_array[3])

# using a slice
print(my_array[3:7])

# using boolean
indexers = np.array([True,True,True,False,False,False,True,False,True])
print(my_array[indexers])

# using an array of integers
indexers = np.array([0,5,6,7,8])
print(my_array[indexers])

d
['d' 'e' 'f' 'g']
['a' 'b' 'c' 'g' 'i']
['a' 'f' 'g' 'h' 'i']


# loading excel files

In [88]:
import pandas as pd

my_data = pd.read_excel('/Users/ben/Desktop/my_data.xlsx', header=2)

In [65]:
print(my_data)

         Month fresh water: (United States)  vegan: (United States)  \
0   2004-01-01                            1                       2   
1   2004-02-01                            1                       2   
2   2004-03-01                            1                       2   
3   2004-04-01                            1                       2   
4   2004-05-01                            1                       2   
5   2004-06-01                            1                       2   
6   2004-07-01                            1                       2   
7   2004-08-01                            1                       2   
8   2004-09-01                            1                       2   
9   2004-10-01                            1                       2   
10  2004-11-01                            1                       3   
11  2004-12-01                            1                       2   
12  2005-01-01                            1                       2   
13  20

In [79]:
type(my_data)

pandas.core.frame.DataFrame

In [89]:
# this converts the fancy pandas data structure
# to a numpy array of data

my_new_data = my_data.values

In [81]:
type(my_new_data)

numpy.ndarray

In [82]:
#print(my_new_data)
print(my_new_data.shape)

(176, 5)


In [72]:
csv_data = pd.read_csv('/Users/ben/Desktop/multiTimeline.csv')


In [76]:
my_data = csv_data.values

In [84]:
print(my_data.shape)

(176, 5)


In [91]:
fig,ax = pl.subplots()

ax.plot(my_new_data[:,3])

[<matplotlib.lines.Line2D at 0x1c23a51518>]

In [90]:
my_new_data[:,2]

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 3, 2, 3, 2,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 4, 4, 4,
       4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
       6, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 5, 5, 5, 6, 5, 5, 5, 6, 5,
       6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 8, 8, 8, 9, 9, 9, 9, 8, 8, 9,
       12, 14, 9, 9, 9, 9, 9, 10, 15, 16, 13, 13, 13, 13, 14, 14, 13, 13,
       12, 12, 12, 12], dtype=object)

In [92]:
for i in np.arange(1000):
    print(i)
    


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [93]:
# an example of how one might make a progress bar

import time

for i in range(100):
    print('|'+'#'*i+' '*(100-i-1)+f'|{i+1}%', end='')
    time.sleep(0.05)
    print('\r', end='')

|###########################################################################################        |92%

KeyboardInterrupt: 