In [1]:
# from Learning Pandas
# https://www.packtpub.com/packtlib/book/Application-Development/9781783985128/1/ch01lvl1sec11/Primary%20pandas%20objects

# import numpy and pandas, and DataFrame / Series
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# Set some pandas display options: turn off the HTML notebook repr and
# cap the number of columns / rows shown at 10 each
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 10)

# And some items for matplotlib: inline figures plus the pyplot interface
%matplotlib inline
import matplotlib.pyplot as plt
# The pandas-level style option is left disabled (it had issues with matplotlib);
# the style is selected through matplotlib itself on the next line.
#pd.options.display.mpl_style = 'default' #issues with matplotlib
plt.style.use('default')

In [2]:
s = Series([1, 2, 3, 4])
s

0    1
1    2
2    3
3    4
dtype: int64

In [3]:
s[[1, 3]]

1    2
3    4
dtype: int64

In [4]:
s = Series([1, 2, 3, 4], 
           index = ['a', 'b', 'c', 'd'])
s

a    1
b    2
c    3
d    4
dtype: int64

In [5]:
s[['a', 'd']]

a    1
d    4
dtype: int64

In [6]:
# The index holds string labels ('a'..'d'), so select by position explicitly
# with .iloc instead of relying on integer keys falling back to positions.
s.iloc[[1, 2]]

b    2
c    3
dtype: int64

In [7]:
s.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [8]:
dates = pd.date_range('2014-07-01', '2014-07-06')
dates

DatetimeIndex(['2014-07-01', '2014-07-02', '2014-07-03', '2014-07-04',
               '2014-07-05', '2014-07-06'],
              dtype='datetime64[ns]', freq='D')

In [9]:
temps1 = Series([80, 82, 85, 90, 83, 87], 
                 index = dates)
temps1

2014-07-01    80
2014-07-02    82
2014-07-03    85
2014-07-04    90
2014-07-05    83
2014-07-06    87
Freq: D, dtype: int64

In [10]:
temps1.mean()

84.5

In [11]:
temps2 = Series([70, 75, 69, 83, 79, 77],
                index = dates)

temp_diffs = temps1 - temps2
temp_diffs

2014-07-01    10
2014-07-02     7
2014-07-03    16
2014-07-04     7
2014-07-05     4
2014-07-06    10
Freq: D, dtype: int64

In [12]:
temp_diffs['2014-07-03']

16

In [13]:
# temp_diffs is indexed by dates, so an integer is a position, not a label:
# use .iloc to make the positional lookup explicit.
temp_diffs.iloc[2]

16

**DataFrame**

In [14]:
temps_df = DataFrame(
    {'Missoula': temps1,
     'Philadelphia': temps2})
temps_df

            Missoula  Philadelphia
2014-07-01        80            70
2014-07-02        82            75
2014-07-03        85            69
2014-07-04        90            83
2014-07-05        83            79
2014-07-06        87            77

In [15]:
temps_df['Missoula']

2014-07-01    80
2014-07-02    82
2014-07-03    85
2014-07-04    90
2014-07-05    83
2014-07-06    87
Freq: D, Name: Missoula, dtype: int64

In [16]:
temps_df['Philadelphia']

2014-07-01    70
2014-07-02    75
2014-07-03    69
2014-07-04    83
2014-07-05    79
2014-07-06    77
Freq: D, Name: Philadelphia, dtype: int64

In [17]:
temps_df[['Philadelphia', 'Missoula']]

            Philadelphia  Missoula
2014-07-01            70        80
2014-07-02            75        82
2014-07-03            69        85
2014-07-04            83        90
2014-07-05            79        83
2014-07-06            77        87

In [18]:
temps_df.Missoula

2014-07-01    80
2014-07-02    82
2014-07-03    85
2014-07-04    90
2014-07-05    83
2014-07-06    87
Freq: D, Name: Missoula, dtype: int64

In [19]:
temps_df.Missoula - temps_df.Philadelphia

2014-07-01    10
2014-07-02     7
2014-07-03    16
2014-07-04     7
2014-07-05     4
2014-07-06    10
Freq: D, dtype: int64

In [20]:
temps_df['Difference'] = temp_diffs
temps_df

            Missoula  Philadelphia  Difference
2014-07-01        80            70          10
2014-07-02        82            75           7
2014-07-03        85            69          16
2014-07-04        90            83           7
2014-07-05        83            79           4
2014-07-06        87            77          10

In [21]:
temps_df.columns

Index(['Missoula', 'Philadelphia', 'Difference'], dtype='object')

In [22]:
temps_df.Difference[1:4]

2014-07-02     7
2014-07-03    16
2014-07-04     7
Freq: D, Name: Difference, dtype: int64

In [23]:
temps_df.iloc[1]

Missoula        82
Philadelphia    75
Difference       7
Name: 2014-07-02 00:00:00, dtype: int64

In [24]:
# .ix is no longer available in pandas; .iloc performs the same positional
# row lookup (as in the neighbouring cells). The row comes back as a Series
# whose index is the DataFrame's column labels.
temps_df.iloc[1].index

Index(['Missoula', 'Philadelphia', 'Difference'], dtype='object')

In [25]:
temps_df.loc['2014-07-03']

Missoula        85
Philadelphia    69
Difference      16
Name: 2014-07-03 00:00:00, dtype: int64

In [26]:
temps_df.iloc[[1, 3, 5]].Difference

2014-07-02     7
2014-07-04     7
2014-07-06    10
Freq: 2D, Name: Difference, dtype: int64

In [27]:
temps_df.Missoula > 82

2014-07-01    False
2014-07-02    False
2014-07-03     True
2014-07-04     True
2014-07-05     True
2014-07-06     True
Freq: D, Name: Missoula, dtype: bool

In [28]:
temps_df[temps_df.Missoula > 82]

            Missoula  Philadelphia  Difference
2014-07-03        85            69          16
2014-07-04        90            83           7
2014-07-05        83            79           4
2014-07-06        87            77          10

Loading Data from files and the Web

In [29]:
!cat Data/pandas_test1.csv

 date,0,1,2
   2000-01-01 00:00:00,1.10376250134,-1.90997889703,-0.808955536115
   2000-01-02 00:00:00,1.18891664768,0.581119740849,0.86159734949
   2000-01-03 00:00:00,-0.964200042412,0.779764393246,1.82906224532
   2000-01-04 00:00:00,0.782130444001,-1.72066965573,-1.10824167327
   2000-01-05 00:00:00,-1.86701699823,-0.528368292754,-2.48830894087
   2000-01-06 00:00:00,2.56928022646,-0.471901478927,-0.835033249865
   2000-01-07 00:00:00,-0.39932258251,-0.676426550985,-0.0112559158931
   2000-01-08 00:00:00,1.64299299394,1.01341997845,1.43566709724
   2000-01-09 00:00:00,1.14730764657,2.13799951538,0.554171306191
   2000-01-10 00:00:00,0.933765825769,1.38715526486,-0.560142729978


In [30]:
df = pd.read_csv('Data/pandas_test1.csv', skipinitialspace=True)
df

                  date         0         1         2
0  2000-01-01 00:00:00  1.103763 -1.909979 -0.808956
1  2000-01-02 00:00:00  1.188917  0.581120  0.861597
2  2000-01-03 00:00:00 -0.964200  0.779764  1.829062
3  2000-01-04 00:00:00  0.782130 -1.720670 -1.108242
4  2000-01-05 00:00:00 -1.867017 -0.528368 -2.488309
5  2000-01-06 00:00:00  2.569280 -0.471901 -0.835033
6  2000-01-07 00:00:00 -0.399323 -0.676427 -0.011256
7  2000-01-08 00:00:00  1.642993  1.013420  1.435667
8  2000-01-09 00:00:00  1.147308  2.138000  0.554171
9  2000-01-10 00:00:00  0.933766  1.387155 -0.560143

In [31]:
df.date

0    2000-01-01 00:00:00
1    2000-01-02 00:00:00
2    2000-01-03 00:00:00
3    2000-01-04 00:00:00
4    2000-01-05 00:00:00
5    2000-01-06 00:00:00
6    2000-01-07 00:00:00
7    2000-01-08 00:00:00
8    2000-01-09 00:00:00
9    2000-01-10 00:00:00
Name: date, dtype: object

In [32]:
df.date[0]

'2000-01-01 00:00:00'

In [33]:
type(df.date[0])

str

In [34]:
df = pd.read_csv('Data/pandas_test1.csv', 
                 skipinitialspace=True,
                 parse_dates=['date'])
df

        date         0         1         2
0 2000-01-01  1.103763 -1.909979 -0.808956
1 2000-01-02  1.188917  0.581120  0.861597
2 2000-01-03 -0.964200  0.779764  1.829062
3 2000-01-04  0.782130 -1.720670 -1.108242
4 2000-01-05 -1.867017 -0.528368 -2.488309
5 2000-01-06  2.569280 -0.471901 -0.835033
6 2000-01-07 -0.399323 -0.676427 -0.011256
7 2000-01-08  1.642993  1.013420  1.435667
8 2000-01-09  1.147308  2.138000  0.554171
9 2000-01-10  0.933766  1.387155 -0.560143

In [35]:
type(df.date[0])

pandas.tslib.Timestamp

In [36]:
df.index

Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')

In [37]:
df = pd.read_csv('Data/pandas_test1.csv', 
                 parse_dates=['date'],
                 skipinitialspace=True,
                 index_col='date')
df

                   0         1         2
date                                    
2000-01-01  1.103763 -1.909979 -0.808956
2000-01-02  1.188917  0.581120  0.861597
2000-01-03 -0.964200  0.779764  1.829062
2000-01-04  0.782130 -1.720670 -1.108242
2000-01-05 -1.867017 -0.528368 -2.488309
2000-01-06  2.569280 -0.471901 -0.835033
2000-01-07 -0.399323 -0.676427 -0.011256
2000-01-08  1.642993  1.013420  1.435667
2000-01-09  1.147308  2.138000  0.554171
2000-01-10  0.933766  1.387155 -0.560143

In [38]:
df.index

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08',
               '2000-01-09', '2000-01-10'],
              dtype='datetime64[ns]', name='date', freq=None)

In [43]:
# Imports for reading data from Yahoo!
# NOTE(review): pandas.io.data appears to have been superseded by the separate
# pandas_datareader package (see the commented-out import) - confirm against
# the installed pandas version before re-running.
from pandas.io.data import DataReader
# from pandas_datareader.data import DataReader
from datetime import date
from dateutil.relativedelta import relativedelta

# Request GOOG from the "yahoo" source; the third argument is the date three
# months before today (presumably the start of the range - confirm)
goog = DataReader("GOOG", "yahoo",
                  date.today() +
                  relativedelta(months=-3))

# Show the last rows of the result
goog.tail()

                  Open        High         Low       Close   Volume  \
Date                                                                  
2016-01-11  716.609985  718.854980  703.539978  716.030029  2089300   
2016-01-12  721.679993  728.750000  717.317017  726.070007  2000500   
2016-01-13  730.849976  734.739990  698.609985  700.559998  2468300   
2016-01-14  705.380005  721.924988  689.099976  714.719971  2211900   
2016-01-15  692.289978  706.739990  685.369995  694.450012  3592400   

             Adj Close  
Date                    
2016-01-11  716.030029  
2016-01-12  726.070007  
2016-01-13  700.559998  
2016-01-14  714.719971  
2016-01-15  694.450012  

In [45]:
# Imports for reading data from Google
# NOTE(review): pandas.io.data appears to have been superseded by the separate
# pandas_datareader package (see the commented-out import) - confirm against
# the installed pandas version before re-running.
from pandas.io.data import DataReader
# from pandas_datareader.data import DataReader
from datetime import date
from dateutil.relativedelta import relativedelta

# Request GOOG from the "google" source; the third argument is the date three
# months before today (presumably the start of the range - confirm)
googG = DataReader("GOOG", "google",
                  date.today() +
                  relativedelta(months=-3))

# Show the last rows of the result
googG.tail()

              Open    High     Low   Close   Volume
Date                                               
2016-01-11  716.61  718.86  703.54  716.03  2090621
2016-01-12  721.68  728.75  717.32  726.07  2024509
2016-01-13  730.85  734.74  698.61  700.56  2468295
2016-01-14  705.38  721.92  689.10  714.72  2211853
2016-01-15  692.29  706.74  685.37  694.45  3608109

In [47]:
# Imports for reading data from FRED
# NOTE(review): pandas.io.data appears to have been superseded by the separate
# pandas_datareader package (see the commented-out import) - confirm against
# the installed pandas version before re-running.
from pandas.io.data import DataReader
# from pandas_datareader.data import DataReader
from datetime import date
from dateutil.relativedelta import relativedelta

# Request the VIXCLS series from the "fred" source; the third argument is the
# date three months before today (presumably the start of the range - confirm)
fred = DataReader("VIXCLS", "fred",
                  date.today() +
                  relativedelta(months=-3))

# Show the last rows of the result
fred.tail()

            VIXCLS
DATE              
2016-01-08   27.01
2016-01-11   24.30
2016-01-12   22.47
2016-01-13   25.22
2016-01-14   23.95

In [48]:
goog.plot(y='Adj Close')

<matplotlib.axes._subplots.AxesSubplot at 0x10740d860>

## Chapter 3 ##

In [49]:
import numpy as np

In [50]:
def squares(values):
    """Return a new list holding the square of each item in ``values``.

    The items are visited in iteration order; this is the pure-Python
    baseline that gets timed in this cell.
    """
    return [item * item for item in values]
    
    
to_square = range(100000)

%timeit squares(to_square)

100 loops, best of 3: 15.3 ms per loop


In [51]:
# NumPy vectorization

array_to_square = np.arange(0, 100000)
%timeit array_to_square ** 2

The slowest run took 7.01 times longer than the fastest. This could mean that an intermediate result is being cached 
10000 loops, best of 3: 74.9 µs per loop


In [52]:
a1 = np.array([1, 2, 3, 4, 5])
a1

array([1, 2, 3, 4, 5])

In [53]:
# n-dimensional array
type(a1)

numpy.ndarray

In [54]:
np.size(a1)

5

In [55]:
a2 = np.array([1, 2, 3, 4.0, 5.0])
a2

array([ 1.,  2.,  3.,  4.,  5.])

In [56]:
a2.dtype

dtype('float64')

In [57]:
a3 = np.array([0] * 10)
a3

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [58]:
np.array(range(10))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [59]:
np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [60]:
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [61]:
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [62]:
np.arange(0, 10, 2)

array([0, 2, 4, 6, 8])

In [63]:
np.arange(10, 0, -1)

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1])

In [64]:
np.linspace(0, 10, 4)

array([  0.        ,   3.33333333,   6.66666667,  10.        ])

In [65]:
a1 = np.arange(0, 10)
a1 * 2

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [66]:
a2 = np.arange(10, 20)
a1 + a2

array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [67]:
np.array([[1, 2], [3,4]])

array([[1, 2],
       [3, 4]])

In [68]:
m = np.arange(0, 20).reshape(5, 4)
m

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [69]:
np.size(m)

20

In [70]:
np.size(m, 0)

5

In [71]:
np.size(m, 1)

4

In [72]:
m[1, 2]

6

In [73]:
m[1,]

array([4, 5, 6, 7])

In [74]:
m[:, 2]

array([ 2,  6, 10, 14, 18])

In [75]:
a = np.arange(5)
a < 2

array([ True,  True, False, False, False], dtype=bool)

In [76]:
( a < 2 ) | ( a > 3 )

array([ True,  True, False, False,  True], dtype=bool)

In [77]:
def exp(x):
    """Scalar predicate: truthy when ``x`` is below 3 or above 3.

    Python's ``or`` only works on single values, which is why this
    function is wrapped with ``np.vectorize`` before being applied
    to an array.
    """
    below_three = x < 3
    return below_three or x > 3
np.vectorize(exp)(a)

array([ True,  True,  True, False,  True], dtype=bool)

In [78]:
# Boolean selection
r = a < 3
a[r]

array([0, 1, 2])

In [79]:
np.sum(a < 3)

3

In [80]:
a1 = np.arange(0, 5)
a2 = np.arange(5, 0, -1)
a1 < a2

array([ True,  True,  True, False, False], dtype=bool)

In [81]:
a1 = np.arange(9).reshape(3, 3)
a2 = np.arange(9, 0, -1).reshape(3, 3)
a1 < a2

array([[ True,  True,  True],
       [ True,  True, False],
       [False, False, False]], dtype=bool)

In [82]:
a1 = np.arange(1, 10)
a1[3:8]

array([4, 5, 6, 7, 8])

In [83]:
a1[::2]

array([1, 3, 5, 7, 9])

In [84]:
a1[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1])

In [85]:
a1[9:0:-1]

array([9, 8, 7, 6, 5, 4, 3, 2])

In [86]:
a1[5:]

array([6, 7, 8, 9])

In [87]:
a1[:5]

array([1, 2, 3, 4, 5])

In [88]:
m

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [89]:
# all rows, column position 1
m[:,1]

array([ 1,  5,  9, 13, 17])

In [90]:
m[:,1:3]

array([[ 1,  2],
       [ 5,  6],
       [ 9, 10],
       [13, 14],
       [17, 18]])

In [91]:
m[3:5, :]

array([[12, 13, 14, 15],
       [16, 17, 18, 19]])

In [92]:
m[3:5, 1:3]

array([[13, 14],
       [17, 18]])

In [93]:
m[[1,3,4],:]

array([[ 4,  5,  6,  7],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [94]:
a = np.arange(0, 9)
m = a.reshape(3, 3)
m

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [95]:
reshaped = m.reshape(9)
reshaped

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [96]:
raveled = m.ravel()
raveled

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [97]:
m

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [98]:
# Writes made through the reshape() and ravel() results land in m itself,
# as the 1000 and 2000 in the output show.
reshaped = m.reshape(np.size(m))
raveled = m.ravel()
reshaped[2] = 1000  # flat position 2 -> m[0, 2]
raveled[5] = 2000   # flat position 5 -> m[1, 2]
m

array([[   0,    1, 1000],
       [   3,    4, 2000],
       [   6,    7,    8]])

In [99]:
# flatten() makes a copy: changing `flattened` leaves m2 untouched
m2 = np.arange(0, 9).reshape(3, 3)
flattened = m2.flatten()
flattened[0] = 1000
flattened

array([1000,    1,    2,    3,    4,    5,    6,    7,    8])

In [100]:
m2

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [101]:
flattened.shape

(9,)

In [102]:
flattened.shape = (3, 3)
flattened

array([[1000,    1,    2],
       [   3,    4,    5],
       [   6,    7,    8]])

In [103]:
flattened.transpose()

array([[1000,    3,    6],
       [   1,    4,    7],
       [   2,    5,    8]])

In [104]:
flattened.T

array([[1000,    3,    6],
       [   1,    4,    7],
       [   2,    5,    8]])

In [105]:
m = np.arange(0, 9).reshape(3, 3)
m.resize(1, 9)
m

array([[0, 1, 2, 3, 4, 5, 6, 7, 8]])

In [106]:
a = np.arange(9).reshape(3, 3)
b = (a + 1) * 10
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [107]:
b

array([[10, 20, 30],
       [40, 50, 60],
       [70, 80, 90]])

In [108]:
np.hstack((a, b))

array([[ 0,  1,  2, 10, 20, 30],
       [ 3,  4,  5, 40, 50, 60],
       [ 6,  7,  8, 70, 80, 90]])

In [109]:
np.concatenate((a, b), axis = 1)

array([[ 0,  1,  2, 10, 20, 30],
       [ 3,  4,  5, 40, 50, 60],
       [ 6,  7,  8, 70, 80, 90]])

In [110]:
np.vstack((a, b))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [10, 20, 30],
       [40, 50, 60],
       [70, 80, 90]])

In [111]:
np.concatenate((a, b), axis = 0)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [10, 20, 30],
       [40, 50, 60],
       [70, 80, 90]])

In [112]:
np.dstack((a, b))

array([[[ 0, 10],
        [ 1, 20],
        [ 2, 30]],

       [[ 3, 40],
        [ 4, 50],
        [ 5, 60]],

       [[ 6, 70],
        [ 7, 80],
        [ 8, 90]]])

In [113]:
one_d_a = np.arange(5)
one_d_a

array([0, 1, 2, 3, 4])

In [114]:
one_d_b = (one_d_a * 10)
one_d_b

array([ 0, 10, 20, 30, 40])

In [115]:
np.column_stack((one_d_a, one_d_b))

array([[ 0,  0],
       [ 1, 10],
       [ 2, 20],
       [ 3, 30],
       [ 4, 40]])

In [116]:
# Stack the two 1-D arrays as rows of a 2-D array. np.row_stack is a
# deprecated alias of np.vstack, so call vstack directly.
np.vstack((one_d_a, one_d_b))

array([[ 0,  1,  2,  3,  4],
       [ 0, 10, 20, 30, 40]])

In [117]:
a = np.arange(12).reshape(3, 4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [118]:
np.hsplit(a, 4)

[array([[0],
        [4],
        [8]]), array([[1],
        [5],
        [9]]), array([[ 2],
        [ 6],
        [10]]), array([[ 3],
        [ 7],
        [11]])]

In [119]:
np.hsplit(a, 2)

[array([[0, 1],
        [4, 5],
        [8, 9]]), array([[ 2,  3],
        [ 6,  7],
        [10, 11]])]

In [120]:
np.hsplit(a, [1, 3])

[array([[0],
        [4],
        [8]]), array([[ 1,  2],
        [ 5,  6],
        [ 9, 10]]), array([[ 3],
        [ 7],
        [11]])]

In [121]:
np.split(a, 2, axis = 1)

[array([[0, 1],
        [4, 5],
        [8, 9]]), array([[ 2,  3],
        [ 6,  7],
        [10, 11]])]

In [122]:
a = np.arange(12).reshape(4, 3)
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [123]:
np.vsplit(a, 4)

[array([[0, 1, 2]]),
 array([[3, 4, 5]]),
 array([[6, 7, 8]]),
 array([[ 9, 10, 11]])]

In [124]:
np.vsplit(a, 2)

[array([[0, 1, 2],
        [3, 4, 5]]), array([[ 6,  7,  8],
        [ 9, 10, 11]])]

In [125]:
np.vsplit(a, [1, 3])

[array([[0, 1, 2]]), array([[3, 4, 5],
        [6, 7, 8]]), array([[ 9, 10, 11]])]

In [126]:
np.split(a, 2, axis=0)

[array([[0, 1, 2],
        [3, 4, 5]]), array([[ 6,  7,  8],
        [ 9, 10, 11]])]

In [127]:
c = np.arange(27).reshape(3, 3, 3)
c

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]],

       [[18, 19, 20],
        [21, 22, 23],
        [24, 25, 26]]])

In [128]:
np.dsplit(c, 3)

[array([[[ 0],
         [ 3],
         [ 6]],
 
        [[ 9],
         [12],
         [15]],
 
        [[18],
         [21],
         [24]]]), array([[[ 1],
         [ 4],
         [ 7]],
 
        [[10],
         [13],
         [16]],
 
        [[19],
         [22],
         [25]]]), array([[[ 2],
         [ 5],
         [ 8]],
 
        [[11],
         [14],
         [17]],
 
        [[20],
         [23],
         [26]]])]

In [129]:
# 3x3 matrix holding the values 10..18, used to demonstrate min/max reductions
m = np.arange(10, 19).reshape(3, 3)
print(m)
# Reductions over the whole matrix; argmin/argmax report the position
# counted over the flattened matrix
print("{0} min of the entire matrix".format(m.min()))
print("{0} max of the entire matrix".format(m.max()))
print("{0} position of the min value".format(m.argmin()))
print("{0} position of the max value".format(m.argmax()))
# axis = 0 reduces down each column, axis = 1 reduces across each row
print("{0} mins down each column".format(m.min(axis = 0)))
print("{0} mins across each row".format(m.min(axis = 1)))
print("{0} maxs down each column".format(m.max(axis = 0)))
print("{0} maxs across each row".format(m.max(axis = 1)))

[[10 11 12]
 [13 14 15]
 [16 17 18]]
10 min of the entire matrix
18 max of the entire matrix
0 position of the min value
8 position of the max value
[10 11 12] mins down each column
[10 13 16] mins across each row
[16 17 18] maxs down each column
[12 15 18] maxs across each rox


In [130]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [131]:
a.mean(), a.std(), a.var()

(4.5, 2.8722813232690143, 8.25)

In [132]:
a = np.arange(1, 6)
a

array([1, 2, 3, 4, 5])

In [133]:
a.sum(), a.prod()

(15, 120)

In [134]:
a.cumsum(), a.cumprod()

(array([ 1,  3,  6, 10, 15]), array([  1,   2,   6,  24, 120]))

In [135]:
a = np.arange(10)
(a < 5).any() # any < 5?

True

In [136]:
np.arange(10).reshape(2, 5).size

10

## Chapter 4 ##

In [138]:
import numpy as np
import pandas as pd

pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 10)

In [140]:
s1 = pd.Series(2)
s1

0    2
dtype: int64