In [1]:
import numpy as np
import pandas as pd

In [2]:
# Eight consecutive calendar days starting on 1 January 2000.
index = pd.date_range(start="1/1/2000", periods=8, freq="D")

In [3]:
index

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# Five standard-normal draws labelled "a" through "e".
s = pd.Series(data=np.random.randn(5), index=list("abcde"))

In [5]:
s

a   -0.574018
b   -0.061551
c    0.153363
d   -0.698093
e   -0.467623
dtype: float64

In [6]:
s.loc["a"]

-0.5740181980206319

In [7]:
# An 8x3 frame of standard-normal draws: rows are labelled by `index`,
# columns are "A", "B" and "C".
df = pd.DataFrame(
    data=np.random.randn(8, 3),
    index=index,
    columns=list("ABC"),
)

In [8]:
df

Unnamed: 0,A,B,C
2000-01-01,-0.816962,0.016242,-0.798559
2000-01-02,1.20937,-0.740141,0.837012
2000-01-03,-1.091975,0.352786,1.266263
2000-01-04,1.056718,-0.114568,0.442382
2000-01-05,-1.158944,0.004515,-0.733047
2000-01-06,0.950257,-1.17227,1.06265
2000-01-07,0.695968,0.985892,-1.551044
2000-01-08,1.09207,-1.662409,-0.469109


In [9]:
df.loc["2000-01-01"]

A   -0.816962
B    0.016242
C   -0.798559
Name: 2000-01-01 00:00:00, dtype: float64

In [10]:
df.loc["2000-01-01", "A"]

-0.8169620182658892

In [11]:
type(df.loc["2000-01-01", "A"])

numpy.float64

In [12]:
df.loc[:, "A"]

2000-01-01   -0.816962
2000-01-02    1.209370
2000-01-03   -1.091975
2000-01-04    1.056718
2000-01-05   -1.158944
2000-01-06    0.950257
2000-01-07    0.695968
2000-01-08    1.092070
Freq: D, Name: A, dtype: float64

In [13]:
type(df.loc[:, "A"])

pandas.core.series.Series

In [14]:
df.loc[:, "A":]

Unnamed: 0,A,B,C
2000-01-01,-0.816962,0.016242,-0.798559
2000-01-02,1.20937,-0.740141,0.837012
2000-01-03,-1.091975,0.352786,1.266263
2000-01-04,1.056718,-0.114568,0.442382
2000-01-05,-1.158944,0.004515,-0.733047
2000-01-06,0.950257,-1.17227,1.06265
2000-01-07,0.695968,0.985892,-1.551044
2000-01-08,1.09207,-1.662409,-0.469109


In [15]:
type(df.loc[:, "A":])

pandas.core.frame.DataFrame

In [16]:
df.to_numpy()

array([[-0.81696202,  0.01624218, -0.79855928],
       [ 1.20936962, -0.74014105,  0.837012  ],
       [-1.09197543,  0.35278645,  1.26626293],
       [ 1.056718  , -0.11456785,  0.4423821 ],
       [-1.15894357,  0.00451499, -0.73304667],
       [ 0.95025733, -1.17226991,  1.06265026],
       [ 0.69596822,  0.98589213, -1.55104407],
       [ 1.09207049, -1.66240876, -0.46910896]])

In [17]:
df.to_numpy().shape

(8, 3)

In [19]:
# Show the Python type of each label produced when iterating over the
# default integer index of a small Series.
int_labels = pd.Series([0, 1, 2, 3]).index
for i in int_labels:
    print(type(i))

<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>


# Flexible binary operations

## Matching / broadcasting behavior

In [20]:
# Three columns of standard-normal draws whose row labels only partly
# overlap, so the aligned frame contains missing values:
# "one" lacks row "d" and "three" lacks row "a".
df = pd.DataFrame(
    {
        name: pd.Series(np.random.randn(len(labels)), index=labels)
        for name, labels in (
            ("one", ["a", "b", "c"]),
            ("two", ["a", "b", "c", "d"]),
            ("three", ["b", "c", "d"]),
        )
    }
)

In [21]:
df

Unnamed: 0,one,two,three
a,-0.966245,0.545095,
b,-0.330277,-1.777735,2.267374
c,-0.825424,0.0266,0.831805
d,,-0.472193,0.585474


In [22]:
row = df.iloc[1]

In [23]:
row

one     -0.330277
two     -1.777735
three    2.267374
Name: b, dtype: float64

In [24]:
column = df["two"]

In [25]:
column

a    0.545095
b   -1.777735
c    0.026600
d   -0.472193
Name: two, dtype: float64

In [27]:
df.sub(row, axis="columns")

Unnamed: 0,one,two,three
a,-0.635968,2.322829,
b,0.0,0.0,0.0
c,-0.495147,1.804335,-1.435569
d,,1.305542,-1.6819


In [28]:
df.sub(row, axis=1)

Unnamed: 0,one,two,three
a,-0.635968,2.322829,
b,0.0,0.0,0.0
c,-0.495147,1.804335,-1.435569
d,,1.305542,-1.6819


In [29]:
df.sub(column, axis="index")

Unnamed: 0,one,two,three
a,-1.51134,0.0,
b,1.447457,0.0,4.045109
c,-0.852025,0.0,0.805205
d,,0.0,1.057667


In [30]:
df.sub(column, axis=0)

Unnamed: 0,one,two,three
a,-1.51134,0.0,
b,1.447457,0.0,4.045109
c,-0.852025,0.0,0.805205
d,,0.0,1.057667


In [31]:
dfmi = df.copy()

In [35]:
column

a    0.545095
b   -1.777735
c    0.026600
d   -0.472193
Name: two, dtype: float64

In [32]:
# Replace the flat row labels with a two-level index ("first", "second"):
# rows (1, "a"), (1, "b"), (1, "c") and (2, "a").
dfmi.index = pd.MultiIndex.from_arrays(
    [[1, 1, 1, 2], ["a", "b", "c", "a"]],
    names=["first", "second"],
)

In [33]:
dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,one,two,three
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,a,-0.966245,0.545095,
1,b,-0.330277,-1.777735,2.267374
1,c,-0.825424,0.0266,0.831805
2,a,,-0.472193,0.585474


In [37]:
dfmi.sub(column, axis=0, level="second")

Unnamed: 0_level_0,Unnamed: 1_level_0,one,two,three
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,a,-1.51134,0.0,
1,b,1.447457,0.0,4.045109
1,c,-0.852025,0.0,0.805205
2,a,,-1.017288,0.04038


In [38]:
# The integers 0 through 9 on the default integer index.
s = pd.Series(data=np.arange(10))

In [39]:
s

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [40]:
# The built-in divmod() on a Series yields a (floor quotient, remainder)
# pair, each itself a Series.
div, rem = divmod(s, 3)

In [41]:
div

0    0
1    0
2    0
3    1
4    1
5    1
6    2
7    2
8    2
9    3
dtype: int64

In [42]:
rem

0    0
1    1
2    2
3    0
4    1
5    2
6    0
7    1
8    2
9    0
dtype: int64

In [45]:
idx = pd.Index(np.arange(10))

In [46]:
idx

Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')

In [47]:
# divmod() also accepts an Index; quotient and remainder come back as
# Index objects.
div, rem = divmod(idx, 3)

In [48]:
div

Index([0, 0, 0, 1, 1, 1, 2, 2, 2, 3], dtype='int64')

In [49]:
rem

Index([0, 1, 2, 0, 1, 2, 0, 1, 2, 0], dtype='int64')

In [50]:
# The divisor can be a list with one divisor per element of `s`; the
# division is then applied element-wise.
div, rem = divmod(s, [2, 2, 3, 3, 4, 4, 5, 5, 6, 6])

In [51]:
div

0    0
1    0
2    0
3    1
4    1
5    1
6    1
7    1
8    1
9    1
dtype: int64

In [52]:
rem

0    0
1    1
2    2
3    0
4    0
5    1
6    1
7    2
8    2
9    3
dtype: int64

## Missing data / operations with fill values

In [53]:
df

Unnamed: 0,one,two,three
a,-0.966245,0.545095,
b,-0.330277,-1.777735,2.267374
c,-0.825424,0.0266,0.831805
d,,-0.472193,0.585474


In [61]:
# Four standard-normal draws as a one-dimensional array.
value = np.random.randn(4)

In [62]:
# Overwrite the last element with NaN to introduce a missing value.
value[-1] = np.nan

In [63]:
value

array([ 0.14226949, -0.04021945, -0.64599186,         nan])

In [64]:
# Frame with rows "a".."d": column "one" reuses `value`, while "two" and
# "three" are fresh standard-normal draws.
df2 = pd.DataFrame(
    {
        "one": value,
        "two": np.random.randn(4),
        "three": np.random.randn(4),
    },
    index=list("abcd"),
)

In [65]:
df2

Unnamed: 0,one,two,three
a,0.142269,0.038235,0.628218
b,-0.040219,-0.092864,-0.997562
c,-0.645992,1.006355,1.516746
d,,-0.050081,0.348156


In [66]:
df + df2

Unnamed: 0,one,two,three
a,-0.823975,0.58333,
b,-0.370496,-1.870598,1.269812
c,-1.471416,1.032955,2.348551
d,,-0.522274,0.93363


In [67]:
df.add(df2, fill_value=0)

Unnamed: 0,one,two,three
a,-0.823975,0.58333,0.628218
b,-0.370496,-1.870598,1.269812
c,-1.471416,1.032955,2.348551
d,,-0.522274,0.93363


## Boolean reductions

In [68]:
(df > 0).all()

one      False
two      False
three    False
dtype: bool

In [71]:
# Absolute values of standard-normal draws, so every entry is
# non-negative. No index is passed, so rows get the default integer labels.
# The bracketed expression continues across lines on its own; explicit
# backslash continuations are unnecessary inside brackets.
df2 = pd.DataFrame(
    {
        "one": np.abs(np.random.randn(4)),
        "two": np.abs(np.random.randn(4)),
        "three": np.abs(np.random.randn(4)),
    }
)

In [72]:
df2

Unnamed: 0,one,two,three
0,0.227978,0.42871,0.329845
1,0.181527,0.780938,0.023819
2,0.510442,0.174465,0.188398
3,1.962497,0.919794,1.161257


In [73]:
(df2 > 0).all()

one      True
two      True
three    True
dtype: bool

In [74]:
df2 > 0

Unnamed: 0,one,two,three
0,True,True,True
1,True,True,True
2,True,True,True
3,True,True,True


In [75]:
df > 0

Unnamed: 0,one,two,three
a,False,True,False
b,False,False,True
c,False,True,True
d,False,False,True


In [76]:
(df > 0).any()

one      False
two       True
three     True
dtype: bool

In [77]:
(df > 0).any().any()

True

In [78]:
(df > 0).any().all()

False