In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
# A Series built from a plain list gets a default integer index.
# The single NaN among the integers makes the whole Series float64.
s = pd.Series(data=[1, 3, 5, np.nan, 7, 9])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    7.0
5    9.0
dtype: float64

In [6]:
# Six consecutive daily timestamps starting on 2022-02-23.
dates = pd.date_range(start="20220223", periods=6)
dates

DatetimeIndex(['2022-02-23', '2022-02-24', '2022-02-25', '2022-02-26',
               '2022-02-27', '2022-02-28'],
              dtype='datetime64[ns]', freq='D')

In [7]:
# 6x4 frame of standard-normal draws, indexed by `dates`, with columns A-D.
# No seed is set in the visible cells, so the values differ on every fresh run.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186


In [69]:
# Each dict entry becomes one column. Scalars ("A", "B", "F") are broadcast
# to the length of the list-like entries (four rows), and every column keeps
# its own dtype.
df2 = pd.DataFrame(
    data={
        "A": 1.0,
        "B": pd.Timestamp("20220101"),
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "D": np.array([3, 3, 3, 3], dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2022-01-01,1.0,3,test,foo
1,1.0,2022-01-01,1.0,3,train,foo
2,1.0,2022-01-01,1.0,3,test,foo
3,1.0,2022-01-01,1.0,3,train,foo


In [25]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [27]:
df.head()

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492


In [28]:
df.tail(3)

Unnamed: 0,A,B,C,D
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186


In [29]:
df.index

DatetimeIndex(['2022-02-23', '2022-02-24', '2022-02-25', '2022-02-26',
               '2022-02-27', '2022-02-28'],
              dtype='datetime64[ns]', freq='D')

In [30]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [31]:
df.to_numpy()

array([[ 1.21640959, -0.58431356,  1.42323274,  1.73738825],
       [-0.75092467,  0.01353459, -0.89499484,  0.56595967],
       [ 1.47422334, -0.84877   , -0.36768789,  0.35705475],
       [-1.12528441, -1.85101079,  0.61112687, -1.02644113],
       [-0.31337775, -0.6479892 , -1.35388287,  0.93949166],
       [ 0.22456369, -0.80272267, -2.52301042, -0.39018592]])

In [32]:
df2.to_numpy()

array([[1.0, Timestamp('2022-01-01 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2022-01-01 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2022-01-01 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2022-01-01 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [33]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.120935,-0.786879,-0.517536,0.363878
std,1.052998,0.606208,1.409028,0.97574
min,-1.125284,-1.851011,-2.52301,-1.026441
25%,-0.641538,-0.837258,-1.239161,-0.203376
50%,-0.044407,-0.725356,-0.631341,0.461507
75%,0.968448,-0.600232,0.366423,0.846109
max,1.474223,0.013535,1.423233,1.737388


In [34]:
df.T

Unnamed: 0,2022-02-23,2022-02-24,2022-02-25,2022-02-26,2022-02-27,2022-02-28
A,1.21641,-0.750925,1.474223,-1.125284,-0.313378,0.224564
B,-0.584314,0.013535,-0.84877,-1.851011,-0.647989,-0.802723
C,1.423233,-0.894995,-0.367688,0.611127,-1.353883,-2.52301
D,1.737388,0.56596,0.357055,-1.026441,0.939492,-0.390186


In [35]:
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2022-02-23,1.737388,1.423233,-0.584314,1.21641
2022-02-24,0.56596,-0.894995,0.013535,-0.750925
2022-02-25,0.357055,-0.367688,-0.84877,1.474223
2022-02-26,-1.026441,0.611127,-1.851011,-1.125284
2022-02-27,0.939492,-1.353883,-0.647989,-0.313378
2022-02-28,-0.390186,-2.52301,-0.802723,0.224564


In [36]:
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-24,-0.750925,0.013535,-0.894995,0.56596


In [37]:
df["A"]

2022-02-23    1.216410
2022-02-24   -0.750925
2022-02-25    1.474223
2022-02-26   -1.125284
2022-02-27   -0.313378
2022-02-28    0.224564
Freq: D, Name: A, dtype: float64

In [38]:
df[0:3]

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055


In [39]:
df["20220224":"20220228"]

Unnamed: 0,A,B,C,D
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186


In [40]:
df.loc[dates[0]]

A    1.216410
B   -0.584314
C    1.423233
D    1.737388
Name: 2022-02-23 00:00:00, dtype: float64

In [42]:
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2022-02-23,1.21641,-0.584314
2022-02-24,-0.750925,0.013535
2022-02-25,1.474223,-0.84877
2022-02-26,-1.125284,-1.851011
2022-02-27,-0.313378,-0.647989
2022-02-28,0.224564,-0.802723


In [44]:
df.loc["20220225":"20220228", ["A", "B"]]

Unnamed: 0,A,B
2022-02-25,1.474223,-0.84877
2022-02-26,-1.125284,-1.851011
2022-02-27,-0.313378,-0.647989
2022-02-28,0.224564,-0.802723


In [45]:
df.loc["20220228", ["A", "B"]]

A    0.224564
B   -0.802723
Name: 2022-02-28 00:00:00, dtype: float64

In [46]:
df.loc[dates[0], "A"]

1.2164095916595234

In [47]:
df.at[dates[0], "A"]

1.2164095916595234

In [48]:
df.iloc[3]

A   -1.125284
B   -1.851011
C    0.611127
D   -1.026441
Name: 2022-02-26 00:00:00, dtype: float64

In [49]:
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2022-02-26,-1.125284,-1.851011
2022-02-27,-0.313378,-0.647989


In [50]:
df

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186


In [51]:
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2022-02-24,-0.750925,-0.894995
2022-02-25,1.474223,-0.367688
2022-02-27,-0.313378,-1.353883


In [52]:
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055


In [53]:
df.iloc[:, 1:3]

Unnamed: 0,B,C
2022-02-23,-0.584314,1.423233
2022-02-24,0.013535,-0.894995
2022-02-25,-0.84877,-0.367688
2022-02-26,-1.851011,0.611127
2022-02-27,-0.647989,-1.353883
2022-02-28,-0.802723,-2.52301


In [54]:
df.iloc[1, 1]

0.013534589744097242

In [55]:
df.iat[1, 1]

0.013534589744097242

In [56]:
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186


In [57]:
df[df > 0]

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,,1.423233,1.737388
2022-02-24,,0.013535,,0.56596
2022-02-25,1.474223,,,0.357055
2022-02-26,,,0.611127,
2022-02-27,,,,0.939492
2022-02-28,0.224564,,,


In [58]:
df2 = df.copy()

In [60]:
df2["E"] = ["one", "one", "two", "three", "four", "three"]
df2

Unnamed: 0,A,B,C,D,E
2022-02-23,1.21641,-0.584314,1.423233,1.737388,one
2022-02-24,-0.750925,0.013535,-0.894995,0.56596,one
2022-02-25,1.474223,-0.84877,-0.367688,0.357055,two
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441,three
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492,four
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186,three


In [61]:
df2[df2["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2022-02-25,1.474223,-0.84877,-0.367688,0.357055,two
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492,four


In [63]:
# Integer Series 1..6 on the same six daily dates used for the frame's index.
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range(start="20220223", periods=6),
)
s1

2022-02-23    1
2022-02-24    2
2022-02-25    3
2022-02-26    4
2022-02-27    5
2022-02-28    6
Freq: D, dtype: int64

In [77]:
df["F"] = s1
df

Unnamed: 0,A,B,C,D,F
2022-02-23,1.21641,-0.584314,1.423233,1.737388,1
2022-02-24,-0.750925,0.013535,-0.894995,0.56596,2
2022-02-25,1.474223,-0.84877,-0.367688,0.357055,3
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441,4
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492,5
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186,6


In [78]:
df.at[dates[0], "A"] = 0
df

Unnamed: 0,A,B,C,D,F
2022-02-23,0.0,-0.584314,1.423233,1.737388,1
2022-02-24,-0.750925,0.013535,-0.894995,0.56596,2
2022-02-25,1.474223,-0.84877,-0.367688,0.357055,3
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441,4
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492,5
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186,6


In [80]:
df.iat[0, 1] = 0
df

Unnamed: 0,A,B,C,D,F
2022-02-23,0.0,0.0,1.423233,1.737388,1
2022-02-24,-0.750925,0.013535,-0.894995,0.56596,2
2022-02-25,1.474223,-0.84877,-0.367688,0.357055,3
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441,4
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492,5
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186,6


In [81]:
df.loc[:, "D"] = np.array([5] * len(df))
df

Unnamed: 0,A,B,C,D,F
2022-02-23,0.0,0.0,1.423233,5,1
2022-02-24,-0.750925,0.013535,-0.894995,5,2
2022-02-25,1.474223,-0.84877,-0.367688,5,3
2022-02-26,-1.125284,-1.851011,0.611127,5,4
2022-02-27,-0.313378,-0.647989,-1.353883,5,5
2022-02-28,0.224564,-0.802723,-2.52301,5,6


In [83]:
# Work on a copy so `df` itself is left untouched.
df2 = df.copy()
# Wherever a value is positive, overwrite it with its negation, so every
# non-zero entry ends up negative.
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D,F
2022-02-23,0.0,0.0,-1.423233,-5,-1
2022-02-24,-0.750925,-0.013535,-0.894995,-5,-2
2022-02-25,-1.474223,-0.84877,-0.367688,-5,-3
2022-02-26,-1.125284,-1.851011,-0.611127,-5,-4
2022-02-27,-0.313378,-0.647989,-1.353883,-5,-5
2022-02-28,-0.224564,-0.802723,-2.52301,-5,-6


In [84]:
# Keep only the first four dates and add a new column "E", which starts out
# as all-NaN because it does not exist in `df`.
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ["E"])
# Fill "E" for the first two dates only; the remaining two rows stay NaN.
df1.loc[dates[0] : dates[1], "E"] = 1
df1

Unnamed: 0,A,B,C,D,F,E
2022-02-23,0.0,0.0,1.423233,5,1,1.0
2022-02-24,-0.750925,0.013535,-0.894995,5,2,1.0
2022-02-25,1.474223,-0.84877,-0.367688,5,3,
2022-02-26,-1.125284,-1.851011,0.611127,5,4,


In [85]:
df1.dropna(how="any")

Unnamed: 0,A,B,C,D,F,E
2022-02-23,0.0,0.0,1.423233,5,1,1.0
2022-02-24,-0.750925,0.013535,-0.894995,5,2,1.0


In [87]:
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,F,E
2022-02-23,0.0,0.0,1.423233,5,1,1.0
2022-02-24,-0.750925,0.013535,-0.894995,5,2,1.0
2022-02-25,1.474223,-0.84877,-0.367688,5,3,5.0
2022-02-26,-1.125284,-1.851011,0.611127,5,4,5.0


In [88]:
pd.isna(df1)

Unnamed: 0,A,B,C,D,F,E
2022-02-23,False,False,False,False,False,False
2022-02-24,False,False,False,False,False,False
2022-02-25,False,False,False,False,False,True
2022-02-26,False,False,False,False,False,True


In [89]:
df.mean()

A   -0.081800
B   -0.689493
C   -0.517536
D    5.000000
F    3.500000
dtype: float64

In [90]:
# Row-wise mean: one value per date, averaged across the columns.
df.mean(axis=1)

2022-02-23    1.484647
2022-02-24    1.073523
2022-02-25    1.651553
2022-02-26    1.326966
2022-02-27    1.536950
2022-02-28    1.579766
Freq: D, dtype: float64

In [97]:
# shift(2) moves every value two positions later along the date index:
# the first two slots become NaN and the last two original values drop off.
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
s

2022-02-23    NaN
2022-02-24    NaN
2022-02-25    1.0
2022-02-26    3.0
2022-02-27    5.0
2022-02-28    NaN
Freq: D, dtype: float64

In [98]:
df.sub(s, axis="index")

Unnamed: 0,A,B,C,D,F
2022-02-23,,,,,
2022-02-24,,,,,
2022-02-25,0.474223,-1.84877,-1.367688,4.0,2.0
2022-02-26,-4.125284,-4.851011,-2.388873,2.0,1.0
2022-02-27,-5.313378,-5.647989,-6.353883,0.0,0.0
2022-02-28,,,,,


In [99]:
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D,F
2022-02-23,0.0,0.0,1.423233,5,1
2022-02-24,-0.750925,0.013535,0.528238,10,3
2022-02-25,0.723299,-0.835235,0.16055,15,6
2022-02-26,-0.401986,-2.686246,0.771677,20,10
2022-02-27,-0.715363,-3.334235,-0.582206,25,15
2022-02-28,-0.4908,-4.136958,-3.105216,30,21


In [101]:
df.apply(lambda x: x.max() - x.min())

A    2.599508
B    1.864545
C    3.946243
D    0.000000
F    5.000000
dtype: float64

In [105]:
# Ten random integers drawn from 0 through 6 (the upper bound 7 is exclusive).
s = pd.Series(data=np.random.randint(low=0, high=7, size=10))
s

0    5
1    5
2    4
3    1
4    6
5    5
6    0
7    5
8    5
9    5
dtype: int32

In [106]:
s.value_counts()

5    6
4    1
1    1
6    1
0    1
dtype: int64

In [107]:
# Mixed-case strings with one missing value; the .str accessor lower-cases
# each string and leaves the NaN entry as NaN.
s = pd.Series(
    data=["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"]
)
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

In [109]:
df = pd.DataFrame(np.random.randn(10, 4))
df

Unnamed: 0,0,1,2,3
0,0.723211,0.482555,-0.566821,1.023446
1,0.133799,0.627141,-0.271139,0.567261
2,0.348491,-0.724072,-1.744044,-0.811076
3,-0.248251,1.131712,-0.38357,0.873038
4,-1.071361,0.172579,-0.631381,-1.276512
5,1.279017,0.656447,0.070497,-1.879534
6,-0.467673,-0.592992,0.98661,0.148461
7,-0.617012,2.06893,0.52754,0.114069
8,-0.420015,0.880216,0.586187,0.917847
9,0.718272,-0.627193,1.63872,0.560879


In [110]:
# Split the ten rows into three consecutive positional chunks (3 + 4 + 3).
pieces = [df.iloc[:3], df.iloc[3:7], df.iloc[7:]]

In [111]:
pieces

[          0         1         2         3
 0  0.723211  0.482555 -0.566821  1.023446
 1  0.133799  0.627141 -0.271139  0.567261
 2  0.348491 -0.724072 -1.744044 -0.811076,
           0         1         2         3
 3 -0.248251  1.131712 -0.383570  0.873038
 4 -1.071361  0.172579 -0.631381 -1.276512
 5  1.279017  0.656447  0.070497 -1.879534
 6 -0.467673 -0.592992  0.986610  0.148461,
           0         1         2         3
 7 -0.617012  2.068930  0.527540  0.114069
 8 -0.420015  0.880216  0.586187  0.917847
 9  0.718272 -0.627193  1.638720  0.560879]

In [112]:
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,0.723211,0.482555,-0.566821,1.023446
1,0.133799,0.627141,-0.271139,0.567261
2,0.348491,-0.724072,-1.744044,-0.811076
3,-0.248251,1.131712,-0.38357,0.873038
4,-1.071361,0.172579,-0.631381,-1.276512
5,1.279017,0.656447,0.070497,-1.879534
6,-0.467673,-0.592992,0.98661,0.148461
7,-0.617012,2.06893,0.52754,0.114069
8,-0.420015,0.880216,0.586187,0.917847
9,0.718272,-0.627193,1.63872,0.560879


In [113]:
# Two small frames that share a "key" column; the key "foo" is deliberately
# duplicated on both sides.
left = pd.DataFrame(dict(key=["foo", "foo"], lval=[1, 2]))
right = pd.DataFrame(dict(key=["foo", "foo"], rval=[4, 5]))

In [114]:
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [115]:
right

Unnamed: 0,key,rval
0,foo,4
1,foo,5


In [116]:
# Join on "key". Both sides repeat "foo" twice, so every left row pairs with
# every right row, giving 2 x 2 = 4 result rows.
left.merge(right, on="key")

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [117]:
# Two string grouping columns ("A", "B") plus two numeric columns ("C", "D")
# filled with unseeded random draws.
df = pd.DataFrame(
    dict(
        A=["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        B=["one", "one", "two", "three", "two", "two", "one", "three"],
        C=np.random.randn(8),
        D=np.random.randn(8),
    )
)
df

Unnamed: 0,A,B,C,D
0,foo,one,0.135461,0.312179
1,bar,one,0.537111,-1.297565
2,foo,two,1.038696,2.058621
3,bar,three,-0.503046,0.188416
4,foo,two,-0.722922,0.225139
5,bar,two,-1.45664,0.347218
6,foo,one,0.885266,-0.805854
7,foo,three,0.531064,-1.183341


In [120]:
# Sum the numeric columns per group of "A". Column "B" holds strings, so the
# numeric columns are selected explicitly instead of relying on pandas to
# drop "B" silently (newer pandas would concatenate the strings instead).
df.groupby("A")[["C", "D"]].sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,-1.422576,-0.761931
foo,1.867565,0.606743


In [121]:
df.groupby(["A", "B"]).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.537111,-1.297565
bar,three,-0.503046,0.188416
bar,two,-1.45664,0.347218
foo,one,1.020728,-0.493676
foo,three,0.531064,-1.183341
foo,two,0.315773,2.283759


In [127]:
# (first, second) label pairs for a two-level index: every outer label is
# paired with "one" and then "two", in that order.
tuples = [
    (outer, inner)
    for outer in ("bar", "baz", "foo", "qux")
    for inner in ("one", "two")
]

In [133]:
# Build a two-level row index from the (first, second) label pairs.
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
# 8x2 frame of unseeded random values on that index.
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"])
# Keep only the first four rows; the stack/unstack cells below use this slice.
df2 = df[:4]
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.219261,1.014446
bar,two,0.41763,0.459968
baz,one,0.513018,1.168497
baz,two,-0.299681,-0.438081


In [134]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.219261,1.014446
bar,two,0.41763,0.459968
baz,one,0.513018,1.168497
baz,two,-0.299681,-0.438081
foo,one,0.251549,-1.848364
foo,two,-0.443371,-0.129616
qux,one,-1.026567,-1.393853
qux,two,-0.628313,-0.35246


In [136]:
stacked = df2.stack()
stacked

first  second   
bar    one     A   -0.219261
               B    1.014446
       two     A    0.417630
               B    0.459968
baz    one     A    0.513018
               B    1.168497
       two     A   -0.299681
               B   -0.438081
dtype: float64

In [137]:
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.219261,1.014446
bar,two,0.41763,0.459968
baz,one,0.513018,1.168497
baz,two,-0.299681,-0.438081


In [138]:
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,-0.219261,0.41763
bar,B,1.014446,0.459968
baz,A,0.513018,-0.299681
baz,B,1.168497,-0.438081


In [139]:
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.219261,0.513018
one,B,1.014446,1.168497
two,A,0.41763,-0.299681
two,B,0.459968,-0.438081


In [140]:
# Twelve-row frame for the pivot example: three repeating label columns
# (each pattern repeats to length 12) and two columns of unseeded random values.
df = pd.DataFrame(
    data={
        "A": 3 * ["one", "one", "two", "three"],
        "B": 4 * ["A", "B", "C"],
        "C": 2 * ["foo", "foo", "foo", "bar", "bar", "bar"],
        "D": np.random.randn(12),
        "E": np.random.randn(12),
    }
)
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,-0.327845,0.394399
1,one,B,foo,0.237457,-0.358715
2,two,C,foo,1.286432,-0.308848
3,three,A,bar,2.857433,-1.176876
4,one,B,bar,0.252496,0.504522
5,one,C,bar,-0.657143,-0.540318
6,two,A,foo,0.012389,-0.56695
7,three,B,foo,0.637402,-0.121269
8,one,C,foo,1.069567,0.390893
9,one,A,bar,1.054304,1.70643


In [141]:
# Spread the "D" values into one column per "C" label, with (A, B) pairs as
# rows; combinations that have no source row show up as NaN.
df.pivot_table(values="D", index=["A", "B"], columns=["C"])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,1.054304,-0.327845
one,B,0.252496,0.237457
one,C,-0.657143,1.069567
three,A,2.857433,
three,B,,0.637402
three,C,-0.095211,
two,A,,0.012389
two,B,-0.177413,
two,C,,1.286432


In [147]:
# 100 one-second timestamps starting at 2012-01-01 00:00:00. The lowercase
# "s" alias is used because the uppercase "S" spelling is deprecated in
# newer pandas releases; "s" is accepted by old and new versions alike.
rng = pd.date_range("1/1/2012", periods=100, freq="s")
# One random integer in [0, 500) per second (unseeded).
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
# Down-sample to 5-minute bins. All 100 seconds fall into the first bin, so
# the result is a single row holding the total.
ts.resample("5Min").sum()

2012-01-01    25232
Freq: 5T, dtype: int32

In [149]:
ts

2012-01-01 00:00:00    496
2012-01-01 00:00:01    155
2012-01-01 00:00:02    214
2012-01-01 00:00:03    172
2012-01-01 00:00:04    418
                      ... 
2012-01-01 00:01:35    352
2012-01-01 00:01:36    127
2012-01-01 00:01:37     94
2012-01-01 00:01:38     64
2012-01-01 00:01:39    319
Freq: S, Length: 100, dtype: int32

In [150]:
# Five consecutive daily timestamps starting 2012-03-06, each paired with an
# unseeded random value.
rng = pd.date_range(start="3/6/2012 00:00", periods=5, freq="D")
ts = pd.Series(data=np.random.randn(len(rng)), index=rng)
ts

2012-03-06   -1.572910
2012-03-07   -0.650926
2012-03-08   -1.207607
2012-03-09    0.433308
2012-03-10    1.404296
Freq: D, dtype: float64

In [151]:
ts_utc = ts.tz_localize("UTC")
ts_utc

2012-03-06 00:00:00+00:00   -1.572910
2012-03-07 00:00:00+00:00   -0.650926
2012-03-08 00:00:00+00:00   -1.207607
2012-03-09 00:00:00+00:00    0.433308
2012-03-10 00:00:00+00:00    1.404296
Freq: D, dtype: float64

In [152]:
ts_utc.tz_convert("US/Eastern")

2012-03-05 19:00:00-05:00   -1.572910
2012-03-06 19:00:00-05:00   -0.650926
2012-03-07 19:00:00-05:00   -1.207607
2012-03-08 19:00:00-05:00    0.433308
2012-03-09 19:00:00-05:00    1.404296
Freq: D, dtype: float64

In [156]:
# freq="M" yields month-end timestamps: five consecutive month ends
# starting with January 2012.
# NOTE(review): newer pandas releases appear to prefer the "ME" spelling for
# month-end in date_range — confirm against the installed version.
rng = pd.date_range("1/1/2012", periods=5, freq="M")
# One unseeded random value per month end.
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2012-01-31   -0.311597
2012-02-29   -0.317247
2012-03-31    1.063979
2012-04-30    0.216261
2012-05-31    0.687834
Freq: M, dtype: float64

In [158]:
ps = ts.to_period()
ps

2012-01   -0.311597
2012-02   -0.317247
2012-03    1.063979
2012-04    0.216261
2012-05    0.687834
Freq: M, dtype: float64

In [159]:
ps.to_timestamp()

2012-01-01   -0.311597
2012-02-01   -0.317247
2012-03-01    1.063979
2012-04-01    0.216261
2012-05-01    0.687834
Freq: MS, dtype: float64

In [153]:
# Small id / raw_grade frame. The In [163] cell further down builds the same
# frame again right before the categorical examples use it.
df = pd.DataFrame(
    data={
        "id": [1, 2, 3, 4, 5, 6],
        "raw_grade": ["a", "b", "b", "a", "a", "e"],
    }
)

In [160]:
# Quarterly periods 1990Q1..2000Q4 for a fiscal year that ends in November.
prng = pd.period_range("1990Q1", "2000Q4", freq="Q-NOV")
ts = pd.Series(np.random.randn(len(prng)), prng)
# Re-label each quarter as 09:00 on the first day of the month after the
# quarter ends:
#   last month of the quarter -> + 1 month -> first hour of that month -> + 9 hours.
# The lowercase "h" alias is used because the uppercase "H" spelling is
# deprecated in newer pandas; "h" is accepted by old and new versions alike.
ts.index = (prng.asfreq("M", how="end") + 1).asfreq("h", how="start") + 9
ts.head()

1990-03-01 09:00    1.356882
1990-06-01 09:00    0.852872
1990-09-01 09:00    0.699896
1990-12-01 09:00   -1.484208
1991-03-01 09:00    0.636760
Freq: H, dtype: float64

In [163]:
# Six students with a letter grade each; the raw_grade strings are turned
# into a categorical column in the following cells.
df = pd.DataFrame(
    dict(
        id=[1, 2, 3, 4, 5, 6],
        raw_grade=["a", "b", "b", "a", "a", "e"],
    )
)
df

Unnamed: 0,id,raw_grade
0,1,a
1,2,b
2,3,b
3,4,a
4,5,a
5,6,e


In [164]:
# Add a categorical copy of raw_grade; the categories are inferred from the
# distinct values present ('a', 'b', 'e').
df["grade"] = df["raw_grade"].astype("category")
df["grade"]

0    a
1    b
2    b
3    a
4    a
5    e
Name: grade, dtype: category
Categories (3, object): ['a', 'b', 'e']

In [165]:
# Give the three categories readable names, positionally:
#   'a' -> "very good", 'b' -> "good", 'e' -> "very bad".
# rename_categories returns a new Series, so it is assigned back. Assigning
# to `.cat.categories` directly is not supported on newer pandas.
df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])

In [166]:
df["grade"]

0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (3, object): ['very good', 'good', 'very bad']

In [170]:
# Replace the category set with a five-level scale in this order. "bad" and
# "medium" are new categories with no rows yet. Sorting by "grade" follows
# this category order, not alphabetical order.
df["grade"] = df["grade"].cat.set_categories(
    ["very bad", "bad", "medium", "good", "very good"]
)

In [171]:
df.sort_values(by="grade")

Unnamed: 0,id,raw_grade,grade
5,6,e,very bad
1,2,b,good
2,3,b,good
0,1,a,very good
3,4,a,very good
4,5,a,very good


In [172]:
# Count rows per category. observed=False is passed explicitly so categories
# with no rows ("bad", "medium") still appear with a count of 0, regardless
# of the pandas version's default.
df.groupby("grade", observed=False).size()

grade
very bad     1
bad          0
medium       0
good         2
very good    3
dtype: int64