In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
# A Series built from a plain list gets a default integer index (0..5).
# The single np.nan entry is what makes the whole Series float64.
s = pd.Series(
    data=[1, 3, 5, np.nan, 7, 9],
)
s

0    1.0
1    3.0
2    5.0
3    NaN
4    7.0
5    9.0
dtype: float64

In [6]:
# Six consecutive calendar days starting 2022-02-23; used as the row index below.
# freq="D" is spelled out here but is also what date_range picks by default.
dates = pd.date_range(start="20220223", periods=6, freq="D")
dates

DatetimeIndex(['2022-02-23', '2022-02-24', '2022-02-25', '2022-02-26',
               '2022-02-27', '2022-02-28'],
              dtype='datetime64[ns]', freq='D')

In [7]:
# Build a 6x4 frame of standard-normal draws, indexed by `dates`, columns A-D.
# A fixed seed makes "Restart Kernel -> Run All" produce the same numbers every
# time; the previous unseeded np.random.randn call gave a different frame per run.
# NOTE: the outputs saved in this notebook came from an earlier unseeded run, so
# the displayed values will change once when the notebook is next re-executed.
df = pd.DataFrame(
    np.random.default_rng(seed=0).standard_normal((6, 4)),
    index=dates,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186


In [24]:
# A frame built from a dict of column specs: scalars ("A", "B", "F") are
# broadcast to the length of the array-like columns (4 rows), and every column
# keeps its own dtype.
df2 = pd.DataFrame(
    data={
        "A": 1.0,
        "B": pd.Timestamp("20220101"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["test", "train"] * 2),
        "F": "foo",
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2022-01-01,1.0,3,test,foo
1,1.0,2022-01-01,1.0,3,train,foo
2,1.0,2022-01-01,1.0,3,test,foo
3,1.0,2022-01-01,1.0,3,train,foo


In [25]:
# Per-column dtypes: each column of df2 keeps the dtype it was constructed with.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [27]:
# First five rows (5 is the default row count for head).
df.head(n=5)

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492


In [28]:
# Last three rows of the frame.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186


In [29]:
# Row labels of df: the daily DatetimeIndex the frame was constructed with.
df.index

DatetimeIndex(['2022-02-23', '2022-02-24', '2022-02-25', '2022-02-26',
               '2022-02-27', '2022-02-28'],
              dtype='datetime64[ns]', freq='D')

In [30]:
# Column labels of df.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [31]:
# Underlying values as a 2-D NumPy array; index and column labels are dropped.
# Every column of df is float, so the result is a plain float array.
df.to_numpy()

array([[ 1.21640959, -0.58431356,  1.42323274,  1.73738825],
       [-0.75092467,  0.01353459, -0.89499484,  0.56595967],
       [ 1.47422334, -0.84877   , -0.36768789,  0.35705475],
       [-1.12528441, -1.85101079,  0.61112687, -1.02644113],
       [-0.31337775, -0.6479892 , -1.35388287,  0.93949166],
       [ 0.22456369, -0.80272267, -2.52301042, -0.39018592]])

In [32]:
# df2 mixes floats, timestamps, ints and strings, so no single numeric dtype
# fits and the resulting array falls back to dtype=object.
df2.to_numpy()

array([[1.0, Timestamp('2022-01-01 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2022-01-01 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2022-01-01 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2022-01-01 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [33]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.120935,-0.786879,-0.517536,0.363878
std,1.052998,0.606208,1.409028,0.97574
min,-1.125284,-1.851011,-2.52301,-1.026441
25%,-0.641538,-0.837258,-1.239161,-0.203376
50%,-0.044407,-0.725356,-0.631341,0.461507
75%,0.968448,-0.600232,0.366423,0.846109
max,1.474223,0.013535,1.423233,1.737388


In [34]:
# Transpose: the dates become columns and A-D become the row labels.
df.transpose()

Unnamed: 0,2022-02-23,2022-02-24,2022-02-25,2022-02-26,2022-02-27,2022-02-28
A,1.21641,-0.750925,1.474223,-1.125284,-0.313378,0.224564
B,-0.584314,0.013535,-0.84877,-1.851011,-0.647989,-0.802723
C,1.423233,-0.894995,-0.367688,0.611127,-1.353883,-2.52301
D,1.737388,0.56596,0.357055,-1.026441,0.939492,-0.390186


In [35]:
# Reorder the columns by label, descending (D, C, B, A); rows are untouched.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2022-02-23,1.737388,1.423233,-0.584314,1.21641
2022-02-24,0.56596,-0.894995,0.013535,-0.750925
2022-02-25,0.357055,-0.367688,-0.84877,1.474223
2022-02-26,-1.026441,0.611127,-1.851011,-1.125284
2022-02-27,0.939492,-1.353883,-0.647989,-0.313378
2022-02-28,-0.390186,-2.52301,-0.802723,0.224564


In [36]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values("B", ascending=True)

Unnamed: 0,A,B,C,D
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-24,-0.750925,0.013535,-0.894995,0.56596


In [37]:
# Select the single column "A" as a Series (all rows).
df.loc[:, "A"]

2022-02-23    1.216410
2022-02-24   -0.750925
2022-02-25    1.474223
2022-02-26   -1.125284
2022-02-27   -0.313378
2022-02-28    0.224564
Freq: D, Name: A, dtype: float64

In [38]:
# First three rows by integer position (end position 3 is excluded).
df.iloc[0:3]

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055


In [39]:
# Row slice by date labels; unlike positional slicing, both endpoints are included.
df.loc["20220224":"20220228"]

Unnamed: 0,A,B,C,D
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186


In [40]:
# One row by label (the first date), returned as a Series keyed by column name.
df.loc[dates[0], :]

A    1.216410
B   -0.584314
C    1.423233
D    1.737388
Name: 2022-02-23 00:00:00, dtype: float64

In [42]:
# All rows, restricted to columns A and B.
df[["A", "B"]]

Unnamed: 0,A,B
2022-02-23,1.21641,-0.584314
2022-02-24,-0.750925,0.013535
2022-02-25,1.474223,-0.84877
2022-02-26,-1.125284,-1.851011
2022-02-27,-0.313378,-0.647989
2022-02-28,0.224564,-0.802723


In [44]:
# Label-based selection on both axes: rows 2022-02-25 through 2022-02-28
# (both endpoints included) and columns A and B.
df.loc[slice("20220225", "20220228"), ["A", "B"]]

Unnamed: 0,A,B
2022-02-25,1.474223,-0.84877
2022-02-26,-1.125284,-1.851011
2022-02-27,-0.313378,-0.647989
2022-02-28,0.224564,-0.802723


In [45]:
# A single row label plus a list of columns collapses the row dimension:
# the result is a Series holding A and B for 2022-02-28.
df.loc["20220228", ["A", "B"]]

A    0.224564
B   -0.802723
Name: 2022-02-28 00:00:00, dtype: float64

In [46]:
# One row label and one column label select a single scalar value.
df.loc[dates[0], "A"]

1.2164095916595234

In [47]:
# .at is the accessor dedicated to single-value label lookups; it returns the
# same scalar as df.loc[dates[0], "A"].
df.at[dates[0], "A"]

1.2164095916595234

In [48]:
# Positional row selection: the row at integer position 3 (the fourth row,
# 2022-02-26), returned as a Series.
df.iloc[3]

A   -1.125284
B   -1.851011
C    0.611127
D   -1.026441
Name: 2022-02-26 00:00:00, dtype: float64

In [49]:
# Positional slices on both axes: rows 3-4 and columns 0-1 (A and B).
# End positions are excluded, as with ordinary Python slices.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2022-02-26,-1.125284,-1.851011
2022-02-27,-0.313378,-0.647989


In [50]:
# Display the full frame (only 6 rows) as a reference for the selections that follow.
df

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186


In [51]:
# Lists of integer positions: rows 1, 2 and 4, columns 0 and 2 (A and C).
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2022-02-24,-0.750925,-0.894995
2022-02-25,1.474223,-0.367688
2022-02-27,-0.313378,-1.353883


In [52]:
# Rows at positions 1-2 with every column kept (no column selector needed).
df.iloc[1:3]

Unnamed: 0,A,B,C,D
2022-02-24,-0.750925,0.013535,-0.894995,0.56596
2022-02-25,1.474223,-0.84877,-0.367688,0.357055


In [53]:
# All rows, columns at positions 1-2 (B and C).
df.iloc[:, 1:3]

Unnamed: 0,B,C
2022-02-23,-0.584314,1.423233
2022-02-24,0.013535,-0.894995
2022-02-25,-0.84877,-0.367688
2022-02-26,-1.851011,0.611127
2022-02-27,-0.647989,-1.353883
2022-02-28,-0.802723,-2.52301


In [54]:
# Scalar at row position 1, column position 1 (column B on 2022-02-24).
df.iloc[1, 1]

0.013534589744097242

In [55]:
# .iat is the positional single-value accessor; it returns the same scalar as
# df.iloc[1, 1].
df.iat[1, 1]

0.013534589744097242

In [56]:
# Boolean row filter: keep only the rows whose A value is strictly positive.
df.loc[df["A"].gt(0)]

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,-0.584314,1.423233,1.737388
2022-02-25,1.474223,-0.84877,-0.367688,0.357055
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186


In [57]:
# Element-wise mask: the shape is preserved, and every value that is not
# strictly positive is replaced by NaN.
df.where(df > 0)

Unnamed: 0,A,B,C,D
2022-02-23,1.21641,,1.423233,1.737388
2022-02-24,,0.013535,,0.56596
2022-02-25,1.474223,,,0.357055
2022-02-26,,,0.611127,
2022-02-27,,,,0.939492
2022-02-28,0.224564,,,


In [58]:
# Independent (deep) copy of df, so adding columns to df2 leaves df untouched.
# Note: this rebinds the name df2, which earlier held the mixed-dtype example frame.
df2 = df.copy(deep=True)

In [60]:
# Add a string column "E" to df2 in place; the list supplies one value per row
# (six values for the six dates). Re-running the cell just overwrites the column.
df2["E"] = ["one", "one", "two", "three", "four", "three"]
df2

Unnamed: 0,A,B,C,D,E
2022-02-23,1.21641,-0.584314,1.423233,1.737388,one
2022-02-24,-0.750925,0.013535,-0.894995,0.56596,one
2022-02-25,1.474223,-0.84877,-0.367688,0.357055,two
2022-02-26,-1.125284,-1.851011,0.611127,-1.026441,three
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492,four
2022-02-28,0.224564,-0.802723,-2.52301,-0.390186,three


In [61]:
# Membership filter: keep the rows whose E value is either "two" or "four".
df2.loc[df2["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2022-02-25,1.474223,-0.84877,-0.367688,0.357055,two
2022-02-27,-0.313378,-0.647989,-1.353883,0.939492,four
