In [2]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Build a Series from a plain list; pandas supplies a default integer index.
# The NaN entry makes the whole Series float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive calendar days starting 2024-04-30; used below as a row index.
dates = pd.date_range(start='20240430', periods=6)
dates

DatetimeIndex(['2024-04-30', '2024-05-01', '2024-05-02', '2024-05-03',
               '2024-05-04', '2024-05-05'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# 6x4 frame of standard-normal samples, indexed by `dates`, columns A-D.
# NOTE(review): no RNG seed is set in the visible cells, so these values
# (and every output derived from `df`) change on each run -- consider seeding
# in the setup cell for reproducibility.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2024-04-30,-0.601366,-0.367457,-1.267363,-0.49002
2024-05-01,-1.537855,0.309635,-0.777803,0.53443
2024-05-02,-0.271131,0.071352,-0.284037,0.159849
2024-05-03,-0.149212,0.00086,0.499172,-0.681015
2024-05-04,2.290008,-1.07387,0.584623,-1.007528
2024-05-05,1.733432,-0.254478,0.602313,0.35629


In [5]:
# Build a frame from a dict of heterogeneous values. Scalars are broadcast to
# the 4 rows implied by the array-like entries; each column keeps its own dtype.
df2 = pd.DataFrame(
    {
        'A': 1,                                                     # scalar, repeated on every row
        'B': pd.Timestamp('20240509'),                              # single timestamp, repeated
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),   # float32 Series; its index 0..3 becomes the row index
        'D': np.array([3] * 4, dtype='int32'),                      # int32 NumPy array
        'E': pd.Categorical(['test', 'train', 'test', 'train']),    # categorical column
        'F': 'foo',                                                 # string scalar, repeated
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1,2024-05-09,1.0,3,test,foo
1,1,2024-05-09,1.0,3,train,foo
2,1,2024-05-09,1.0,3,test,foo
3,1,2024-05-09,1.0,3,train,foo


In [6]:
# Show the dtype of every column in df2 (one entry per column).
df2.dtypes

A            int64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [7]:
# Preview the first rows of the frame (5 is the default row count).
df.head(n=5)

Unnamed: 0,A,B,C,D
2024-04-30,-0.601366,-0.367457,-1.267363,-0.49002
2024-05-01,-1.537855,0.309635,-0.777803,0.53443
2024-05-02,-0.271131,0.071352,-0.284037,0.159849
2024-05-03,-0.149212,0.00086,0.499172,-0.681015
2024-05-04,2.290008,-1.07387,0.584623,-1.007528


In [8]:
# Preview the last three rows of the frame.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2024-05-03,-0.149212,0.00086,0.499172,-0.681015
2024-05-04,2.290008,-1.07387,0.584623,-1.007528
2024-05-05,1.733432,-0.254478,0.602313,0.35629


In [9]:
# Row labels of df: the DatetimeIndex that was passed in as `index=dates`.
df.index

DatetimeIndex(['2024-04-30', '2024-05-01', '2024-05-02', '2024-05-03',
               '2024-05-04', '2024-05-05'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Underlying data as a 2-D NumPy array (row and column labels are dropped).
# `to_numpy()` is the accessor the pandas documentation recommends over the
# legacy `.values` attribute; for this all-float frame the result is the same.
df.to_numpy()

array([[-6.01365544e-01, -3.67456895e-01, -1.26736275e+00,
        -4.90019762e-01],
       [-1.53785525e+00,  3.09635042e-01, -7.77802600e-01,
         5.34430271e-01],
       [-2.71131471e-01,  7.13518438e-02, -2.84036810e-01,
         1.59848906e-01],
       [-1.49211825e-01,  8.59667052e-04,  4.99171962e-01,
        -6.81014804e-01],
       [ 2.29000807e+00, -1.07387004e+00,  5.84623105e-01,
        -1.00752846e+00],
       [ 1.73343213e+00, -2.54477832e-01,  6.02313126e-01,
         3.56290081e-01]])

In [11]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.243979,-0.218993,-0.107182,-0.187999
std,1.463843,0.482949,0.797079,0.623709
min,-1.537855,-1.07387,-1.267363,-1.007528
25%,-0.518807,-0.339212,-0.654361,-0.633266
50%,-0.210172,-0.126809,0.107568,-0.165085
75%,1.262771,0.053729,0.56326,0.30718
max,2.290008,0.309635,0.602313,0.53443


In [12]:
# Transpose: column labels A-D become the rows and the dates become the columns.
df.T

Unnamed: 0,2024-04-30,2024-05-01,2024-05-02,2024-05-03,2024-05-04,2024-05-05
A,-0.601366,-1.537855,-0.271131,-0.149212,2.290008,1.733432
B,-0.367457,0.309635,0.071352,0.00086,-1.07387,-0.254478
C,-1.267363,-0.777803,-0.284037,0.499172,0.584623,0.602313
D,-0.49002,0.53443,0.159849,-0.681015,-1.007528,0.35629


In [13]:
# Reorder the columns by label, descending (D, C, B, A); rows are untouched.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2024-04-30,-0.49002,-1.267363,-0.367457,-0.601366
2024-05-01,0.53443,-0.777803,0.309635,-1.537855
2024-05-02,0.159849,-0.284037,0.071352,-0.271131
2024-05-03,-0.681015,0.499172,0.00086,-0.149212
2024-05-04,-1.007528,0.584623,-1.07387,2.290008
2024-05-05,0.35629,0.602313,-0.254478,1.733432


In [14]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values(by='B', ascending=True)

Unnamed: 0,A,B,C,D
2024-05-04,2.290008,-1.07387,0.584623,-1.007528
2024-04-30,-0.601366,-0.367457,-1.267363,-0.49002
2024-05-05,1.733432,-0.254478,0.602313,0.35629
2024-05-03,-0.149212,0.00086,0.499172,-0.681015
2024-05-02,-0.271131,0.071352,-0.284037,0.159849
2024-05-01,-1.537855,0.309635,-0.777803,0.53443


In [15]:
# Select a single column by label; the result is a Series named 'A'.
df['A']

2024-04-30   -0.601366
2024-05-01   -1.537855
2024-05-02   -0.271131
2024-05-03   -0.149212
2024-05-04    2.290008
2024-05-05    1.733432
Freq: D, Name: A, dtype: float64

In [19]:
# Rows at positions 1 and 2 (the end of the slice is exclusive).
# `.iloc` states the positional intent explicitly instead of relying on `[]`
# interpreting an integer slice as a row slice; the selected rows are the same.
df.iloc[1:3]

Unnamed: 0,A,B,C,D
2024-05-01,-1.537855,0.309635,-0.777803,0.53443
2024-05-02,-0.271131,0.071352,-0.284037,0.159849


In [22]:
# Label-based row slice over the date index; both endpoints are included.
# `.loc` makes it explicit that these strings are row labels, not column keys;
# the selected rows are the same as with the bare `[]` slice.
df.loc['20240501':'20240505']

Unnamed: 0,A,B,C,D
2024-05-01,-1.537855,0.309635,-0.777803,0.53443
2024-05-02,-0.271131,0.071352,-0.284037,0.159849
2024-05-03,-0.149212,0.00086,0.499172,-0.681015
2024-05-04,2.290008,-1.07387,0.584623,-1.007528
2024-05-05,1.733432,-0.254478,0.602313,0.35629


In [23]:
# Select one row by its index label; the result is a Series keyed by column name.
df.loc[dates[0]]

A   -0.601366
B   -0.367457
C   -1.267363
D   -0.490020
Name: 2024-04-30 00:00:00, dtype: float64

In [25]:
# All rows, restricted to columns A and B (label-based selection on both axes).
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2024-04-30,-0.601366,-0.367457
2024-05-01,-1.537855,0.309635
2024-05-02,-0.271131,0.071352
2024-05-03,-0.149212,0.00086
2024-05-04,2.290008,-1.07387
2024-05-05,1.733432,-0.254478


In [26]:
# Label slice on the rows (both endpoints included) combined with a column list.
df.loc['20240501':'20240505', ['A', 'B']]

Unnamed: 0,A,B
2024-05-01,-1.537855,0.309635
2024-05-02,-0.271131,0.071352
2024-05-03,-0.149212,0.00086
2024-05-04,2.290008,-1.07387
2024-05-05,1.733432,-0.254478


In [27]:
# A single row label plus a column list collapses the result to a Series.
df.loc['20240501', ['A', 'B']]

A   -1.537855
B    0.309635
Name: 2024-05-01 00:00:00, dtype: float64

In [29]:
# Scalar lookup: the single value at row label dates[0], column 'A'.
df.at[dates[0], 'A']

-0.601365544208439