In [1]:
import numpy as np
import pandas as pd

In [2]:
# Create a Series from a list of values; pandas supplies a default integer index.
# The np.nan entry marks a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Build a daily DatetimeIndex of six consecutive dates starting 2022-09-13.
# It serves as the row index of the DataFrame created further down.
dates = pd.date_range(start="20220913", periods=6, freq="D")

dates

DatetimeIndex(['2022-09-13', '2022-09-14', '2022-09-15', '2022-09-16',
               '2022-09-17', '2022-09-18'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# Build a 6x4 frame of standard-normal samples, indexed by `dates`, columns A-D.
# A seeded Generator makes the values reproducible under Restart & Run All;
# the previous unseeded np.random.randn call gave different numbers on every run.
# NOTE: outputs recorded from the unseeded run will differ until the notebook is re-executed.
df = pd.DataFrame(
    np.random.default_rng(42).standard_normal((6, 4)),
    index=dates,
    columns=list("ABCD"),
)

In [6]:
# Display the full frame: six dated rows, columns A-D.
df

Unnamed: 0,A,B,C,D
2022-09-13,0.053948,0.667708,-0.013707,-0.419336
2022-09-14,-0.240254,-1.437997,-0.68061,2.238446
2022-09-15,0.215563,-1.301001,0.20478,-1.156588
2022-09-16,0.406662,-1.205261,0.493098,1.390879
2022-09-17,1.188936,0.454451,0.322318,-2.685991
2022-09-18,-0.546333,1.686757,-0.018351,-1.734101


In [7]:
# Build a frame from a dict of column specs, one differently-typed value per column.
# Scalars (A, B, F) are broadcast to the four rows defined by the array-like columns.
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["test", "train"] * 2),
        "F": "foo",
    }
)

In [8]:
# Display df2: four rows, columns A-F.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [9]:
# List the dtype of every column in df2.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [10]:
# Show the first rows of df (head() returns five rows when called without an argument).
df.head()

Unnamed: 0,A,B,C,D
2022-09-13,0.053948,0.667708,-0.013707,-0.419336
2022-09-14,-0.240254,-1.437997,-0.68061,2.238446
2022-09-15,0.215563,-1.301001,0.20478,-1.156588
2022-09-16,0.406662,-1.205261,0.493098,1.390879
2022-09-17,1.188936,0.454451,0.322318,-2.685991


In [11]:
# Show the last two rows of df.
df.tail(2)

Unnamed: 0,A,B,C,D
2022-09-17,1.188936,0.454451,0.322318,-2.685991
2022-09-18,-0.546333,1.686757,-0.018351,-1.734101


In [12]:
# Row labels of df: the DatetimeIndex passed in as index=dates.
df.index

DatetimeIndex(['2022-09-13', '2022-09-14', '2022-09-15', '2022-09-16',
               '2022-09-17', '2022-09-18'],
              dtype='datetime64[ns]', freq='D')

In [13]:
# Column labels of df.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [14]:
# Per-column summary statistics: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.179754,-0.189224,0.051255,-0.394449
std,0.59844,1.303516,0.409136,1.883977
min,-0.546333,-1.437997,-0.68061,-2.685991
25%,-0.166703,-1.277066,-0.01719,-1.589723
50%,0.134756,-0.375405,0.095536,-0.787962
75%,0.358887,0.614394,0.292934,0.938325
max,1.188936,1.686757,0.493098,2.238446


In [15]:
# Transpose: the dates become columns and A-D become the rows.
df.T

Unnamed: 0,2022-09-13,2022-09-14,2022-09-15,2022-09-16,2022-09-17,2022-09-18
A,0.053948,-0.240254,0.215563,0.406662,1.188936,-0.546333
B,0.667708,-1.437997,-1.301001,-1.205261,0.454451,1.686757
C,-0.013707,-0.68061,0.20478,0.493098,0.322318,-0.018351
D,-0.419336,2.238446,-1.156588,1.390879,-2.685991,-1.734101


In [16]:
# Display df again for reference.
df

Unnamed: 0,A,B,C,D
2022-09-13,0.053948,0.667708,-0.013707,-0.419336
2022-09-14,-0.240254,-1.437997,-0.68061,2.238446
2022-09-15,0.215563,-1.301001,0.20478,-1.156588
2022-09-16,0.406662,-1.205261,0.493098,1.390879
2022-09-17,1.188936,0.454451,0.322318,-2.685991
2022-09-18,-0.546333,1.686757,-0.018351,-1.734101


In [17]:
# Sort by the column labels (axis 1) in descending order: D, C, B, A.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2022-09-13,-0.419336,-0.013707,0.667708,0.053948
2022-09-14,2.238446,-0.68061,-1.437997,-0.240254
2022-09-15,-1.156588,0.20478,-1.301001,0.215563
2022-09-16,1.390879,0.493098,-1.205261,0.406662
2022-09-17,-2.685991,0.322318,0.454451,1.188936
2022-09-18,-1.734101,-0.018351,1.686757,-0.546333


In [18]:
# Select a single column by label; the result is a Series named "A".
df["A"]

2022-09-13    0.053948
2022-09-14   -0.240254
2022-09-15    0.215563
2022-09-16    0.406662
2022-09-17    1.188936
2022-09-18   -0.546333
Freq: D, Name: A, dtype: float64

In [19]:
# First three rows by integer position; .iloc makes the positional intent explicit.
df.iloc[:3]

Unnamed: 0,A,B,C,D
2022-09-13,0.053948,0.667708,-0.013707,-0.419336
2022-09-14,-0.240254,-1.437997,-0.68061,2.238446
2022-09-15,0.215563,-1.301001,0.20478,-1.156588


In [20]:
# Select one row by its index label (the first date); the row comes back as a Series.
df.loc[dates[0]]

A    0.053948
B    0.667708
C   -0.013707
D   -0.419336
Name: 2022-09-13 00:00:00, dtype: float64

In [21]:
# Select all rows (:) and the columns A and B by label.
df.loc[:,['A','B']]

Unnamed: 0,A,B
2022-09-13,0.053948,0.667708
2022-09-14,-0.240254,-1.437997
2022-09-15,0.215563,-1.301001
2022-09-16,0.406662,-1.205261
2022-09-17,1.188936,0.454451
2022-09-18,-0.546333,1.686757


In [23]:
# Label-based slicing with date strings; both endpoints are included in the result.
df.loc["20220913":"20220914", ["A", "B"]]

Unnamed: 0,A,B
2022-09-13,0.053948,0.667708
2022-09-14,-0.240254,-1.437997


In [None]:
# Selection by position (.iloc)

In [24]:
# Select the row at integer position 3 (the fourth row).
df.iloc[3]

A    0.406662
B   -1.205261
C    0.493098
D    1.390879
Name: 2022-09-16 00:00:00, dtype: float64

In [25]:
# Rows at positions 3-4 and columns at positions 0-1; the end of each slice is excluded.
df.iloc[3:5,0:2]

Unnamed: 0,A,B
2022-09-16,0.406662,-1.205261
2022-09-17,1.188936,0.454451


In [26]:
# Pick rows 1, 4, 5 and columns 0, 3 using explicit lists of integer positions.
df.iloc[[1,4,5],[0,3]]

Unnamed: 0,A,D
2022-09-14,-0.240254,2.238446
2022-09-17,1.188936,-2.685991
2022-09-18,-0.546333,-1.734101


In [27]:
# Boolean indexing: filter rows or mask values with a condition

In [29]:
# Keep only the rows where column A is positive.
df.loc[df["A"] > 0]

Unnamed: 0,A,B,C,D
2022-09-13,0.053948,0.667708,-0.013707,-0.419336
2022-09-15,0.215563,-1.301001,0.20478,-1.156588
2022-09-16,0.406662,-1.205261,0.493098,1.390879
2022-09-17,1.188936,0.454451,0.322318,-2.685991


In [37]:
# Keep the positive values and replace everything else with NaN; the shape is unchanged.
df.where(df > 0)

Unnamed: 0,A,B,C,D
2022-09-13,0.053948,0.667708,,
2022-09-14,,,,2.238446
2022-09-15,0.215563,,0.20478,
2022-09-16,0.406662,,0.493098,1.390879
2022-09-17,1.188936,0.454451,0.322318,
2022-09-18,,1.686757,,


In [31]:
# Work on an independent copy so that changes to df3 do not touch df.
df3 = df.copy()

In [32]:
# Add a string column E; the list supplies one value for each of the six rows.
df3['E'] = ['one','one','two','three','four','three']


In [33]:
# Display df3, which now has the extra column E.
df3

Unnamed: 0,A,B,C,D,E
2022-09-13,0.053948,0.667708,-0.013707,-0.419336,one
2022-09-14,-0.240254,-1.437997,-0.68061,2.238446,one
2022-09-15,0.215563,-1.301001,0.20478,-1.156588,two
2022-09-16,0.406662,-1.205261,0.493098,1.390879,three
2022-09-17,1.188936,0.454451,0.322318,-2.685991,four
2022-09-18,-0.546333,1.686757,-0.018351,-1.734101,three


In [34]:
# Keep only the rows whose E value is one of the listed labels.
df3.loc[df3["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2022-09-15,0.215563,-1.301001,0.20478,-1.156588,two
2022-09-17,1.188936,0.454451,0.322318,-2.685991,four


In [36]:
# Display df again: it still has only columns A-D, since E was added to the copy df3.
df

Unnamed: 0,A,B,C,D
2022-09-13,0.053948,0.667708,-0.013707,-0.419336
2022-09-14,-0.240254,-1.437997,-0.68061,2.238446
2022-09-15,0.215563,-1.301001,0.20478,-1.156588
2022-09-16,0.406662,-1.205261,0.493098,1.390879
2022-09-17,1.188936,0.454451,0.322318,-2.685991
2022-09-18,-0.546333,1.686757,-0.018351,-1.734101


In [38]:
# Mean of each column (the default axis): one value per column label.
df.mean()

A    0.179754
B   -0.189224
C    0.051255
D   -0.394449
dtype: float64

In [39]:
# Mean across the columns of each row: one value per date.
df.mean(axis=1)

2022-09-13    0.072153
2022-09-14   -0.030104
2022-09-15   -0.509312
2022-09-16    0.271344
2022-09-17   -0.180071
2022-09-18   -0.153007
Freq: D, dtype: float64