# Pandas

- How to install the required libraries

In [9]:
# Run these once in a terminal if the libraries are not installed yet
# (inside a notebook cell, `%pip install pandas numpy` does the same):
# pip install pandas
# pip install numpy

- Import the libraries

In [10]:
import pandas as pd
import numpy as np

In [11]:
# Object creation: build a Series from a plain Python list.
# The trailing NaN marks a missing value, and the whole Series is stored as float64.
s = pd.Series(data=[1, 2, 3, 4, 5, np.nan])
s

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    NaN
dtype: float64

In [12]:
# Six consecutive daily timestamps starting on 2023-02-03; used below as a row index.
dates = pd.date_range(start="20230203", periods=6)
dates

DatetimeIndex(['2023-02-03', '2023-02-04', '2023-02-05', '2023-02-06',
               '2023-02-07', '2023-02-08'],
              dtype='datetime64[ns]', freq='D')

In [13]:
# Seed the generator so that Restart Kernel -> Run All rebuilds the same random
# frame every time; without a seed every run produces different numbers.
# (Outputs saved from an earlier, unseeded run will differ until the notebook is re-run.)
rng = np.random.default_rng(42)

# 6 rows (one per entry of `dates`) x 4 columns of standard-normal samples.
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2023-02-03,0.220676,1.970725,0.559851,0.896152
2023-02-04,0.13068,-0.246637,-0.441246,2.068742
2023-02-05,0.585764,1.575855,-0.271244,1.068365
2023-02-06,-1.573761,-0.098175,2.268986,-0.95116
2023-02-07,0.377017,1.962754,-0.554459,1.577666
2023-02-08,0.207868,-1.572337,0.152318,-0.744113


In [14]:
# Build a small frame from a mapping of column name -> values, where each
# column has a different type. Scalar entries (A, B, F) are repeated for
# every row; the row labels 0..3 come from the Series used for column C.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20230101"),
        C=pd.Series(1, index=list(range(4)), dtype="float32"),
        D=np.array([3] * 4, dtype="int32"),
        E=pd.Categorical(["girl", "woman", "test", "train"]),
        F="females",
    )
)
df2


Unnamed: 0,A,B,C,D,E,F
0,1.0,2023-01-01,1.0,3,girl,females
1,1.0,2023-01-01,1.0,3,woman,females
2,1.0,2023-01-01,1.0,3,test,females
3,1.0,2023-01-01,1.0,3,train,females


In [15]:
df2.dtypes    # dtype of each column

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [16]:
df2.head()    # first rows of the frame (df2 has only 4 rows, so every row is shown)

Unnamed: 0,A,B,C,D,E,F
0,1.0,2023-01-01,1.0,3,girl,females
1,1.0,2023-01-01,1.0,3,woman,females
2,1.0,2023-01-01,1.0,3,test,females
3,1.0,2023-01-01,1.0,3,train,females


In [17]:
df2.tail()      # last rows of the frame (df2 has only 4 rows, so every row is shown)

Unnamed: 0,A,B,C,D,E,F
0,1.0,2023-01-01,1.0,3,girl,females
1,1.0,2023-01-01,1.0,3,woman,females
2,1.0,2023-01-01,1.0,3,test,females
3,1.0,2023-01-01,1.0,3,train,females


In [18]:
# Row labels of df2.
df2.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [19]:
df2.to_numpy()      # convert to a NumPy array; the mixed column types give dtype=object

array([[1.0, Timestamp('2023-01-01 00:00:00'), 1.0, 3, 'girl', 'females'],
       [1.0, Timestamp('2023-01-01 00:00:00'), 1.0, 3, 'woman',
        'females'],
       [1.0, Timestamp('2023-01-01 00:00:00'), 1.0, 3, 'test', 'females'],
       [1.0, Timestamp('2023-01-01 00:00:00'), 1.0, 3, 'train',
        'females']], dtype=object)

In [20]:
df2.describe()    # summary statistics; only the numeric columns A, C and D appear in the result

Unnamed: 0,A,C,D
count,4.0,4.0,4.0
mean,1.0,1.0,3.0
std,0.0,0.0,0.0
min,1.0,1.0,3.0
25%,1.0,1.0,3.0
50%,1.0,1.0,3.0
75%,1.0,1.0,3.0
max,1.0,1.0,3.0


In [21]:
df2.T     # transpose: rows become columns and columns become rows

Unnamed: 0,0,1,2,3
A,1.0,1.0,1.0,1.0
B,2023-01-01 00:00:00,2023-01-01 00:00:00,2023-01-01 00:00:00,2023-01-01 00:00:00
C,1.0,1.0,1.0,1.0
D,3,3,3,3
E,girl,woman,test,train
F,females,females,females,females


In [22]:
# Sort the rows by their index labels (the dates) in ascending order.
# The index is already ascending, so the result has the same row order as df.
df.sort_index(axis=0,ascending=True)

Unnamed: 0,A,B,C,D
2023-02-03,0.220676,1.970725,0.559851,0.896152
2023-02-04,0.13068,-0.246637,-0.441246,2.068742
2023-02-05,0.585764,1.575855,-0.271244,1.068365
2023-02-06,-1.573761,-0.098175,2.268986,-0.95116
2023-02-07,0.377017,1.962754,-0.554459,1.577666
2023-02-08,0.207868,-1.572337,0.152318,-0.744113


In [23]:
# Sort the rows by the values in column B, largest first.
df.sort_values(by="B",ascending=False)

Unnamed: 0,A,B,C,D
2023-02-03,0.220676,1.970725,0.559851,0.896152
2023-02-07,0.377017,1.962754,-0.554459,1.577666
2023-02-05,0.585764,1.575855,-0.271244,1.068365
2023-02-06,-1.573761,-0.098175,2.268986,-0.95116
2023-02-04,0.13068,-0.246637,-0.441246,2.068742
2023-02-08,0.207868,-1.572337,0.152318,-0.744113


In [24]:
df2["A"]   # select a single column; the result is a Series named A

0    1.0
1    1.0
2    1.0
3    1.0
Name: A, dtype: float64

In [25]:
df2.iloc[0:4]    # rows at positions 0-3 (the slice end, 4, is excluded)

Unnamed: 0,A,B,C,D,E,F
0,1.0,2023-01-01,1.0,3,girl,females
1,1.0,2023-01-01,1.0,3,woman,females
2,1.0,2023-01-01,1.0,3,test,females
3,1.0,2023-01-01,1.0,3,train,females


In [26]:
df.loc[:,["A","B"]]      # all rows, only columns A and B

Unnamed: 0,A,B
2023-02-03,0.220676,1.970725
2023-02-04,0.13068,-0.246637
2023-02-05,0.585764,1.575855
2023-02-06,-1.573761,-0.098175
2023-02-07,0.377017,1.962754
2023-02-08,0.207868,-1.572337


In [27]:
df.loc["20230203":"20230205",["A","B"]]     # label slice between two dates (both end dates are included), columns A and B

Unnamed: 0,A,B
2023-02-03,0.220676,1.970725
2023-02-04,0.13068,-0.246637
2023-02-05,0.585764,1.575855


In [28]:
df.loc["20230203",["A","B","C"]]   # one row label with several columns gives a 1-D Series

A    0.220676
B    1.970725
C    0.559851
Name: 2023-02-03 00:00:00, dtype: float64

In [29]:
df.at[dates[0], "A"]     # scalar lookup: the value of column A at the first date

0.22067570706120215

In [30]:
df.iloc[3]     # the row at integer position 3 (the fourth row), with all columns

A   -1.573761
B   -0.098175
C    2.268986
D   -0.951160
Name: 2023-02-06 00:00:00, dtype: float64

In [31]:
df.iloc[0:3,0:3]     # rows and columns at positions 0-2 (the slice end, 3, is excluded)

Unnamed: 0,A,B,C
2023-02-03,0.220676,1.970725,0.559851
2023-02-04,0.13068,-0.246637,-0.441246
2023-02-05,0.585764,1.575855,-0.271244


In [32]:
df.iloc[0:3,:]     # rows at positions 0-2, all columns

Unnamed: 0,A,B,C,D
2023-02-03,0.220676,1.970725,0.559851,0.896152
2023-02-04,0.13068,-0.246637,-0.441246,2.068742
2023-02-05,0.585764,1.575855,-0.271244,1.068365


In [33]:
# Element-wise boolean mask over the whole frame: entries that are not > 0
# are replaced by NaN, the shape of the frame is unchanged.
# The original compared `df[:]`, a redundant full slice of the same frame,
# so the comparison is done on `df` directly.
df[df > 0]

Unnamed: 0,A,B,C,D
2023-02-03,0.220676,1.970725,0.559851,0.896152
2023-02-04,0.13068,,,2.068742
2023-02-05,0.585764,1.575855,,1.068365
2023-02-06,,,2.268986,
2023-02-07,0.377017,1.962754,,1.577666
2023-02-08,0.207868,,0.152318,


In [34]:
# Row filter: keep only the rows where column A is positive.
df[df["A"]>0]

Unnamed: 0,A,B,C,D
2023-02-03,0.220676,1.970725,0.559851,0.896152
2023-02-04,0.13068,-0.246637,-0.441246,2.068742
2023-02-05,0.585764,1.575855,-0.271244,1.068365
2023-02-07,0.377017,1.962754,-0.554459,1.577666
2023-02-08,0.207868,-1.572337,0.152318,-0.744113


In [35]:
# Show df again: the selections above returned new objects and left df itself unchanged.
df

Unnamed: 0,A,B,C,D
2023-02-03,0.220676,1.970725,0.559851,0.896152
2023-02-04,0.13068,-0.246637,-0.441246,2.068742
2023-02-05,0.585764,1.575855,-0.271244,1.068365
2023-02-06,-1.573761,-0.098175,2.268986,-0.95116
2023-02-07,0.377017,1.962754,-0.554459,1.577666
2023-02-08,0.207868,-1.572337,0.152318,-0.744113


In [41]:
# NOTE: this assignment modifies df2 in place, so the df2 outputs displayed by
# earlier cells no longer show all of its current columns.
df2["G"]=["one","two","two","two"]    # add a new column G, one value per row
df2

Unnamed: 0,A,B,C,D,E,F,G
0,1.0,2023-01-01,1.0,3,girl,females,one
1,1.0,2023-01-01,1.0,3,woman,females,two
2,1.0,2023-01-01,1.0,3,test,females,two
3,1.0,2023-01-01,1.0,3,train,females,two


# THE END