### Perform data manipulation using pandas

In [1]:
# import library
import pandas as pd

In [2]:
# Show the installed pandas version (the saved output below records '2.2.3')
pd.__version__


'2.2.3'

In [3]:
# Build a small example frame: one row per person, with a text column
# ("name") and an integer column ("age").
df = pd.DataFrame(
    data=dict(
        name=["Tooba", "Yusra", "Esha", "Aisha"],
        age=[20, 18, 21, 12],
    )
)

In [4]:
# Display the whole frame (it only has four rows)
df

Unnamed: 0,name,age
0,Tooba,20
1,Yusra,18
2,Esha,21
3,Aisha,12


In [23]:
# (number of rows, number of columns) of the frame.
# NOTE(review): this cell carries execution count 23 although it sits near the
# top of the notebook, i.e. it was run out of order; re-run top to bottom
# before sharing.
df.shape

(4, 2)

In [24]:
# Summary statistics (count, mean, std, min, quartiles, max).
# NOTE(review): run as In [24]; the saved output is labelled "AGE", which only
# exists after the rename cell further down. On a fresh top-to-bottom run the
# column would still be called "age" here.
df.describe()

Unnamed: 0,AGE
count,4.0
mean,17.75
std,4.031129
min,12.0
25%,16.5
50%,19.0
75%,20.25
max,21.0


In [25]:
# Transpose the frame: column labels become the row index and vice versa.
# NOTE(review): run as In [25]; the saved output shows NAME/AGE labels and
# float ages (20.0), so it reflects the rename and float-cast cells that
# appear later in the notebook.
df.T

Unnamed: 0,0,1,2,3
NAME,Tooba,Yusra,Esha,Aisha
AGE,20.0,18.0,21.0,12.0


In [26]:
# List the column labels.
# NOTE(review): run as In [26]; the saved output shows the upper-case labels
# produced by the rename cell further down, not the "name"/"age" labels the
# frame has at this point in a top-to-bottom run.
df.columns

Index(['NAME', 'AGE'], dtype='object')

In [27]:
# Show the dtype of every column.
# NOTE(review): run as In [27]; the saved output reports AGE as float64, which
# is the result of the astype(float) cell further down. Before that cast the
# age column holds integers.
df.dtypes

NAME     object
AGE     float64
dtype: object

In [5]:
# Rename the columns to upper case.
# rename() returns a new frame; reassigning it (instead of inplace=True) keeps
# the step chainable and avoids silently mutating an object that earlier cells
# have already displayed.
df = df.rename(columns={"name": "NAME", "age": "AGE"})

In [6]:
# Add a prefix to every column name.
# This returns a new frame for display only; df itself is left unchanged.
df.add_prefix("Stu_", axis="columns")

Unnamed: 0,Stu_NAME,Stu_AGE
0,Tooba,20
1,Yusra,18
2,Esha,21
3,Aisha,12


In [7]:
# Reverse the row order with a negative-step slice; the original index labels
# travel with their rows (3, 2, 1, 0).
df.iloc[::-1]

Unnamed: 0,NAME,AGE
3,Aisha,12
2,Esha,21
1,Yusra,18
0,Tooba,20


In [8]:
# Reverse the rows, then renumber the index from 0 and discard the old labels
(
    df.iloc[::-1]
    .reset_index(drop=True)
)

Unnamed: 0,NAME,AGE
0,Aisha,12
1,Esha,21
2,Yusra,18
3,Tooba,20


In [9]:
# Reverse the column order: keep every row, step backwards through the columns.
# The row index is renumbered from 0 afterwards (old labels are dropped).
(
    df.iloc[:, ::-1]
    .reset_index(drop=True)
)

Unnamed: 0,AGE,NAME
0,20,Tooba
1,18,Yusra
2,21,Esha
3,12,Aisha


In [10]:
# Select columns by data type: keep only the integer columns
df.select_dtypes(include="int")

Unnamed: 0,AGE
0,20
1,18
2,21
3,12


In [11]:
# Complement of the previous selection: every column that is not integer-typed
df.select_dtypes(exclude="int")

Unnamed: 0,NAME
0,Tooba
1,Yusra
2,Esha
3,Aisha


In [12]:
# Type casting: convert AGE to 64-bit floats.
# From this cell onward AGE is float64, so later outputs show values like 20.0.
df["AGE"] = df["AGE"].astype("float64")

In [13]:
# Print a structural summary (index range, per-column non-null counts and
# dtypes, memory usage) to confirm the cast: AGE should now be float64.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   NAME    4 non-null      object 
 1   AGE     4 non-null      float64
dtypes: float64(1), object(1)
memory usage: 192.0+ bytes


In [14]:
# Splitting the data: draw a random 50% of the rows.
# random_state pins the draw so the same rows are picked on every run.
df_1 = df.sample(
    frac=0.5,
    random_state=1,
)
df_1

Unnamed: 0,NAME,AGE
3,Aisha,12.0
2,Esha,21.0


In [15]:
# The other half of the split: every row of df whose index label was not
# sampled into df_1.
df_2 = df.drop(index=df_1.index)
df_2

Unnamed: 0,NAME,AGE
0,Tooba,20.0
1,Yusra,18.0


In [16]:
# Concatenate the two halves row-wise (df_2 first, then df_1).
# Original index labels are kept, so the combined index is not in sorted order.
com = pd.concat([df_2, df_1], axis=0)


In [17]:
# Display the recombined frame; rows appear in concatenation order with their
# original index labels.
com

Unnamed: 0,NAME,AGE
0,Tooba,20.0
1,Yusra,18.0
3,Aisha,12.0
2,Esha,21.0


In [18]:
# Filtering the data: first inspect the distinct AGE values available to
# filter on.
df["AGE"].unique()

array([20., 18., 21., 12.])

In [19]:
# Distinct NAME values present in the frame
df["NAME"].unique()

array(['Tooba', 'Yusra', 'Esha', 'Aisha'], dtype=object)

In [20]:
# Boolean-mask filter. `&` binds tighter than `|`, so the grouping is spelled
# out: keep rows where (AGE <= 20 and NAME is "Tooba") or NAME is "Aisha".
# NOTE(review): if the age limit was meant to apply to both names, regroup as
# age_ok & (is_tooba | is_aisha) - confirm the intent.
df[
    ((df["AGE"] <= 20) & (df["NAME"] == "Tooba"))
    | (df["NAME"] == "Aisha")
]

Unnamed: 0,NAME,AGE
0,Tooba,20.0
3,Aisha,12.0


In [21]:
# Group rows by AGE and count the non-null entries of each remaining column
df.groupby(by=["AGE"]).count()

Unnamed: 0_level_0,NAME
AGE,Unnamed: 1_level_1
12.0,1
18.0,1
20.0,1
21.0,1


In [22]:
# Number of rows for each (AGE, NAME) combination
df.groupby(by=["AGE", "NAME"]).size()

AGE   NAME 
12.0  Aisha    1
18.0  Yusra    1
20.0  Tooba    1
21.0  Esha     1
dtype: int64