In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy gradebook: one row per student, one column per subject score.
marks = pd.DataFrame(
    data=[
        ("Jack", 88, 29, 89),
        ("James", 34, 38, 76),
        ("Rocky", 91, 54, 96),
    ],
    columns=["Name", "Physics", "Math", "Chem"],
)

In [3]:
marks  # display the whole frame (only 3 rows, so no need for .head())

Unnamed: 0,Name,Physics,Math,Chem
0,Jack,88,29,89
1,James,34,38,76
2,Rocky,91,54,96


In [4]:
marks.index  # row labels of the frame (a default RangeIndex here); the index can be replaced with custom labels

RangeIndex(start=0, stop=3, step=1)

In [5]:
marks.iloc[0]  # select a row by integer position -> returns a Series

Name       Jack
Physics      88
Math         29
Chem         89
Name: 0, dtype: object

In [6]:
marks.info()  # per-column non-null counts and dtypes, plus memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Name     3 non-null      object
 1   Physics  3 non-null      int64 
 2   Math     3 non-null      int64 
 3   Chem     3 non-null      int64 
dtypes: int64(3), object(1)
memory usage: 224.0+ bytes


In [7]:
marks.size  # total number of cells: 3 rows x 4 columns

12

In [8]:
marks.shape  # (number of rows, number of columns)

(3, 4)

In [9]:
marks.describe()  # summary statistics (count, mean, std, min, quartiles, max) for the numeric columns

Unnamed: 0,Physics,Math,Chem
count,3.0,3.0,3.0
mean,71.0,40.333333,87.0
std,32.07803,12.66228,10.148892
min,34.0,29.0,76.0
25%,61.0,33.5,82.5
50%,88.0,38.0,89.0
75%,89.5,46.0,92.5
max,91.0,54.0,96.0


## Indexing

In [10]:
marks  # reference view of the frame for the indexing examples that follow

Unnamed: 0,Name,Physics,Math,Chem
0,Jack,88,29,89
1,James,34,38,76
2,Rocky,91,54,96


In [11]:
marks.iloc[-1]  # negative positions count from the end, so this is the last row

Name       Rocky
Physics       91
Math          54
Chem          96
Name: 2, dtype: object

In [12]:
marks["Chem"]  # select a single column by name -> returns a Series

0    89
1    76
2    96
Name: Chem, dtype: int64

In [13]:
marks[["Math", "Chem"]]  # a list of column names -> returns a DataFrame with just those columns

Unnamed: 0,Math,Chem
0,29,89
1,38,76
2,54,96


## Slicing

In [14]:
marks  # reference view of the frame for the slicing examples that follow

Unnamed: 0,Name,Physics,Math,Chem
0,Jack,88,29,89
1,James,34,38,76
2,Rocky,91,54,96


In [15]:
marks.iloc[0:2]  # positional slice: the end is exclusive, so this returns the rows at positions 0 and 1

Unnamed: 0,Name,Physics,Math,Chem
0,Jack,88,29,89
1,James,34,38,76


In [16]:
# .loc slices by label and includes BOTH endpoints (unlike .iloc),
# so 0:2 returns the Physics values for rows 0, 1 and 2.
marks.loc[0:2, "Physics"]

0    88
1    34
2    91
Name: Physics, dtype: int64

In [17]:
marks["Chem"] > 80  # element-wise comparison -> boolean Series (one True/False per row)

0     True
1    False
2     True
Name: Chem, dtype: bool

In [18]:
marks[marks["Chem"] > 80]  # boolean mask filtering: keep only the rows where Chem > 80

Unnamed: 0,Name,Physics,Math,Chem
0,Jack,88,29,89
2,Rocky,91,54,96


In [19]:
marks.drop(columns="Chem")  # returns a new frame without the 'Chem' column; marks itself is unchanged

Unnamed: 0,Name,Physics,Math
0,Jack,88,29
1,James,34,38
2,Rocky,91,54


In [20]:
marks.drop(index=0)  # returns a new frame without the row labelled 0 (the first row); marks itself is unchanged

Unnamed: 0,Name,Physics,Math,Chem
1,James,34,38,76
2,Rocky,91,54,96


## Operations

In [21]:
marks  # still the full original frame: the drop() calls above returned new frames and were not assigned back

Unnamed: 0,Name,Physics,Math,Chem
0,Jack,88,29,89
1,James,34,38,76
2,Rocky,91,54,96


In [22]:
marks["Chem"] + 2  # returns a new Series; marks itself is not modified. To keep the result, assign it back with =

0    91
1    78
2    98
Name: Chem, dtype: int64

In [23]:
# Per-student total: row-wise sum across the three subject columns.
marks["Total"] = marks[["Physics", "Math", "Chem"]].sum(axis=1)

In [24]:
marks  # the assignment above added the Total column to the frame in place

Unnamed: 0,Name,Physics,Math,Chem,Total
0,Jack,88,29,89,206
1,James,34,38,76,148
2,Rocky,91,54,96,241


### Rename

In [25]:
# rename() maps old labels to new ones (columns and/or index) and returns a
# new frame, so the result is bound back to `marks` to keep the change.
marks = marks.rename(
    columns={"Total": "Total Marks"},
    index={0: "Student 1"},
)

In [26]:
marks  # 'Total' is now 'Total Marks' and row label 0 is now 'Student 1'; unmapped labels are untouched

Unnamed: 0,Name,Physics,Math,Chem,Total Marks
Student 1,Jack,88,29,89,206
1,James,34,38,76,148
2,Rocky,91,54,96,241


In [27]:
# Alternative to rename(): replace every column label at once.
# Labels are matched by position, left to right, so all five must be listed.
marks.columns = ["Student", "Phy","Math", "Chem", "Total Marks"]

In [28]:
marks  # column labels now read Student, Phy, Math, Chem, Total Marks

Unnamed: 0,Student,Phy,Math,Chem,Total Marks
Student 1,Jack,88,29,89,206
1,James,34,38,76,148
2,Rocky,91,54,96,241


In [29]:
# Use each student's name as the row label. set_index() moves the "Student"
# column into the index (removing it from the columns), so the labels come
# straight from the data instead of being retyped by hand and cannot drift
# out of sync with it. rename_axis(None) clears the index name that
# set_index() leaves behind, keeping the index unnamed as before.
marks = marks.set_index("Student").rename_axis(None)

In [30]:
marks  # student names are now the row labels and the Student column is gone

Unnamed: 0,Phy,Math,Chem,Total Marks
Jack,88,29,89,206
James,34,38,76,148
Rocky,91,54,96,241


## Change a particular element

In [31]:
# Overwrite a single cell, addressed by (row label, column label).
# NOTE(review): "Total Marks" was computed before this edit and is not
# recomputed here, so Jack's total stays at 206 although his scores now sum to 207.
marks.loc["Jack", "Chem"] = 90

In [32]:
marks  # Jack's Chem is now 90; every other cell is unchanged

Unnamed: 0,Phy,Math,Chem,Total Marks
Jack,88,29,90,206
James,34,38,76,148
Rocky,91,54,96,241


## Group By (Experiment later)

In [39]:
marks.value_counts()  # counts each distinct row (combination of all column values); every row is unique here

Phy  Math  Chem  Total Marks
34   38    76    148            1
88   29    90    206            1
91   54    96    241            1
dtype: int64

In [47]:
# Group rows by their "Total Marks" value and take the minimum Math and Chem
# within each group. Every total is distinct in this data, so each group
# holds a single row and min() simply returns that row's values.
m = marks.groupby('Total Marks')[['Math','Chem']].min()
m

Unnamed: 0_level_0,Math,Chem
Total Marks,Unnamed: 1_level_1,Unnamed: 2_level_1
148,38,76
206,29,90
241,54,96


In [46]:
m.index  # the groupby key ('Total Marks') becomes the index of the aggregated result

Int64Index([148, 206, 241], dtype='int64', name='Total Marks')