In [1]:
import pandas as pd
import numpy as np

In [2]:
"""Pandas is a powerful Python library for data manipulation and analysis."""

data = pd.read_csv('student.csv')

print("This is the dataset of students")

data.head()

This is the dataset of students


Unnamed: 0,id,name,class,mark,gender
0,1,John Deo,Four,75,female
1,2,Max Ruin,Three,85,male
2,3,Arnold,Three,55,male
3,4,Krish Star,Four,60,female
4,5,John Mike,Four,60,female


In [3]:
data.shape

(35, 5)

In [6]:
def double_marks(mark):
  return mark * 2

In [7]:
data['total_mark'] = data['mark'].apply(double_marks)
data.head()

Unnamed: 0,id,name,class,mark,gender,total_mark
0,1,John Deo,Four,75,female,150
1,2,Max Ruin,Three,85,male,170
2,3,Arnold,Three,55,male,110
3,4,Krish Star,Four,60,female,120
4,5,John Mike,Four,60,female,120


In [8]:
# Apply a lambda function to double the marks
data['total_mark'] = data['mark'].apply(lambda x: x * 2)
data.head()

Unnamed: 0,id,name,class,mark,gender,total_mark
0,1,John Deo,Four,75,female,150
1,2,Max Ruin,Three,85,male,170
2,3,Arnold,Three,55,male,110
3,4,Krish Star,Four,60,female,120
4,5,John Mike,Four,60,female,120


In [10]:
#using apply row wise
def final_mark(row):
    return row['mark'] * row['total_mark'] / 10

In [11]:
# Apply the function row-wise (axis=1)
# Each row is passed as a Series object to the function
data['final_mark'] = data.apply(final_mark, axis=1)
print("\nDataFrame after applying function row-wise:")
data.head()


DataFrame after applying function row-wise:


Unnamed: 0,id,name,class,mark,gender,total_mark,final_mark
0,1,John Deo,Four,75,female,150,1125.0
1,2,Max Ruin,Three,85,male,170,1445.0
2,3,Arnold,Three,55,male,110,605.0
3,4,Krish Star,Four,60,female,120,720.0
4,5,John Mike,Four,60,female,120,720.0


In [12]:
def column_sum_and_average(col):
    return pd.Series({'Sum': col.sum(), 'Average': col.mean()})

In [14]:
# Apply the function column-wise (axis=0)
# Each column is passed as a Series object to the function
result_data = data[['mark','total_mark']].apply(column_sum_and_average, axis=0)
result_data

Unnamed: 0,mark,total_mark
Sum,2613.0,5226.0
Average,74.657143,149.314286


In [15]:
#The map() function in Pandas is a Series method used to substitute each value in a Series with another value.
# Define a function to apply
def squaremark(mark):
    return mark ** 2

In [19]:
data['square_mark'] = data['mark'].map(squaremark)
data.head()

Unnamed: 0,id,name,class,mark,gender,total_mark,final_mark,square_mark
0,1,John Deo,Four,75,female,150,1125.0,5625
1,2,Max Ruin,Three,85,male,170,1445.0,7225
2,3,Arnold,Three,55,male,110,605.0,3025
3,4,Krish Star,Four,60,female,120,720.0,3600
4,5,John Mike,Four,60,female,120,720.0,3600


In [20]:
data.head()

Unnamed: 0,id,name,class,mark,gender,total_mark,final_mark,square_mark
0,1,John Deo,Four,75,female,150,1125.0,5625
1,2,Max Ruin,Three,85,male,170,1445.0,7225
2,3,Arnold,Three,55,male,110,605.0,3025
3,4,Krish Star,Four,60,female,120,720.0,3600
4,5,John Mike,Four,60,female,120,720.0,3600


In [22]:
"""Consider a DataFrame containing a mix of numerical and string data.
The applymap() function can be used to perform operations on all elements,
such as converting strings to uppercase or applying a mathematical function to numbers."""

def to_upper_if_string(x):
    if isinstance(x, str):
        return x.upper()
    return x

In [23]:
data_upper = data.applymap(to_upper_if_string)
data_upper.head()

  data_upper = data.applymap(to_upper_if_string)


Unnamed: 0,id,name,class,mark,gender,total_mark,final_mark,square_mark
0,1,JOHN DEO,FOUR,75,FEMALE,150,1125.0,5625
1,2,MAX RUIN,THREE,85,MALE,170,1445.0,7225
2,3,ARNOLD,THREE,55,MALE,110,605.0,3025
3,4,KRISH STAR,FOUR,60,FEMALE,120,720.0,3600
4,5,JOHN MIKE,FOUR,60,FEMALE,120,720.0,3600


In [24]:
def sqrt_if_numeric(x):
    if isinstance(x, (int, float)):
        return np.sqrt(x)
    return x

In [25]:
data_sqrt = data.applymap(sqrt_if_numeric)
data_sqrt.head()

  data_sqrt = data.applymap(sqrt_if_numeric)


Unnamed: 0,id,name,class,mark,gender,total_mark,final_mark,square_mark
0,1.0,John Deo,Four,8.660254,female,12.247449,33.54102,75.0
1,1.414214,Max Ruin,Three,9.219544,male,13.038405,38.013156,85.0
2,1.732051,Arnold,Three,7.416198,male,10.488088,24.596748,55.0
3,2.0,Krish Star,Four,7.745967,female,10.954451,26.832816,60.0
4,2.236068,John Mike,Four,7.745967,female,10.954451,26.832816,60.0


In [32]:
"""Sorting"""
#sort_values() method allows sorting a DataFrame based on the values of one or more specified columns.
sorted_by_class = data.sort_values(by='class',ascending=False)
print("\nSorted by 'class' (ascending):")
sorted_by_class.head()


Sorted by 'class' (ascending):


Unnamed: 0,id,name,class,mark,gender,total_mark,final_mark,square_mark
2,3,Arnold,Three,55,male,110,605.0,3025
1,2,Max Ruin,Three,85,male,170,1445.0,7225
26,27,Big Nose,Three,81,female,162,1312.2,6561
34,35,Rows Noump,Six,88,female,176,1548.8,7744
8,9,Tes Qry,Six,78,male,156,1216.8,6084


In [28]:
sorted_by_multiple = data.sort_values(by=['class', 'name'], ascending=[True, True])
sorted_by_multiple.head()

Unnamed: 0,id,name,class,mark,gender,total_mark,final_mark,square_mark
22,23,Herod,Eight,79,male,158,1248.2,6241
6,7,My John Rob,Fifth,78,male,156,1216.8,6084
7,8,Asruid,Five,85,male,170,1445.0,7225
17,18,Honny,Five,75,male,150,1125.0,5625
5,6,Alex John,Four,55,male,110,605.0,3025


In [30]:
"""The sort_index() method sorts the DataFrame based on its index labels."""
sorted_by_index = data.sort_index(ascending=False)
sorted_by_index.head()

Unnamed: 0,id,name,class,mark,gender,total_mark,final_mark,square_mark
34,35,Rows Noump,Six,88,female,176,1548.8,7744
33,34,Gain Toe,Seven,69,male,138,952.2,4761
32,33,Kenn Rein,Six,96,female,192,1843.2,9216
31,32,Binn Rott,Seven,90,female,180,1620.0,8100
30,31,Marry Toeey,Four,88,male,176,1548.8,7744
