In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices raised by the libraries imported above.
warnings.filterwarnings('ignore')

In [2]:
# Load the iris sample dataset bundled with plotly express.
iris = px.data.iris()
# Preview the first rows.
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [3]:
# Another way to get the iris dataset
# The alternative below is disabled: it is wrapped in a string literal, so nothing
# is loaded, and the trailing `;` keeps the notebook from echoing the string.
# NOTE(review): before enabling it, verify that the seaborn frame has the same
# columns as the plotly one (e.g. species_id).
'''iris = sns.load_dataset('iris')
iris.head()''';

In [4]:
# List the distinct species labels present in the dataset.
iris['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [5]:
# Removing white spaces from a string
# The vectorised .str accessor strips leading/trailing whitespace from every
# label and propagates missing values instead of raising on them.
iris['species'] = iris['species'].str.strip()
iris['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [6]:
# Inspect the dtype of every column.
iris.dtypes

sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
species_id        int64
dtype: object

In [7]:
# Converting strings or floats to integers
# astype casts the whole column in one step. Like int(), the float -> int cast
# truncates toward zero (5.1 -> 5), and the column is overwritten, so every
# later cell sees whole-number sepal lengths.
iris['sepal_length'] = iris['sepal_length'].astype('int64')
iris.dtypes

sepal_length      int64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
species_id        int64
dtype: object

In [8]:
# Count the missing values in petal_width.
iris['petal_width'].isnull().sum()

0

In [9]:
# Replacing null values with a default value
# Missing values in a float column are stored as NaN, and `NaN is not None` is
# True, so an identity test against None would leave them in place. fillna
# replaces both NaN and None with the default.
iris['petal_width'] = iris['petal_width'].fillna(0.0)
iris['petal_width'].isnull().sum()

0

In [10]:
# Distinct species labels before the punctuation clean-up below.
iris['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [11]:
# Removing punctuation from a string
import re

# Any character that is neither a word character (letters, digits, underscore)
# nor whitespace is treated as punctuation and removed.
PUNCTUATION = re.compile(r'[^\w\s]')
# regex=True is spelled out because the default of Series.str.replace differs
# between pandas versions; missing values pass through untouched.
iris['species'] = iris['species'].str.replace(PUNCTUATION, '', regex=True)
iris['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [12]:
# Converting a string to uppercase
# .str.upper() upper-cases every label in one vectorised call and leaves
# missing values as they are instead of raising on them.
iris['species'] = iris['species'].str.upper()
iris['species'].unique()

array(['SETOSA', 'VERSICOLOR', 'VIRGINICA'], dtype=object)

In [13]:
# Selecting a specific column from a DataFrame
def select_column(df, col_name):
    """Return ``df[col_name]``, i.e. whatever ``df`` yields when indexed by ``col_name``."""
    return df[col_name]

# Store the selected petal_length values under a second column name.
iris['petal_length_duplicated'] = select_column(iris, 'petal_length')
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id,petal_length_duplicated
0,5,3.5,1.4,0.2,SETOSA,1,1.4
1,4,3.0,1.4,0.2,SETOSA,1,1.4
2,4,3.2,1.3,0.2,SETOSA,1,1.3
3,4,3.1,1.5,0.2,SETOSA,1,1.5
4,5,3.6,1.4,0.2,SETOSA,1,1.4


In [14]:
# Show the first rows of the frame again (unchanged since the previous cell).
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id,petal_length_duplicated
0,5,3.5,1.4,0.2,SETOSA,1,1.4
1,4,3.0,1.4,0.2,SETOSA,1,1.4
2,4,3.2,1.3,0.2,SETOSA,1,1.3
3,4,3.1,1.5,0.2,SETOSA,1,1.5
4,5,3.6,1.4,0.2,SETOSA,1,1.4


In [15]:
# Applying a custom function to each row of a DataFrame
def multiply(row):
    """Return the product of the row's ``sepal_length`` and ``sepal_width`` entries."""
    length, width = row['sepal_length'], row['sepal_width']
    return length * width

# axis=1 passes one row at a time to the function, so it needs no wrapper.
iris['sepal_area'] = iris.apply(multiply, axis=1)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id,petal_length_duplicated,sepal_area
0,5,3.5,1.4,0.2,SETOSA,1,1.4,17.5
1,4,3.0,1.4,0.2,SETOSA,1,1.4,12.0
2,4,3.2,1.3,0.2,SETOSA,1,1.3,12.8
3,4,3.1,1.5,0.2,SETOSA,1,1.5,12.4
4,5,3.6,1.4,0.2,SETOSA,1,1.4,18.0


In [16]:
# Filtering rows based on a condition
# Keep only the rows whose sepal_length is strictly greater than 6.0.
iris_filtered = iris.loc[lambda frame: frame['sepal_length'].gt(6.0)]
iris_filtered.head(10)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id,petal_length_duplicated,sepal_area
50,7,3.2,4.7,1.4,VERSICOLOR,2,4.7,22.4
102,7,3.0,5.9,2.1,VIRGINICA,3,5.9,21.0
105,7,3.0,6.6,2.1,VIRGINICA,3,6.6,21.0
107,7,2.9,6.3,1.8,VIRGINICA,3,6.3,20.3
109,7,3.6,6.1,2.5,VIRGINICA,3,6.1,25.2
117,7,3.8,6.7,2.2,VIRGINICA,3,6.7,26.6
118,7,2.6,6.9,2.3,VIRGINICA,3,6.9,18.2
122,7,2.8,6.7,2.0,VIRGINICA,3,6.7,19.6
125,7,3.2,6.0,1.8,VIRGINICA,3,6.0,22.4
129,7,3.0,5.8,1.6,VIRGINICA,3,5.8,21.0


In [17]:
# Grouping a DataFrame by a specific column
# numeric_only=True restricts the per-species mean to numeric columns, so the
# cell keeps working when the frame also holds text columns (pandas >= 2.0
# raises a TypeError for those instead of dropping them).
iris_grouped = iris.groupby('species').mean(numeric_only=True)
iris_grouped

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width,species_id,petal_length_duplicated,sepal_area
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SETOSA,4.6,3.418,1.464,0.244,1.0,1.464,15.844
VERSICOLOR,5.48,2.77,4.26,1.326,2.0,4.26,15.246
VIRGINICA,6.08,2.974,5.552,2.026,3.0,5.552,18.164


In [18]:
# Mapping a function to a Series
def categorize_sepal_length(length):
    """Label a sepal length: 'short' below 5.0, 'medium' below 6.0, otherwise 'long'."""
    if length < 5.0:
        return 'short'
    if length < 6.0:
        return 'medium'
    return 'long'

iris['sepal_length_category'] = iris['sepal_length'].map(categorize_sepal_length)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id,petal_length_duplicated,sepal_area,sepal_length_category
0,5,3.5,1.4,0.2,SETOSA,1,1.4,17.5,medium
1,4,3.0,1.4,0.2,SETOSA,1,1.4,12.0,short
2,4,3.2,1.3,0.2,SETOSA,1,1.3,12.8,short
3,4,3.1,1.5,0.2,SETOSA,1,1.5,12.4,short
4,5,3.6,1.4,0.2,SETOSA,1,1.4,18.0,medium


In [19]:
# Applying a function element-wise to a Series
# NOTE(review): the column is named petal_area, but the value stored is
# petal_length squared; petal_width is not used. Confirm this is intended.
iris['petal_area'] = iris['petal_length'].apply(lambda length: length ** 2)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id,petal_length_duplicated,sepal_area,sepal_length_category,petal_area
0,5,3.5,1.4,0.2,SETOSA,1,1.4,17.5,medium,1.96
1,4,3.0,1.4,0.2,SETOSA,1,1.4,12.0,short,1.96
2,4,3.2,1.3,0.2,SETOSA,1,1.3,12.8,short,1.69
3,4,3.1,1.5,0.2,SETOSA,1,1.5,12.4,short,2.25
4,5,3.6,1.4,0.2,SETOSA,1,1.4,18.0,medium,1.96


In [20]:
# Concatenating two columns using a lambda function
# Each row is rendered as "<sepal_length> x <sepal_width>".
iris['sepal_dimensions'] = iris.apply(
    lambda row: f"{row['sepal_length']} x {row['sepal_width']}",
    axis=1,
)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id,petal_length_duplicated,sepal_area,sepal_length_category,petal_area,sepal_dimensions
0,5,3.5,1.4,0.2,SETOSA,1,1.4,17.5,medium,1.96,5 x 3.5
1,4,3.0,1.4,0.2,SETOSA,1,1.4,12.0,short,1.96,4 x 3.0
2,4,3.2,1.3,0.2,SETOSA,1,1.3,12.8,short,1.69,4 x 3.2
3,4,3.1,1.5,0.2,SETOSA,1,1.5,12.4,short,2.25,4 x 3.1
4,5,3.6,1.4,0.2,SETOSA,1,1.4,18.0,medium,1.96,5 x 3.6


In [21]:
# Calculating the mean of a list
def mean(x):
    """Return the arithmetic mean of the sequence ``x`` (its sum divided by its length)."""
    total = sum(x)
    return total / len(x)

num_list = [1, 2, 3, 4, 5]
result = mean(num_list)
result

3.0

In [22]:
# Calculating the median of a list
def median(x):
    """Return the median of the values in ``x``.

    The input is sorted exactly once. With an odd number of values the middle
    element is returned unchanged; with an even number the two middle elements
    are averaged using true division.

    Raises:
        IndexError: if ``x`` is empty.
    """
    ordered = sorted(x)
    middle = len(ordered) // 2
    if len(ordered) % 2 != 0:
        return ordered[middle]
    return (ordered[middle] + ordered[middle - 1]) / 2

num_list = [1, 2, 3, 4, 5]
result = median(num_list)
result

3

In [23]:
# Applying a function to a nested list
def func(x):
    """Return the arithmetic mean of the sequence ``x``."""
    return sum(x) / len(x)

nested_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
# One mean per inner list, in the original order.
result = [func(inner) for inner in nested_list]
result

[2.0, 5.0, 8.0]

In [24]:
# Calculating the variance of a list
def mean(x):
    """Return the arithmetic mean of the sequence ``x``."""
    return sum(x) / len(x)

def variance(x):
    """Return the population variance of ``x``.

    The squared deviations from the mean are summed and divided by ``len(x)``.
    The mean is computed once up front rather than once per element.
    """
    centre = mean(x)
    return sum((value - centre) ** 2 for value in x) / len(x)

num_list = [1, 2, 3, 4, 5]
result = variance(num_list)
result

2.0