In [41]:
import pandas as pd
import numpy as np

In [42]:
# Small example frame with deliberately missing entries:
# column A has one NaN, column B has two, column C is complete.
data = dict(
    A=[1, 2, np.nan, 4],
    B=[5, np.nan, np.nan, 8],
    C=[9, 10, 11, 12],
)

df = pd.DataFrame(data=data)
print(df)

     A    B   C
0  1.0  5.0   9
1  2.0  NaN  10
2  NaN  NaN  11
3  4.0  8.0  12


In [43]:
# Boolean mask of the frame: True wherever a cell is missing (NaN).
print(pd.isna(df))

       A      B      C
0  False  False  False
1  False   True  False
2   True   True  False
3  False  False  False


In [48]:
# Discard every row that contains at least one missing value.
# axis=0 / how='any' are the defaults, spelled out here for clarity.
df_dropped_rows = df.dropna(axis=0, how='any')

In [49]:
# Show the result: only rows 0 and 3 remain, since rows 1 and 2 each contain a NaN.
df_dropped_rows

Unnamed: 0,A,B,C
0,1.0,5.0,9
3,4.0,8.0,12


In [46]:
# Keep only the rows that hold at least 2 non-missing values.
# Note: the name df_dropped_rows is also assigned by the plain dropna() cell,
# so its content depends on which of the two cells ran last.
df_dropped_rows = df.dropna(axis='index', thresh=2)

In [47]:
# Show the result: row 1 (two non-missing values) is kept, row 2 (only one) is dropped.
df_dropped_rows

Unnamed: 0,A,B,C
0,1.0,5.0,9
1,2.0,,10
3,4.0,8.0,12


In [15]:
# Discard every column that contains at least one missing value.
df_dropped_columns = df.dropna(axis='columns')

In [16]:
# Show the result: only column C survives because A and B both contain NaN.
df_dropped_columns

Unnamed: 0,C
0,9
1,10
2,11
3,12


In [17]:
# Replace every missing entry with a constant placeholder (here 0).
# Returns a new frame; df itself is left unchanged.
df.fillna(value=0)

Unnamed: 0,A,B,C
0,1.0,5.0,9
1,2.0,0.0,10
2,0.0,0.0,11
3,4.0,8.0,12


In [18]:
# Impute each missing entry with the mean of its own column
# (the per-column means are computed over the non-missing values only).
df.fillna(value=df.mean(axis=0))

Unnamed: 0,A,B,C
0,1.0,5.0,9
1,2.0,6.5,10
2,2.333333,6.5,11
3,4.0,8.0,12


In [19]:
# Forward fill: replace each missing value with the last valid value above it
# in the same column.
# DataFrame.ffill() is used instead of fillna(method='ffill') because the
# `method` keyword of fillna is deprecated (FutureWarning since pandas 2.1).
df.ffill()

Unnamed: 0,A,B,C
0,1.0,5.0,9
1,2.0,5.0,10
2,2.0,5.0,11
3,4.0,8.0,12


In [20]:
# Backward fill: replace each missing value with the next valid value below it
# in the same column.
# DataFrame.bfill() is used instead of fillna(method='bfill') because the
# `method` keyword of fillna is deprecated (FutureWarning since pandas 2.1).
df.bfill()

Unnamed: 0,A,B,C
0,1.0,5.0,9
1,2.0,8.0,10
2,4.0,8.0,11
3,4.0,8.0,12


In [21]:
# Distinct values found in column A; NaN is reported as one of them.
df.loc[:, 'A'].unique()

array([ 1.,  2., nan,  4.])

In [22]:
# Number of distinct values in column A.
# NaN is not counted (dropna=True is the default, spelled out here).
df['A'].nunique(dropna=True)

3

In [24]:
# How often each distinct value occurs in column A.
# NaN is excluded (dropna=True is the default, spelled out here).
df['A'].value_counts(dropna=True)

1.0    1
2.0    1
4.0    1
Name: A, dtype: int64

In [26]:
# Exam scores: one row per student (used as the index), one column per subject.
data = dict(
    Math=[85, 90, 78, 92, 88],
    Physics=[80, 85, 86, 92, 95],
    Chemistry=[70, 75, 72, 89, 85],
)

df = pd.DataFrame(data=data, index=['Alice', 'Bob', 'Charlie', 'David', 'Eva'])
print(df)

         Math  Physics  Chemistry
Alice      85       80         70
Bob        90       85         75
Charlie    78       86         72
David      92       92         89
Eva        88       95         85


In [27]:
# Row-wise operation: average score per student.
# DataFrame.mean(axis=1) is the vectorized built-in; it yields the same Series
# (one float per index label) as applying row.mean() to every row, without
# invoking a Python lambda once per row.
row_average = df.mean(axis=1)

In [29]:
# Show the per-student averages (one float per index label).
row_average

Alice      78.333333
Bob        83.333333
Charlie    78.666667
David      91.000000
Eva        89.333333
dtype: float64

In [30]:
# Column-wise operation: average score per subject.
# DataFrame.mean(axis=0) is the vectorized built-in; it yields the same Series
# (one float per column label) as applying col.mean() to every column.
col_average = df.mean(axis=0)

In [31]:
# Show the per-subject averages (one float per column label).
col_average

Math         86.6
Physics      87.6
Chemistry    78.2
dtype: float64

In [32]:
# Inspect the column labels of the current frame (here the three subject names).
df.columns

Index(['Math', 'Physics', 'Chemistry'], dtype='object')

In [33]:
# Inspect the row labels of the current frame (here the student names used as the index).
df.index

Index(['Alice', 'Bob', 'Charlie', 'David', 'Eva'], dtype='object')

In [34]:
# Sales records in long format: each of the four regions appears once per
# product (A and B), with sales rising from 100 to 450 in steps of 50.
data = {
    'Region': [region for region in ('North', 'South', 'East', 'West') for _ in range(2)],
    'Product': ['A', 'B'] * 4,
    'Sales': list(range(100, 451, 50)),
}

df = pd.DataFrame(data=data)

In [35]:
# Display the full sales table (8 rows: 4 regions x 2 products).
df

Unnamed: 0,Region,Product,Sales
0,North,A,100
1,North,B,150
2,South,A,200
3,South,B,250
4,East,A,300
5,East,B,350
6,West,A,400
7,West,B,450


In [39]:
# Reshape the long sales table into a Region x Product grid.
# Each cell holds the summed Sales for that (Region, Product) pair.
pivot_table = df.pivot_table(index='Region', columns='Product', values='Sales', aggfunc='sum')

In [40]:
# Display the pivoted result: one row per Region, one column per Product.
pivot_table

Product,A,B
Region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,300,350
North,100,150
South,200,250
West,400,450
