In [1]:
import pandas as pd
import numpy as np

from pandas import DataFrame

In [2]:
data = {
    'Title': ['The Odyssey', 'The Iliad', 'The Aeneid', 'The Odyssey', 'Beowulf', 'The Divine Comedy', np.nan],
    'Author': ['Homer', 'Homer', 'Virgil', 'Homer', 'Unknown', 'Dante', 'Homer'],
    'Year': [800, 750, 29, 800, np.nan, 1320, 800],
    'Genre': ['Epic', 'Epic', 'Epic', 'Epic', 'Epic', 'Poetry', 'Epic']
}

In [5]:
ancient_manuscripts_df = DataFrame(data)
ancient_manuscripts_df

Unnamed: 0,Title,Author,Year,Genre
0,The Odyssey,Homer,800.0,Epic
1,The Iliad,Homer,750.0,Epic
2,The Aeneid,Virgil,29.0,Epic
3,The Odyssey,Homer,800.0,Epic
4,Beowulf,Unknown,,Epic
5,The Divine Comedy,Dante,1320.0,Poetry
6,,Homer,800.0,Epic


In [13]:
epic_manuscripts_boolean = ancient_manuscripts_df['Genre'] == 'Epic'
epic_manuscripts = ancient_manuscripts_df[epic_manuscripts_boolean]
epic_manuscripts

Unnamed: 0,Title,Author,Year,Genre
0,The Odyssey,Homer,800.0,Epic
1,The Iliad,Homer,750.0,Epic
2,The Aeneid,Virgil,29.0,Epic
3,The Odyssey,Homer,800.0,Epic
4,Beowulf,Unknown,,Epic
6,,Homer,800.0,Epic


In [15]:
ancient_manuscripts_df['Year'].fillna(round(epic_manuscripts['Year'].mean(),0), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  ancient_manuscripts_df['Year'].fillna(round(epic_manuscripts['Year'].mean(),0), inplace=True)


In [16]:
ancient_manuscripts_df

Unnamed: 0,Title,Author,Year,Genre
0,The Odyssey,Homer,800.0,Epic
1,The Iliad,Homer,750.0,Epic
2,The Aeneid,Virgil,29.0,Epic
3,The Odyssey,Homer,800.0,Epic
4,Beowulf,Unknown,636.0,Epic
5,The Divine Comedy,Dante,1320.0,Poetry
6,,Homer,800.0,Epic


In [17]:
ancient_manuscripts_df['Title'].fillna('Unknown', inplace=True)

In [18]:
ancient_manuscripts_df

Unnamed: 0,Title,Author,Year,Genre
0,The Odyssey,Homer,800.0,Epic
1,The Iliad,Homer,750.0,Epic
2,The Aeneid,Virgil,29.0,Epic
3,The Odyssey,Homer,800.0,Epic
4,Beowulf,Unknown,636.0,Epic
5,The Divine Comedy,Dante,1320.0,Poetry
6,Unknown,Homer,800.0,Epic


In [32]:
ancient_manuscripts_df.isna().sum().sum()

np.int64(0)

In [42]:
# Written by Homer
homer_manuscripts = ancient_manuscripts_df[ancient_manuscripts_df['Author'] == 'Homer']
print("Number of written manuscripts by Homer: ", homer_manuscripts['Title'].count())
homer_manuscripts

Number of written manuscripts by Homer:  4


Unnamed: 0,Title,Author,Year,Genre
0,The Odyssey,Homer,800.0,Epic
1,The Iliad,Homer,750.0,Epic
3,The Odyssey,Homer,800.0,Epic
6,Unknown,Homer,800.0,Epic


In [43]:
new_data = pd.DataFrame({
    'Title': ['Paradise Lost'],
    'Author': ['Milton'],
    'Year': [1667],
    'Genre': ['Poetry']
})

In [44]:
ancient_manuscripts_df = pd.concat([ancient_manuscripts_df, new_data], ignore_index=True)
ancient_manuscripts_df

Unnamed: 0,Title,Author,Year,Genre
0,The Odyssey,Homer,800.0,Epic
1,The Iliad,Homer,750.0,Epic
2,The Aeneid,Virgil,29.0,Epic
3,The Odyssey,Homer,800.0,Epic
4,Beowulf,Unknown,636.0,Epic
5,The Divine Comedy,Dante,1320.0,Poetry
6,Unknown,Homer,800.0,Epic
7,Paradise Lost,Milton,1667.0,Poetry


In [46]:
genre_group = ancient_manuscripts_df.groupby('Genre')
mean_year = genre_group['Year'].mean()
DataFrame(mean_year)

Unnamed: 0_level_0,Year
Genre,Unnamed: 1_level_1
Epic,635.833333
Poetry,1493.5
