In [16]:
import pandas as pd

data = {
    "Name": ["John", "Sarah", "Mike", "John", "Indai"],
    "Age": [25,None, -10, 25, 40],
    "Gender": ["M", "Female", "male", "M", "F"],
    "Salary": [50000, 52000, None, 50000, 70000]
}

df = pd.DataFrame(data)

print(df)


    Name   Age  Gender   Salary
0   John  25.0       M  50000.0
1  Sarah   NaN  Female  52000.0
2   Mike -10.0    male      NaN
3   John  25.0       M  50000.0
4  Indai  40.0       F  70000.0


In [18]:
print(df.isnull())

    Name    Age  Gender  Salary
0  False  False   False   False
1  False   True   False   False
2  False  False   False    True
3  False  False   False   False
4  False  False   False   False


In [19]:
print(df.duplicated())

0    False
1    False
2    False
3     True
4    False
dtype: bool


In [25]:
df = df.drop_duplicates()
print(df)

    Name   Age  Gender   Salary
0   John  25.0       M  50000.0
1  Sarah  25.0  Female  52000.0
2   Mike -10.0    male      NaN
4  Indai  40.0       F  70000.0


In [31]:
df['Age'].fillna((df['Age'].mean()), inplace=True)
df['Salary'].fillna((df['Salary'].mean()), inplace=True)

print(df)

    Name   Age  Gender        Salary
0   John  25.0       M  50000.000000
1  Sarah  25.0  Female  52000.000000
2   Mike -10.0    male  57333.333333
4  Indai  40.0       F  70000.000000


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna((df['Age'].mean()), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Age'].fillna((df['Age'].mean()), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method

In [35]:
# df["Gender"].str.lower().map({
#     "m": "Male",
#     "male": "Male",
#     "f": "Female",
#     "female": "Female"
# })

df["Gender"].replace({
    "M": "Male",
    "male": "Male",
    "F": "Female",
    "Female": "Female"
})

print(df)

    Name   Age  Gender        Salary
0   John  25.0    Male  50000.000000
1  Sarah  25.0  Female  52000.000000
2   Mike -10.0    Male  57333.333333
4  Indai  40.0  Female  70000.000000


In [47]:
(df["Age"] - df["Age"].min()) / (df["Age"].max() - df["Age"].min())
(df["Salary"] - df["Salary"].min()) / (df["Salary"].max() - df["Salary"].min())

print(df)

    Name   Age  Gender        Salary  Age_norm  Salary_norm
0   John  25.0    Male  50000.000000       0.7     0.000000
1  Sarah  25.0  Female  52000.000000       0.7     0.100000
2   Mike -10.0    Male  57333.333333       0.0     0.366667
4  Indai  40.0  Female  70000.000000       1.0     1.000000
