In [76]:
import pandas as pd

#Creation of messy dataset.
data = {
    'Name': [
        'James', 'michael', 'robert', 'John', None, 'David', 'william', 'Richard',
        'Mark', 'william', 'Alice', 'linda', 'susan', 'jessica', 'Karen', 'Nancy',
        'Amanda', 'deborah', 'Alice', 'kimberly'
    ],

    'Age': [
        22, 36, 38, 35 ,None, 26, 28, 26,
        27, 28, 26, 12, 26, 38, 28, None,
        30, 35, 26, 22
    ],

    'Country': [
        'usa', 'us', None, 'United States', 'aus', 'United Kingdom', 'aus', 'uk',
        'Mexico', 'Australia', 'Brazil', 'bra', None, 'mex', 'uk', 'United States',
        'Argentina', 'Mexico', 'bra', 'uk'
    ],

    'Salary': [
        112500, 84300, 72300, 92500, 61300, 79300, 85600, 87100,
        97100, 135200, 56000, 34200, 123200, 89000, 176200, 200132,
        94000, 76400, 123400, 65000
    ]
}

df = pd.DataFrame(data)
print(df)

        Name   Age         Country  Salary
0      James 22.00             usa  112500
1    michael 36.00              us   84300
2     robert 38.00            None   72300
3       John 35.00   United States   92500
4       None   NaN             aus   61300
5      David 26.00  United Kingdom   79300
6    william 28.00             aus   85600
7    Richard 26.00              uk   87100
8       Mark 27.00          Mexico   97100
9    william 28.00       Australia  135200
10     Alice 26.00          Brazil   56000
11     linda 12.00             bra   34200
12     susan 26.00            None  123200
13   jessica 38.00             mex   89000
14     Karen 28.00              uk  176200
15     Nancy   NaN   United States  200132
16    Amanda 30.00       Argentina   94000
17   deborah 35.00          Mexico   76400
18     Alice 26.00             bra  123400
19  kimberly 22.00              uk   65000


In [83]:
#Removes whitespace, ensures each name is properly capitalized.
df['Name'] = df['Name'].str.strip().str.title()

#Fills in missing age values with median age value.
df['Age'] = df['Age'].fillna(df['Age'].median())

#Applies Uniformity to country abbreviations and formatting.
df['Country'] = df['Country'].replace({
    'usa': 'United States',
    'us': 'United States',
    'aus': 'Australia',
    'bra': 'Brazil',
    'mex': 'Mexico',
    'uk': 'United Kingdom'
})

#Fills missing countries with 'Unknown' variable.
df['Country'] = df['Country'].fillna('Unknown')

#Sets salary datatype as floating-point number.
df['Salary'] = df['Salary'].astype(float)

#Formats the salary values to include commas.
pd.set_option('display.float_format', '{:,}'.format)

#Sorts names alphabetically.
#Removes any duplicates from the table for better comprehension.
dfsort = df.sort_values(by='Name', ascending=True).drop_duplicates('Name')
print(dfsort)

        Name  Age         Country    Salary
10     Alice 26.0          Brazil  56,000.0
16    Amanda 30.0       Argentina  94,000.0
5      David 26.0  United Kingdom  79,300.0
17   Deborah 35.0          Mexico  76,400.0
0      James 22.0   United States 112,500.0
13   Jessica 38.0          Mexico  89,000.0
3       John 35.0   United States  92,500.0
14     Karen 28.0  United Kingdom 176,200.0
19  Kimberly 22.0  United Kingdom  65,000.0
11     Linda 12.0          Brazil  34,200.0
8       Mark 27.0          Mexico  97,100.0
1    Michael 36.0   United States  84,300.0
15     Nancy 27.5   United States 200,132.0
7    Richard 26.0  United Kingdom  87,100.0
2     Robert 38.0         Unknown  72,300.0
12     Susan 26.0         Unknown 123,200.0
9    William 28.0       Australia 135,200.0
4       None 27.5       Australia  61,300.0
