In [1]:
# import the library
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# download the SSA 2010s baby-names page and parse its HTML tables;
# the parsed result is indexed with [0] further down to get the names table
names_url = "https://www.ssa.gov/OACT/babynames/decades/names2010s.html"
df = pd.read_html(names_url)

In [2]:
# tidy up: take the first parsed table and drop its final row
df = df[0][:-1]

In [3]:
# 'Unnamed: 0_level_0' is the label of the rank column;
# cast the rank values to integers
rank_key = 'Unnamed: 0_level_0'
df[rank_key] = df[rank_key].astype('int')

# one frame per sex: the shared rank column placed next to
# that sex's own columns
rank = df[rank_key]
df_male, df_female = (
    pd.concat([rank, df[group]], axis=1) for group in ('Males', 'Females')
)

In [4]:
# display the male table (rank column followed by the 'Males' columns)
df_male

Unnamed: 0,Rank,Name,Number
0,1,Noah,182993
1,2,Liam,173717
2,3,Jacob,162958
3,4,William,159697
4,5,Mason,157652
...,...,...,...
195,196,Bradley,20128
196,197,Cayden,19729
197,198,Xander,19270
198,199,Graham,19239


In [5]:
# display the female table (rank column followed by the 'Females' columns)
df_female

Unnamed: 0,Rank,Name,Number
0,1,Emma,194755
1,2,Olivia,184291
2,3,Sophia,180896
3,4,Isabella,170265
4,5,Ava,155606
...,...,...,...
195,196,Amaya,16230
196,197,Emerson,16216
197,198,Julianna,16165
198,199,Cecilia,16125


In [6]:
# stack the male rows on top of the female rows.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# frames are combined with pd.concat instead; ignore_index=True renumbers
# the rows from zero, which replaces the separate in-place
# reset_index(drop=True) step.
df_mf = pd.concat([df_male, df_female], ignore_index=True)

In [7]:
# display the combined table (male rows first, then female rows)
df_mf

Unnamed: 0,Rank,Name,Number
0,1,Noah,182993
1,2,Liam,173717
2,3,Jacob,162958
3,4,William,159697
4,5,Mason,157652
...,...,...,...
395,196,Amaya,16230
396,197,Emerson,16216
397,198,Julianna,16165
398,199,Cecilia,16125


In [8]:
# keep a row only when its Name starts with 'C'
# or has an 'X' / 'x' anywhere in it

def _c_or_x(name):
    """Return True for a name that begins with 'C' or contains 'X' or 'x'."""
    if name.startswith('C'):
        return True
    return 'X' in name or 'x' in name

df_mf = df_mf[df_mf['Name'].apply(_c_or_x)]

In [9]:
# display the rows that passed the 'C' / 'X' / 'x' name filter
df_mf

Unnamed: 0,Rank,Name,Number
7,8,Alexander,141899
26,27,Christopher,101919
29,30,Carter,95090
37,38,Caleb,86573
40,41,Christian,83994
48,49,Charles,69898
51,52,Jaxon,69008
52,53,Connor,68199
56,57,Cameron,66101
67,68,Colton,58377


In [10]:
# inspect the dtype of every column; in the recorded run 'Number' is
# reported as object rather than a numeric type
df_mf.dtypes

Rank       int32
Name      object
Number    object
dtype: object

In [11]:
# convert 'Number' from object to int.
# df_mf is the result of a boolean filter, so writing a column into it with
# df_mf['Number'] = ... is a chained assignment that pandas reports as
# SettingWithCopyWarning; assign() returns a new frame with the converted
# column instead of writing into the filtered one.
df_mf = df_mf.assign(Number=df_mf['Number'].astype('int'))

In [12]:
# re-check the dtypes to confirm 'Number' is now an integer column
df_mf.dtypes

Rank       int32
Name      object
Number     int32
dtype: object

In [13]:
# order the rows by 'Number', largest first, then renumber them from zero.
# The chained calls return a new frame instead of mutating df_mf twice with
# inplace=True, so a frame object that an earlier cell displayed is not
# altered after the fact.
df_mf = df_mf.sort_values('Number', ascending=False).reset_index(drop=True)

# First 10 rows

In [14]:
# show the ten rows with the largest 'Number' (df_mf is sorted descending)
df_mf.head(10)

Unnamed: 0,Rank,Name,Number
0,8,Alexander,141899
1,9,Charlotte,102339
2,27,Christopher,101919
3,30,Carter,95090
4,38,Caleb,86573
5,15,Chloe,85160
6,41,Christian,83994
7,49,Charles,69898
8,52,Jaxon,69008
9,53,Connor,68199


# Sums of the Number column

In [15]:
# rows of df_mf whose Name begins with a capital 'C'
is_c_name = df_mf['Name'].apply(lambda name: name.startswith('C'))
df_C = df_mf[is_c_name]
df_C

Unnamed: 0,Rank,Name,Number
1,9,Charlotte,102339
2,27,Christopher,101919
3,30,Carter,95090
4,38,Caleb,86573
5,15,Chloe,85160
6,41,Christian,83994
7,49,Charles,69898
9,53,Connor,68199
10,57,Cameron,66101
11,68,Colton,58377


In [16]:
# total of 'Number' over all the 'C' names
c_total = df_C['Number'].sum()
print(c_total)

1388914


In [17]:
# rows of df_mf whose Name has an 'X' or an 'x' anywhere in it
has_x = df_mf['Name'].apply(lambda name: any(letter in name for letter in 'Xx'))
df_Xx = df_mf[has_x]
df_Xx

Unnamed: 0,Rank,Name,Number
0,8,Alexander,141899
8,52,Jaxon,69008
15,85,Xavier,48596
18,93,Jaxson,42287
19,54,Alexa,41971
20,60,Alexis,40314
24,115,Max,34097
25,120,Alex,32524
26,121,Maxwell,32407
27,122,Axel,31749


In [18]:
# total of 'Number' over all the names containing 'X' or 'x'
x_total = df_Xx['Number'].sum()
print(x_total)

641955
