# Lab | Revisiting Machine Learning Case Study

- In this lab, you will use `learningSet.csv` file which we used during the class. 

## Instructions

Complete the following steps on the categorical columns in the dataset:

- Check for null values in all the columns
- Create a new empty list called `drop_list`. We will append to this list a set of columns to be dropped later. Add the following columns to this:
    - `OSOURCE` - symbol definitions not provided, too many categories
    - `ZIP` - we are including states already
- Identify columns that have over 85% missing values and add them to the previous list.
- Remove the columns included in the `drop_list` from the dataframe
- Now, reduce the number of categories in the column `GENDER`. The column should only have either "M" for males, "F" for females, and "other" for all the rest
    - Note that there are a few null values in the column. We will first replace those null values using the code below:

    ```python
    print(categorical['GENDER'].value_counts())
    categorical['GENDER'] = categorical['GENDER'].fillna('F')
    ```


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.max_columns', None)

In [4]:
data = pd.read_csv('~/Desktop/bootcamp/Week_7/Day_1/Morning/learningSet.csv')

  data = pd.read_csv('~/Desktop/bootcamp/Week_7/Day_1/Morning/learningSet.csv')


In [5]:
data.shape

(95412, 481)

In [12]:
# check full values in all columns]
nulls_percent_df = pd.DataFrame()
nulls_percent_df = pd.DataFrame(data.isna().sum().reset_index())
nulls_percent_df

Unnamed: 0,index,0
0,ODATEDW,0
1,OSOURCE,0
2,TCODE,0
3,STATE,0
4,ZIP,0
...,...,...
476,MDMAUD_R,0
477,MDMAUD_F,0
478,MDMAUD_A,0
479,CLUSTER2,132


In [63]:
# create empy list
drop_list = []
drop_list.extend(['OSOURCE', 'ZIP'])
#drop_list.append('OSOURCE')
#drop_list.append('ZIP')

In [28]:
drop_list

['OSOURCE', 'ZIP']

In [44]:
# Define the function here
def columns_above_threshold(df: pd.DataFrame, threshold=0.25) -> list:
    '''
    this function takes as input a dataframe and a threshold to return a list of columns
    '''
    df_copy = df.copy()
    print('reading data...')
    column_list_nas = pd.DataFrame(df_copy.isna().sum()/len(df_copy)).reset_index() # obtaining the % of NAs per column
    column_list_nas.columns = ['column_name', 'nulls_percentage']
    print('creating new dataframe with na %s') 
    
    col_list = column_list_nas[column_list_nas['nulls_percentage']>threshold]['column_name'].tolist() # filtering the columns with NAs above the threshold
    print('creating filtered list...')
    
    #col_list = col_list['column_name'].tolist() # filtering the columns with NAs above the threshold
    print(len(col_list))
    
    return col_list

In [45]:
# call function
above_85_list = columns_above_threshold(data, 0.85)

reading data...
creating new dataframe with na %s
creating filtered list...
25


In [None]:
# option 1 to append new values from list to previous list
for col in above_85_list:
    if col in drop_list:
        pass
    elif col not in drop_list:
        drop_list.append(col)
    else:
        pass
len(drop_list)

In [73]:
# option 2 to append new values from list to previous list
drop_list += [col for col in above_85_list if col not in drop_list]
len(drop_list)

27

In [74]:
# option 3 to append new values from list to previous list

drop_list = [col for col in above_85_list if col not in drop_list] + drop_list
len(drop_list)

27

In [76]:
def remove_cols_from_df(df:pd.DataFrame, remove_list) -> pd.DataFrame:
    '''
    this fn removes columns list from a dataframe
    input:df and list
    output: df
    '''
    df_copy = df.copy()
    dataframe_shape = df_copy.shape
    dataframe_col_num = int(len(df_copy.columns))
    print(f'reading data... dataframe_shape: {dataframe_shape}')
    
    df_copy.drop(columns=remove_list, inplace=True, errors='ignore')
    dataframe_col_num_post = int(len(df_copy.columns))
    removed_columns_count = int(dataframe_col_num_post - dataframe_col_num)
    dataframe_shape_post = df_copy.shape
    print(f'removing {removed_columns_count} columns... post_dataframe_shape: {dataframe_shape_post}')
    return df_copy
    


In [77]:
df_test = remove_cols_from_df(data, drop_list)

reading data... dataframe_shape: (95412, 481)
removing -27 columns... post_dataframe_shape: (95412, 454)


In [80]:
df_test.shape

(95412, 454)

In [81]:
data = df_test

## X y Split and num / cat split

In [82]:
Y = data[['TARGET_B', 'TARGET_D']]
Y.head()

Unnamed: 0,TARGET_B,TARGET_D
0,0,0.0
1,0,0.0
2,0,0.0
3,0,0.0
4,0,0.0


In [83]:
numerical = data.select_dtypes(np.number)
numerical = numerical.drop(columns = ['TARGET_B', 'TARGET_D'])
numerical.head()

Unnamed: 0,ODATEDW,TCODE,DOB,AGE,INCOME,WEALTH1,HIT,MBCRAFT,MBGARDEN,MBBOOKS,MBCOLECT,MAGFAML,MAGFEM,MAGMALE,PUBGARDN,PUBCULIN,PUBHLTH,PUBDOITY,PUBNEWFN,PUBPHOTO,PUBOPP,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,MSA,ADI,DMA,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,ADATE_2,ADATE_3,ADATE_4,ADATE_5,ADATE_6,ADATE_7,ADATE_8,ADATE_9,ADATE_10,ADATE_11,ADATE_12,ADATE_13,ADATE_14,ADATE_15,ADATE_16,ADATE_17,ADATE_18,ADATE_19,ADATE_20,ADATE_21,ADATE_22,ADATE_23,ADATE_24,CARDPROM,MAXADATE,NUMPROM,CARDPM12,NUMPRM12,RDATE_8,RDATE_9,RDATE_11,RDATE_12,RDATE_14,RDATE_16,RDATE_18,RDATE_19,RDATE_22,RDATE_24,RAMNT_8,RAMNT_9,RAMNT_11,RAMNT_12,RAMNT_14,RAMNT_16,RAMNT_18,RAMNT_19,RAMNT_22,RAMNT_24,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,NEXTDATE,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
0,8901,0,3712,60.0,,,0,,,,,,,,,,,,,,,0,39,34,18,10,2,1,5.0,992,264,332,0,35,65,47,53,92,1,0,0,11,0,0,0,0,0,0,0,11,0,0,0,39,48,51,40,50,54,25,31,42,27,11,14,18,17,13,11,15,12,11,34,25,18,26,10,23,18,33,49,28,12,4,61,7,12,19,198,276,97,95,2,2,0,0,7,7,0,479,635,3,2,86,14,96,4,7,38,80,70,32,84,16,6,2,5,9,15,3,17,50,25,0,0,0,2,7,13,27,47,0,1,61,58,61,15,4,2,0,0,14,1,0,0,2,5,17,73,0.0,177.0,682.0,307,318,349,378,12883,13,23,23,23,15,1,0,0,1,4,25,24,26,17,2,0,0,2,28,4,51,1,46,54,3,88,8,0,0,0,0,0,0,4,1,13,14,16,2,45,56,64,50,64,44,62,53,99,0,0,9,3,8,13,9,0,3,9,3,15,19,5,4,3,0,3,41,1,0,7,13,6,5,0,4,9,4,1,3,10,2,1,7,78,2,0,120,16,10,39,21,8,4,3,5,20,3,19,4,0,0,0,18,39,0,34,23,18,16,1,4,0,23,0,0,5,1,0,0,0,0,0,2,0,3,74,88,8,0,4,96,77,19,13,31,5,14,14,31,54,46,0,0,90,0,10,0,0,0,33,65,40,99,99,6,2,10,7,9706,9606.0,9604.0,9604.0,9603.0,9602.0,9601.0,9511.0,9510.0,9510.0,9508.0,9507.0,9506.0,9504.0,9503.0,9502.0,9501.0,9411.0,9411.0,9410.0,9409.0,9407.0,9406.0,27,9702,74,6,14,,,,,9507.0,9505.0,,,,9406.0,,,,,10.0,11.0,,,,9.0,240.0,31,14,5.0,9208,12.0,9402,10.0,9512,8911,9003.0,4.0,7.741935,95515,0,4,39.0
1,9401,1,5202,46.0,6.0,9.0,16,0.0,0.0,3.0,1.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0,15,55,11,6,2,1,9.0,3611,940,998,99,0,0,50,50,67,0,0,31,6,4,2,6,4,14,0,0,2,0,1,4,34,41,43,32,42,45,32,33,46,21,13,14,33,23,10,4,2,11,16,36,22,15,12,1,5,4,21,75,55,23,9,69,4,3,24,317,360,99,99,0,0,0,0,0,0,0,5468,5218,12,10,96,4,97,3,9,59,94,88,55,95,5,4,1,3,5,4,2,18,44,5,0,0,0,97,98,98,98,99,94,0,83,76,73,21,5,0,0,0,4,0,0,0,91,91,91,94,4480.0,13.0,803.0,1088,1096,1026,1037,36175,2,6,2,5,15,14,13,10,33,2,5,2,5,15,14,14,10,32,6,2,66,3,56,44,9,80,14,0,0,0,0,0,0,6,0,2,24,32,12,71,70,83,58,81,57,64,57,99,99,0,22,24,4,21,13,2,1,6,0,4,1,0,3,1,0,6,13,1,2,8,18,11,4,3,4,10,7,11,1,6,2,1,16,69,5,2,160,5,5,12,21,7,30,20,14,24,4,24,10,0,0,0,8,15,0,55,10,11,0,0,2,0,3,1,1,2,3,1,1,0,3,0,0,0,42,39,50,7,27,16,99,92,53,5,10,2,26,56,97,99,0,0,0,96,0,4,0,0,0,99,0,99,99,99,20,4,6,5,9706,9606.0,9604.0,9604.0,9603.0,9602.0,9601.0,9511.0,9510.0,9510.0,9509.0,,,,9503.0,,,9411.0,9411.0,9410.0,9409.0,,9406.0,12,9702,32,6,13,,9512.0,,,,9504.0,,,,,,25.0,,,,12.0,,,,,47.0,3,1,10.0,9310,25.0,9512,25.0,9512,9310,9504.0,18.0,15.666667,148535,0,2,1.0
2,9001,1,0,,3.0,1.0,2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,20,29,33,6,8,1,1.0,7001,2040,2669,0,2,98,49,51,96,2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,35,43,46,37,45,49,23,35,40,25,13,20,19,16,13,10,8,15,14,30,22,19,25,10,23,21,35,44,22,6,2,63,9,9,19,183,254,69,69,1,6,5,3,3,3,0,497,546,2,1,78,22,93,7,18,36,76,65,30,86,14,7,2,5,11,17,3,17,60,18,0,1,0,0,1,6,18,50,0,4,36,49,51,14,5,4,2,24,11,2,3,6,0,2,9,44,0.0,281.0,518.0,251,292,292,340,11576,32,18,20,15,12,2,0,0,1,20,19,24,18,16,2,0,0,1,28,8,31,11,38,62,8,74,22,0,0,0,0,0,2,2,1,21,19,24,6,61,65,73,59,70,56,78,62,82,99,4,10,5,2,6,12,0,1,9,5,18,20,5,7,6,0,11,33,4,3,2,12,3,3,2,0,7,8,3,3,6,7,1,8,74,3,1,120,22,20,28,16,6,5,3,1,23,1,16,6,0,0,0,10,21,0,28,23,32,8,1,14,1,5,0,0,7,0,0,0,0,0,1,0,0,2,84,96,3,0,0,92,65,29,9,22,3,12,23,50,69,31,0,0,0,6,35,44,0,15,22,77,17,97,92,9,2,6,5,9706,9606.0,9604.0,9604.0,9603.0,9602.0,9601.0,9511.0,,9510.0,9508.0,9507.0,9506.0,9504.0,9503.0,,9501.0,9411.0,,,9409.0,9407.0,9406.0,26,9702,63,6,14,,,,9509.0,9506.0,9504.0,9501.0,,9409.0,9406.0,,,,11.0,9.0,9.0,8.0,,8.0,6.0,202.0,27,14,2.0,9111,16.0,9207,5.0,9512,9001,9101.0,12.0,7.481481,15078,1,4,60.0
3,8701,0,2801,70.0,1.0,4.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,23,14,31,3,0,3,0.0,640,160,219,0,8,92,54,46,61,0,0,11,32,6,2,0,0,0,0,0,31,0,0,1,32,40,44,34,43,47,25,45,35,20,15,25,17,17,12,7,7,20,17,30,14,19,25,11,23,23,27,50,30,15,8,63,9,6,23,199,283,85,83,3,4,1,0,2,0,2,1000,1263,2,1,48,52,93,7,6,36,73,61,30,84,16,6,3,3,21,12,4,13,36,13,0,0,0,10,25,50,69,92,10,15,42,55,50,15,5,4,0,9,42,4,0,5,1,8,17,34,9340.0,67.0,862.0,386,388,396,423,15130,27,12,4,26,22,5,0,0,4,35,5,6,12,30,6,0,0,5,22,14,26,20,46,54,3,58,36,0,0,0,0,0,6,0,0,17,13,15,0,43,69,81,53,68,45,33,31,0,99,23,17,3,0,6,6,0,0,13,42,12,0,0,0,42,0,6,3,0,0,0,23,3,3,6,0,3,3,3,3,3,0,3,6,87,0,0,120,28,12,14,27,10,3,5,0,19,1,17,0,0,0,0,13,23,0,14,40,31,16,0,1,0,13,0,0,4,0,0,0,3,0,0,0,0,29,67,56,41,3,0,94,43,27,4,38,0,10,19,39,45,55,0,0,45,22,17,0,0,16,23,77,22,93,89,16,2,6,6,9706,9606.0,9604.0,9604.0,9603.0,9602.0,9601.0,9511.0,,9510.0,9508.0,9507.0,9506.0,9504.0,9503.0,9502.0,9501.0,9411.0,9411.0,9410.0,9409.0,,,27,9702,66,6,14,,,9512.0,9509.0,9508.0,9505.0,,,,,,,10.0,10.0,10.0,7.0,,,,,109.0,16,7,2.0,8711,11.0,9411,10.0,9512,8702,8711.0,9.0,6.8125,172556,1,4,41.0
4,8601,0,2001,78.0,3.0,2.0,60,1.0,0.0,9.0,0.0,4.0,1.0,0.0,0.0,0.0,4.0,0.0,1.0,0.0,1.0,1,28,9,53,26,3,2,,2520,627,761,99,0,0,46,54,2,98,0,0,1,0,0,0,0,0,0,0,0,0,0,0,33,45,50,36,46,50,27,34,43,23,14,21,13,15,20,12,5,13,15,34,19,19,31,7,27,16,26,57,36,24,14,42,17,9,33,235,323,99,98,0,0,0,0,0,0,0,576,594,4,3,90,10,97,3,0,42,82,49,22,92,8,20,3,17,9,23,1,1,1,0,21,58,19,0,1,2,16,67,0,2,45,52,53,16,6,0,0,0,9,0,0,0,25,58,74,83,5000.0,127.0,528.0,240,250,293,321,9836,24,29,23,13,4,4,0,0,2,21,30,22,16,4,5,0,0,3,35,8,11,14,20,80,4,73,22,1,1,0,0,0,3,1,2,1,24,27,3,76,61,73,51,65,49,80,31,81,99,10,17,8,2,6,15,3,7,22,2,9,0,7,2,2,0,6,1,5,2,2,12,2,7,6,4,15,29,4,3,26,3,2,7,49,12,1,120,16,20,30,13,3,12,5,2,26,1,20,7,1,1,1,15,28,4,9,16,53,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,65,99,0,0,0,90,45,18,25,34,0,1,3,6,33,67,0,0,9,14,72,3,0,0,99,1,21,99,96,6,2,7,11,9706,9606.0,9604.0,9604.0,9603.0,9512.0,9601.0,9511.0,9510.0,9509.0,9508.0,9502.0,9506.0,,9503.0,9502.0,9412.0,9411.0,9411.0,9410.0,9506.0,9407.0,9406.0,43,9702,113,10,25,9601.0,,,,9506.0,,,,,,15.0,,,,10.0,,,,,,254.0,37,8,3.0,9310,15.0,9601,15.0,9601,7903,8005.0,14.0,6.864865,7112,1,2,26.0


In [84]:
categorical = data.select_dtypes([object])
categorical.head()

Unnamed: 0,STATE,MAILCODE,PVASTATE,NOEXCH,RECINHSE,RECP3,RECPGVG,RECSWEEP,MDMAUD,DOMAIN,CLUSTER,AGEFLAG,HOMEOWNR,CHILD03,CHILD07,CHILD12,CHILD18,GENDER,DATASRCE,SOLP3,SOLIH,MAJOR,GEOCODE,COLLECT1,VETERANS,BIBLE,CATLG,HOMEE,PETS,CDPLAY,STEREO,PCOWNERS,PHOTO,CRAFTS,FISHER,GARDENIN,BOATS,WALKER,KIDSTUFF,CARDS,PLATES,LIFESRC,PEPSTRFL,RFA_2,RFA_3,RFA_4,RFA_5,RFA_6,RFA_7,RFA_8,RFA_9,RFA_10,RFA_11,RFA_12,RFA_13,RFA_14,RFA_15,RFA_16,RFA_17,RFA_18,RFA_19,RFA_20,RFA_21,RFA_22,RFA_23,RFA_24,RFA_2R,RFA_2A,MDMAUD_R,MDMAUD_F,MDMAUD_A,GEOCODE2
0,IL,,,0,,,,,XXXX,T2,36,,,,,,,F,,,,,,,,,,,,,,,,,,,,,,,,,X,L4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,L,E,X,X,X,C
1,CA,,,0,,,,,XXXX,S1,14,E,H,,,,M,M,3.0,,,,2.0,,,,,,,,,,,,,,,,,,,,,L2G,A2G,A2G,A2G,A2G,A1E,A1E,A1E,A1E,A1E,A1E,,,,L1E,,,N1E,N1E,N1E,N1E,,F1E,L,G,X,X,X,A
2,NC,,,0,,,,,XXXX,R2,43,,U,,,,,M,3.0,,,,,,,,,,,,,,,,,,,,,,,,X,L4E,S4E,S4E,S4E,S4E,S4F,S4F,S4F,,S4F,S4F,S4F,S4F,S4F,S4F,,S4D,S4D,,,S4D,S4D,S3D,L,E,X,X,X,C
3,CA,,,0,,,,,XXXX,R2,44,E,U,,,,,F,3.0,,,,,,,,,,,,,,,,,,,,,,,,X,L4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,,S4E,S4E,S4E,S4E,S4E,S4E,S2D,S2D,A1D,A1D,A1D,A1D,,,L,E,X,X,X,C
4,FL,,,0,X,X,,,XXXX,S2,16,E,H,,,,,F,3.0,,12.0,,,,,Y,Y,,,,Y,,,Y,,Y,,Y,,Y,,3.0,,L2F,A2F,A2F,A2F,A1D,I2D,A1E,A1E,L1D,A1E,A1E,L1D,L3D,,L3D,A2D,A2D,A3D,A3D,A3D,I4E,A3D,A3D,L,F,X,X,X,A


## Reduce gender categories

In [85]:
# reduce GENDER categories to M F and other + replace nulls
print(categorical['GENDER'].value_counts())
categorical['GENDER'] = categorical['GENDER'].fillna('F')

GENDER
F    51277
M    39094
      2957
U     1715
J      365
C        2
A        2
Name: count, dtype: int64


In [87]:
print(categorical['GENDER'].value_counts())


GENDER
F    51277
M    39094
      2957
U     1715
J      365
C        2
A        2
Name: count, dtype: int64


In [100]:
type(categorical['GENDER'].unique().tolist())

list

In [104]:
gender_unique = categorical['GENDER'].unique().tolist()
new_gender_options = ['F','M','other']
other_genders = [value for value in gender_unique if value not in new_gender_options]

def clean_gender(x):
    if x in other_genders:
        return 'other'
    else:
        return x
        
categorical['GENDER'] = list(map(clean_gender, categorical['GENDER']))

In [105]:
print(categorical['GENDER'].value_counts())


GENDER
F        51277
M        39094
other     5041
Name: count, dtype: int64


## Next steps