## Overview
- [Testing on full names](#test_full)
    - [Testing against the train/dev sets](#train_dev)
    - [Testing against the test set](#test_set)
- [Test on first names](#first_name)
    - [Testing against the train/dev sets](#train_dev2)
    - [Testing against the test set](#test_set2)

In [3]:
from genderLR import GenderLR
from utils import *

gender = GenderLR()

<a name='test_full'></a>
## Testing on full names

<a name='train_dev'></a>
### Testing against the train/dev sets

In [4]:
train_ds_all = data_loader('data/train_ds.txt')
train_ds, train_ds_dev = train_ds_all[:100000], train_ds_all[100000:]
len(train_ds), train_ds[:5]

(100000,
 [['阎莹暂', 'F'], ['吕荣辉', 'M'], ['曾泽彬', 'M'], ['董二庄', 'M'], ['华治权', 'M']])

In [5]:
# test the accuracy of the model on the selected train set
# including those gender undefined cases
accu = gender.accuracy(train_ds, full_name=True)
accu, gender.mismatch[:20]

(0.95492,
 [['name', 'gender', 'pred', 'prob'],
  ['贾晔', 'U', 'M', 0.40640049019849533],
  ['林文冰', 'U', 'M', 0.6743175415253797],
  ['宋文会', 'U', 'M', 0.6135669859259315],
  ['吴乐懿', 'U', 'M', 0.8300395610175295],
  ['林水荣', 'U', 'M', 0.9270598936609558],
  ['施正漪', 'M', 'F', 0.7704180986894983],
  ['吉晗', 'M', 'F', 0.6536717990218155],
  ['柯逸', 'U', 'M', 0.7354662573578914],
  ['终晨云', 'M', 'F', 0.5007160117983905],
  ['张晓华', 'M', 'F', 0.5262898528073406],
  ['吴宝懿', 'U', 'M', 0.939861336175106],
  ['赵珣', 'U', 'M', 0.47723471469191386],
  ['魏俊华', 'U', 'M', 0.8885340496765125],
  ['卞郡', 'M', 'F', 0.4492423468024235],
  ['陈游', 'U', 'M', 0.8091822301550559],
  ['于树玉', 'U', 'M', 0.7308590990958171],
  ['段常桃', 'M', 'F', 0.5039338213337725],
  ['荷英', 'U', 'F', 0.8609835356439166],
  ['黄杨华', 'U', 'M', 0.7864069411425665]])

In [6]:
# test the accuracy of the model on the selected train set
# excluding those gender undefined cases
accu = gender.accuracy(train_ds, full_name=True, exclude_U=True)
accu, gender.mismatch[:20]

(0.9773533452256464,
 [['name', 'gender', 'pred', 'prob'],
  ['施正漪', 'M', 'F', 0.7704180986894983],
  ['吉晗', 'M', 'F', 0.6536717990218155],
  ['终晨云', 'M', 'F', 0.5007160117983905],
  ['张晓华', 'M', 'F', 0.5262898528073406],
  ['卞郡', 'M', 'F', 0.4492423468024235],
  ['段常桃', 'M', 'F', 0.5039338213337725],
  ['李旭彤', 'F', 'M', 0.7795938004611246],
  ['乔海云', 'F', 'M', 0.6367215582947406],
  ['游松菱', 'M', 'F', 0.6828052234934916],
  ['闻思嘉', 'M', 'F', 0.529137167730791],
  ['卓华昕', 'M', 'F', 0.44526170742995663],
  ['张艳忠', 'M', 'F', 0.6786791188738392],
  ['郁琦', 'M', 'F', 0.3581180542375616],
  ['边新捧', 'F', 'M', 0.5762718239593515],
  ['池寿丹', 'M', 'F', 0.8798694099727873],
  ['苗云清', 'M', 'U', 0.3532734617770906],
  ['李桂阳', 'F', 'M', 0.5339663638265105],
  ['曾祥英', 'F', 'M', 0.6135133617781545],
  ['关晶源', 'F', 'M', 0.5065397044341299]])

**For the rest of the original train set**

In [7]:
# test the accuracy of the model on the rest of the original train set
# including those gender undefined cases

accu = gender.accuracy(train_ds_dev, full_name=True)
accu, gender.mismatch[:20]

(0.9466896351158294,
 [['name', 'gender', 'pred', 'prob'],
  ['刘博今', 'U', 'M', 0.9707047450933136],
  ['德春', 'F', 'M', 0.37994922908444184],
  ['王水', 'U', 'M', 0.7747237307066903],
  ['白若清', 'M', 'F', 0.5148961424899021],
  ['萧丁', 'U', 'M', 0.7661898310914489],
  ['果叶', 'U', 'F', 0.742756560737446],
  ['张玉乐', 'U', 'M', 0.45458259843519183],
  ['聂秀国', 'M', 'F', 0.5478546574622881],
  ['卓礼萱', 'M', 'F', 0.5602594339563255],
  ['巩亚卓', 'M', 'F', 0.5366690277861579],
  ['公羊云平', 'M', 'U', 0.5071093333990999],
  ['邱春荣', 'F', 'M', 0.7478654961817557],
  ['王一晓', 'U', 'M', 0.7286339835935126],
  ['倪钰滋', 'M', 'F', 0.5048349551733823],
  ['潘潇汝', 'F', 'M', 0.5003482503014777],
  ['汤煊怡', 'F', 'M', 0.5415553325343744],
  ['濮阳晓晓', 'F', 'U', 0.47603372050175013],
  ['林清荣', 'U', 'M', 0.8549075931670357],
  ['黎喜', 'U', 'M', 0.790668723733955]])

In [8]:
# test the accuracy of the model on the rest of the original train set
# excluding those gender undefined cases

accu = gender.accuracy(train_ds_dev, full_name=True, exclude_U=True)
accu, gender.mismatch[:20]

(0.9702947322091952,
 [['name', 'gender', 'pred', 'prob'],
  ['德春', 'F', 'M', 0.37994922908444184],
  ['白若清', 'M', 'F', 0.5148961424899021],
  ['聂秀国', 'M', 'F', 0.5478546574622881],
  ['卓礼萱', 'M', 'F', 0.5602594339563255],
  ['巩亚卓', 'M', 'F', 0.5366690277861579],
  ['公羊云平', 'M', 'U', 0.5071093333990999],
  ['邱春荣', 'F', 'M', 0.7478654961817557],
  ['倪钰滋', 'M', 'F', 0.5048349551733823],
  ['潘潇汝', 'F', 'M', 0.5003482503014777],
  ['汤煊怡', 'F', 'M', 0.5415553325343744],
  ['濮阳晓晓', 'F', 'U', 0.47603372050175013],
  ['邵舒阳', 'M', 'F', 0.5277071334916746],
  ['广雨辰', 'F', 'M', 0.858384099403983],
  ['齐金玉', 'M', 'F', 0.6371652871900674],
  ['李璞', 'F', 'M', 0.8078102328127373],
  ['郝春华', 'M', 'F', 0.3646456564000686],
  ['耿义雅', 'M', 'F', 0.6583017865288876],
  ['溥卓诗', 'M', 'F', 0.5504687395587152],
  ['明群', 'M', 'U', 0.5271229497252596]])

**For the original dev set**

In [10]:
# loading the original dev set
dev_ds = data_loader('data/dev_ds.txt')
len(dev_ds), dev_ds[:5]

(365811,
 [['冯瑞琳', 'F'], ['曹凯棋', 'M'], ['危义祥', 'M'], ['强识闻', 'M'], ['钮缤鲃', 'M']])

In [11]:
# test the accuracy of the model on the original dev set
# including those gender undefined cases

accu = gender.accuracy(dev_ds, full_name=True)
accu, gender.mismatch[:20]

(0.94644502215625,
 [['name', 'gender', 'pred', 'prob'],
  ['付睿', 'U', 'M', 0.8311858450077128],
  ['郭连', 'U', 'F', 0.6128853366948264],
  ['游丁', 'U', 'M', 0.7661898310914489],
  ['林乔', 'U', 'M', 0.46442977434012034],
  ['闫韶华', 'U', 'M', 0.8835874436603273],
  ['李思宁', 'M', 'F', 0.5513951408603314],
  ['祝詹晨', 'M', 'U', 0.4289339476065103],
  ['车萃卜', 'F', 'M', 0.5218822803705685],
  ['刘畏', 'U', 'M', 0.5632632879903952],
  ['巢云夕', 'M', 'F', 0.5139186479273077],
  ['南英', 'U', 'F', 0.8609835356439166],
  ['武亭', 'F', 'M', 0.5835804599462253],
  ['汤云瑞', 'M', 'F', 0.5278466163026296],
  ['容庚', 'U', 'M', 0.7025191336050127],
  ['李润培', 'U', 'M', 0.948242222916413],
  ['潘雪明', 'U', 'F', 0.5222983314582598],
  ['罗九', 'U', 'M', 0.8759067846656167],
  ['杨棣华', 'U', 'M', 0.7546019635927789],
  ['隋文', 'U', 'M', 0.6451627573058977]])

In [12]:
# test the accuracy of the model on the original dev set
# excluding those gender undefined cases

accu = gender.accuracy(dev_ds, full_name=True, exclude_U=True)
accu, gender.mismatch[:20]

(0.9705512482947177,
 [['name', 'gender', 'pred', 'prob'],
  ['李思宁', 'M', 'F', 0.5513951408603314],
  ['祝詹晨', 'M', 'U', 0.4289339476065103],
  ['车萃卜', 'F', 'M', 0.5218822803705685],
  ['巢云夕', 'M', 'F', 0.5139186479273077],
  ['武亭', 'F', 'M', 0.5835804599462253],
  ['汤云瑞', 'M', 'F', 0.5278466163026296],
  ['孙坤玥', 'M', 'F', 0.5255140277152626],
  ['诸乔楚', 'M', 'F', 0.5397862223520067],
  ['宗继红', 'M', 'F', 0.5753134922560033],
  ['纪会会', 'F', 'U', 0.5074093787912801],
  ['毕琳伟', 'M', 'F', 0.7279461459535833],
  ['孙军红', 'F', 'M', 0.9009563144219227],
  ['张文瑜', 'M', 'F', 0.6038500746856287],
  ['龙佳桤', 'M', 'F', 0.7706763303877168],
  ['严滠芳', 'M', 'F', 0.9982434513411969],
  ['毛银', 'F', 'M', 0.6033869728115671],
  ['鲍晗', 'M', 'F', 0.6536717990218155],
  ['祝舍明', 'M', 'U', 0.5650848281413636],
  ['汪砰', 'M', 'U', 0.554112070901956]])

<a name='test_set'></a>
## Testing against the test set

In [13]:
# loading the original test set
# please note that the test set contains full names
test_ds = data_loader('data/test_ds.txt')
len(test_ds), test_ds[:5]

(365811,
 [['邬爱清', 'F'], ['杜文吕', 'M'], ['任千焱', 'M'], ['鲍梦冉', 'F'], ['薛俊霖', 'M']])

In [14]:
# test the accuracy of the model on the original test set
# including those gender undefined cases

accu = gender.accuracy(test_ds, full_name=True)
accu, gender.mismatch[:20]

(0.9462728020753887,
 [['name', 'gender', 'pred', 'prob'],
  ['顾仁疋', 'F', 'M', 0.951494650027016],
  ['幸路', 'U', 'M', 0.7046140119549571],
  ['商禹', 'U', 'M', 0.9143276590605068],
  ['易宛其', 'M', 'F', 0.7085827978822331],
  ['蔚韦君', 'M', 'F', 0.7219030270992229],
  ['任爰好', 'M', 'F', 0.7521378056877494],
  ['童仕君', 'U', 'M', 0.8478630748258673],
  ['杨晓一', 'U', 'M', 0.7286339835935126],
  ['舒海华', 'U', 'M', 0.7885843833905349],
  ['卢少冰', 'U', 'M', 0.7701917415910329],
  ['范文佳', 'M', 'F', 0.45505228616429405],
  ['史梵', 'M', 'F', 0.39515143017123255],
  ['吴昕阳', 'U', 'M', 0.9279654532438039],
  ['王春清', 'F', 'M', 0.7151642962315684],
  ['农月部', 'M', 'F', 0.9903755821554568],
  ['淳于文灵', 'M', 'F', 0.5427887968864502],
  ['井建华', 'U', 'M', 0.8129042376544283],
  ['辜永红', 'F', 'M', 0.702495246245912],
  ['安彬', 'U', 'M', 0.8379973144736247]])

In [15]:
# test the accuracy of the model on the original test set
# excluding those gender undefined cases

accu = gender.accuracy(test_ds, full_name=True, exclude_U=True)
accu, gender.mismatch[:20]

(0.970174685114852,
 [['name', 'gender', 'pred', 'prob'],
  ['顾仁疋', 'F', 'M', 0.951494650027016],
  ['易宛其', 'M', 'F', 0.7085827978822331],
  ['蔚韦君', 'M', 'F', 0.7219030270992229],
  ['任爰好', 'M', 'F', 0.7521378056877494],
  ['范文佳', 'M', 'F', 0.45505228616429405],
  ['史梵', 'M', 'F', 0.39515143017123255],
  ['王春清', 'F', 'M', 0.7151642962315684],
  ['农月部', 'M', 'F', 0.9903755821554568],
  ['淳于文灵', 'M', 'F', 0.5427887968864502],
  ['辜永红', 'F', 'M', 0.702495246245912],
  ['聂驯侘', 'M', 'U', 0.554112070901956],
  ['李来月', 'F', 'M', 0.788230634374804],
  ['宁文蔚', 'M', 'F', 0.5391734909647223],
  ['常海仪', 'M', 'F', 0.7240201061900376],
  ['蓝柳扬', 'F', 'M', 0.5368824575412814],
  ['尹昕', 'F', 'M', 0.4691869057902933],
  ['查群', 'F', 'U', 0.5271229497252596],
  ['雷琦琦', 'M', 'F', 0.3581180542375616],
  ['家雅凯', 'M', 'F', 0.6117890475114353]])

<a name='first_name'></a>
## Test on first names

Rerun the above algorithms again, including splitting the train set into train_ds and train_dev

In [16]:
train_ds_all = data_loader('data/train_ds.txt', full_name=False)
train_ds, train_ds_dev = train_ds_all[:100000], train_ds_all[100000:]
len(train_ds), train_ds[:5]

(100000, [['莹暂', 'F'], ['荣辉', 'M'], ['泽彬', 'M'], ['二庄', 'M'], ['治权', 'M']])

In [17]:
# test the accuracy of the model on the selected train set
# including those gender undefined cases
accu = gender.accuracy(train_ds, full_name=True)
accu, gender.mismatch[:20]

(0.8571,
 [['name', 'gender', 'pred', 'prob'],
  ['晔', 'U', 'M', 0.40640049019849533],
  ['洪君', 'M', 'U', 0.5206657693467208],
  ['俊', 'M', 'U', 0.554112070901956],
  ['苗利', 'F', 'M', 0.5620270927619253],
  ['达', 'M', 'U', 0.554112070901956],
  ['忠华', 'M', 'U', 0.5883437014656338],
  ['美啸', 'F', 'M', 0.8481864322051416],
  ['文冰', 'U', 'F', 0.5127995647795527],
  ['磊', 'M', 'U', 0.554112070901956],
  ['墨', 'M', 'U', 0.554112070901956],
  ['斐然', 'F', 'M', 0.5523766287348633],
  ['乐懿', 'U', 'F', 0.5040915502527811],
  ['水荣', 'U', 'M', 0.6172585693920711],
  ['玉青', 'F', 'U', 0.4250337695369403],
  ['正漪', 'M', 'F', 0.9992333658673426],
  ['晗', 'M', 'F', 0.6536717990218155],
  ['逸', 'U', 'M', 0.7354662573578914],
  ['丹彬', 'F', 'M', 0.8379973144736247],
  ['庆', 'M', 'U', 0.554112070901956]])

In [18]:
# test the accuracy of the model on the selected train set
# excluding those gender undefined cases

accu = gender.accuracy(train_ds, full_name=True, exclude_U=True)
accu, gender.mismatch[:20]

(0.8686822086895901,
 [['name', 'gender', 'pred', 'prob'],
  ['洪君', 'M', 'U', 0.5206657693467208],
  ['俊', 'M', 'U', 0.554112070901956],
  ['苗利', 'F', 'M', 0.5620270927619253],
  ['达', 'M', 'U', 0.554112070901956],
  ['忠华', 'M', 'U', 0.5883437014656338],
  ['美啸', 'F', 'M', 0.8481864322051416],
  ['磊', 'M', 'U', 0.554112070901956],
  ['墨', 'M', 'U', 0.554112070901956],
  ['斐然', 'F', 'M', 0.5523766287348633],
  ['玉青', 'F', 'U', 0.4250337695369403],
  ['正漪', 'M', 'F', 0.9992333658673426],
  ['晗', 'M', 'F', 0.6536717990218155],
  ['丹彬', 'F', 'M', 0.8379973144736247],
  ['庆', 'M', 'U', 0.554112070901956],
  ['晨云', 'M', 'U', 0.5925564159943968],
  ['雪逸', 'F', 'M', 0.7354662573578914],
  ['晓华', 'M', 'U', 0.5883437014656338],
  ['兴敏', 'M', 'F', 0.5106319045316918],
  ['乐', 'M', 'U', 0.554112070901956]])

In [19]:
# test the accuracy of the model on the rest of the original train set
# including those gender undefined cases

accu = gender.accuracy(train_ds_dev, full_name=True)
accu, gender.mismatch[:20]

(0.8493090006460319,
 [['name', 'gender', 'pred', 'prob'],
  ['禹含', 'M', 'F', 0.9142187596403223],
  ['娜', 'F', 'U', 0.554112070901956],
  ['改青', 'F', 'U', 0.4250337695369403],
  ['勇', 'M', 'U', 0.554112070901956],
  ['天音', 'M', 'F', 0.9789635578956217],
  ['春', 'F', 'U', 0.554112070901956],
  ['岩', 'M', 'U', 0.554112070901956],
  ['月严', 'F', 'M', 0.7863604765607316],
  ['翦敏', 'M', 'F', 0.5106319045316918],
  ['东冬', 'M', 'F', 0.47053096641042813],
  ['子', 'M', 'U', 0.554112070901956],
  ['韶', 'M', 'U', 0.554112070901956],
  ['玉珂', 'F', 'M', 0.5488373229938105],
  ['丽峰', 'F', 'M', 0.7735170873161514],
  ['路', 'M', 'U', 0.554112070901956],
  ['雪平', 'F', 'M', 0.5839001827674885],
  ['坚', 'M', 'U', 0.554112070901956],
  ['玉乐', 'U', 'M', 0.783239266426579],
  ['朱雨', 'M', 'F', 0.4070793987584367]])

In [20]:
# test the accuracy of the model on the rest of the original train set
# excluding those gender undefined cases
accu = gender.accuracy(train_ds_dev, full_name=True, exclude_U=True)
accu, gender.mismatch[:20]

(0.861629252286152,
 [['name', 'gender', 'pred', 'prob'],
  ['禹含', 'M', 'F', 0.9142187596403223],
  ['娜', 'F', 'U', 0.554112070901956],
  ['改青', 'F', 'U', 0.4250337695369403],
  ['勇', 'M', 'U', 0.554112070901956],
  ['天音', 'M', 'F', 0.9789635578956217],
  ['春', 'F', 'U', 0.554112070901956],
  ['岩', 'M', 'U', 0.554112070901956],
  ['月严', 'F', 'M', 0.7863604765607316],
  ['翦敏', 'M', 'F', 0.5106319045316918],
  ['东冬', 'M', 'F', 0.47053096641042813],
  ['子', 'M', 'U', 0.554112070901956],
  ['韶', 'M', 'U', 0.554112070901956],
  ['玉珂', 'F', 'M', 0.5488373229938105],
  ['丽峰', 'F', 'M', 0.7735170873161514],
  ['路', 'M', 'U', 0.554112070901956],
  ['雪平', 'F', 'M', 0.5839001827674885],
  ['坚', 'M', 'U', 0.554112070901956],
  ['朱雨', 'M', 'F', 0.4070793987584367],
  ['红遥', 'F', 'M', 0.4614531905281432]])

**For the original dev set**

In [21]:
# loading the original dev set
dev_ds = data_loader('data/dev_ds.txt', full_name=False)
len(dev_ds), dev_ds[:5]

(365811, [['瑞琳', 'F'], ['凯棋', 'M'], ['义祥', 'M'], ['识闻', 'M'], ['缤鲃', 'M']])

In [22]:
# test the accuracy of the model on the original dev set
# including those gender undefined cases

accu = gender.accuracy(dev_ds, full_name=True)
accu, gender.mismatch[:20]

(0.850081052784088,
 [['name', 'gender', 'pred', 'prob'],
  ['梅云', 'F', 'U', 0.5925564159943968],
  ['穗君', 'F', 'U', 0.5206657693467208],
  ['梦饶', 'F', 'M', 0.5512089884475259],
  ['佳嘉', 'F', 'M', 0.6504799666787816],
  ['玉墨', 'F', 'M', 0.6682442165247393],
  ['承隍', 'M', 'U', 0.554112070901956],
  ['艳砚', 'F', 'M', 0.5814055986164764],
  ['天秘', 'M', 'F', 0.5832588205825965],
  ['凤辰', 'F', 'M', 0.7723805377370389],
  ['子懿', 'M', 'F', 0.5040915502527811],
  ['宇烟', 'M', 'F', 0.9808327176375053],
  ['奇', 'M', 'U', 0.554112070901956],
  ['佳臻', 'F', 'M', 0.7452530112063541],
  ['明华', 'M', 'U', 0.5883437014656338],
  ['凤桐', 'F', 'M', 0.5139461732815358],
  ['智', 'M', 'U', 0.554112070901956],
  ['睿', 'U', 'M', 0.8311858450077128],
  ['磊', 'M', 'U', 0.554112070901956],
  ['文藻', 'M', 'U', 0.6607274090794647]])

In [23]:
# test the accuracy of the model on the original dev set
# excluding those gender undefined cases

accu = gender.accuracy(dev_ds, full_name=True, exclude_U=True)
accu, gender.mismatch[:20]

(0.8624882805701678,
 [['name', 'gender', 'pred', 'prob'],
  ['梅云', 'F', 'U', 0.5925564159943968],
  ['穗君', 'F', 'U', 0.5206657693467208],
  ['梦饶', 'F', 'M', 0.5512089884475259],
  ['佳嘉', 'F', 'M', 0.6504799666787816],
  ['玉墨', 'F', 'M', 0.6682442165247393],
  ['承隍', 'M', 'U', 0.554112070901956],
  ['艳砚', 'F', 'M', 0.5814055986164764],
  ['天秘', 'M', 'F', 0.5832588205825965],
  ['凤辰', 'F', 'M', 0.7723805377370389],
  ['子懿', 'M', 'F', 0.5040915502527811],
  ['宇烟', 'M', 'F', 0.9808327176375053],
  ['奇', 'M', 'U', 0.554112070901956],
  ['佳臻', 'F', 'M', 0.7452530112063541],
  ['明华', 'M', 'U', 0.5883437014656338],
  ['凤桐', 'F', 'M', 0.5139461732815358],
  ['智', 'M', 'U', 0.554112070901956],
  ['磊', 'M', 'U', 0.554112070901956],
  ['文藻', 'M', 'U', 0.6607274090794647],
  ['平', 'M', 'U', 0.554112070901956]])

<a name='test_set'></a>
## Testing against the test set

In [24]:
# loading the original test set
test_ds = data_loader('data/test_ds.txt', full_name=False)
len(test_ds), test_ds[:5]

(365811, [['爱清', 'F'], ['文吕', 'M'], ['千焱', 'M'], ['梦冉', 'F'], ['俊霖', 'M']])

In [25]:
# test the accuracy of the model on the original test set
# including those gender undefined cases

accu = gender.accuracy(test_ds, full_name=True)
accu, gender.mismatch[:20]

(0.8480636175511398,
 [['name', 'gender', 'pred', 'prob'],
  ['爱清', 'F', 'M', 0.6147397168213984],
  ['俊酆', 'M', 'U', 0.5326388489406404],
  ['白', 'F', 'U', 0.554112070901956],
  ['紫乔', 'F', 'M', 0.46442977434012034],
  ['令群', 'M', 'U', 0.5271229497252596],
  ['年华', 'M', 'U', 0.5883437014656338],
  ['建', 'M', 'U', 0.554112070901956],
  ['芮姚', 'F', 'M', 0.35295753964074583],
  ['松', 'M', 'U', 0.554112070901956],
  ['静文', 'F', 'M', 0.6451627573058977],
  ['玄琪', 'M', 'F', 0.5650839376409527],
  ['文郡', 'M', 'F', 0.4492423468024235],
  ['美文', 'F', 'M', 0.6451627573058977],
  ['韦君', 'M', 'U', 0.5206657693467208],
  ['苍', 'M', 'U', 0.554112070901956],
  ['凤营', 'F', 'M', 0.8360031558027466],
  ['惠勤', 'F', 'U', 0.4593824165096386],
  ['贺白', 'M', 'F', 0.6910497310419871],
  ['萧', 'F', 'U', 0.554112070901956]])

In [26]:
# test the accuracy of the model on the original test set
# excluding those gender undefined cases

accu = gender.accuracy(test_ds, full_name=True, exclude_U=True)
accu, gender.mismatch[:20]

(0.860473668106703,
 [['name', 'gender', 'pred', 'prob'],
  ['爱清', 'F', 'M', 0.6147397168213984],
  ['俊酆', 'M', 'U', 0.5326388489406404],
  ['白', 'F', 'U', 0.554112070901956],
  ['紫乔', 'F', 'M', 0.46442977434012034],
  ['令群', 'M', 'U', 0.5271229497252596],
  ['年华', 'M', 'U', 0.5883437014656338],
  ['建', 'M', 'U', 0.554112070901956],
  ['芮姚', 'F', 'M', 0.35295753964074583],
  ['松', 'M', 'U', 0.554112070901956],
  ['静文', 'F', 'M', 0.6451627573058977],
  ['玄琪', 'M', 'F', 0.5650839376409527],
  ['文郡', 'M', 'F', 0.4492423468024235],
  ['美文', 'F', 'M', 0.6451627573058977],
  ['韦君', 'M', 'U', 0.5206657693467208],
  ['苍', 'M', 'U', 0.554112070901956],
  ['凤营', 'F', 'M', 0.8360031558027466],
  ['惠勤', 'F', 'U', 0.4593824165096386],
  ['贺白', 'M', 'F', 0.6910497310419871],
  ['萧', 'F', 'U', 0.554112070901956]])