# ANN classifiers for German noun genders

Noun data and gender extraction method using <https://github.com/karoly-varasdi/de-wiktionary-parser>.



In [2]:
## to install dewiktionaryparser:
!git clone https://github.com/karoly-varasdi/de-wiktionary-parser
!pip install de-wiktionary-parser/dist/dewiktionaryparser-1.1.1.tar.gz

Cloning into 'de-wiktionary-parser'...
remote: Enumerating objects: 120, done.[K
remote: Total 120 (delta 0), reused 0 (delta 0), pack-reused 120[K
Receiving objects: 100% (120/120), 25.99 MiB | 41.65 MiB/s, done.
Resolving deltas: 100% (42/42), done.
Processing ./de-wiktionary-parser/dist/dewiktionaryparser-1.1.1.tar.gz
Collecting prettytable (from dewiktionaryparser==1.1.1)
  Downloading https://files.pythonhosted.org/packages/ef/30/4b0746848746ed5941f052479e7c23d2b56d174b82f4fd34a25e389831f5/prettytable-0.7.2.tar.bz2
Building wheels for collected packages: dewiktionaryparser, prettytable
  Running setup.py bdist_wheel for dewiktionaryparser ... [?25l- done
[?25h  Stored in directory: /root/.cache/pip/wheels/68/af/60/cd966fa9ccc8dd8735fa850151560574069d3e9f40103713c3
  Running setup.py bdist_wheel for prettytable ... [?25l- done
[?25h  Stored in directory: /root/.cache/pip/wheels/80/34/1c/3967380d9676d162cb59513bd9dc862d0584e045a162095606
Successfully built dewiktionarypars

In [3]:
## packages for dataset preparation
import dewiktionaryparser as dw
import csv
import pandas as pd
from numpy import NaN
from sklearn.preprocessing import LabelEncoder
import re
import zipfile

## packages for NN training
from keras.models import Sequential
from keras.layers import Dense
import keras.optimizers
import keras.backend as K
from keras.models import model_from_json
import numpy as np

Using TensorFlow backend.


## Creating the dataset

Loading DE wiktionary info on common nouns.

In [4]:
# Unpack the bundled archive into ./data. The context manager makes sure the
# zip file handle is closed again once extraction has finished.
with zipfile.ZipFile('de-wiktionary-parser/data/de_noun_entries_commons.zip') as archive:
    archive.extractall('data')

# Load the extracted JSON into the parser's noun-entry dictionary.
commons = dw.GermanNounEntriesDict()
commons.retrieve_from_json(r'./data/de_noun_entries_commons.json')

Retrieving dictionary from ./data/de_noun_entries_commons.json . . .
Retrieved 69558 entries.



In [5]:
!ls
!ls data

data  de-wiktionary-parser  sample_data
de_noun_entries_commons.json


### One-hot encode gender information for nouns

In [0]:
def gender_onehot_encode(noundic):
    '''One-hot encode gender information for nouns.

    Iterates over the keys of ``noundic`` (a dewiktionary dictionary) and looks
    up each noun's genders with ``dw.explore.genders(noun, noundic)``.

    Returns a dictionary of noun: [m, f, n] items such that each value is a
    3-place list, where 0 means the noun does not have that gender and 1 means
    that it does. E.g., {'Katze': [0, 1, 0]}

    Nouns for which none of 'm', 'f', 'n' is found are left out of the result;
    only their number is reported in the printed summary.
    '''

    genderdic = {'m': 0, 'f': 1, 'n': 2}
    nounsandgenders = {}
    nounswithnogenders = []

    print("\nCollecting gender information...")
    counter = 0
    for noun in noundic:
        counter += 1
        # The lookup does not depend on which gender is being tested, so it is
        # done once per noun rather than once per (noun, gender) pair.
        noun_genders = ''.join(sorted(set(dw.explore.genders(noun, noundic))))

        onehot = [0, 0, 0]
        for gender, index in genderdic.items():
            if gender in noun_genders:
                onehot[index] = 1

        if any(onehot):
            nounsandgenders[noun] = onehot
        else:
            nounswithnogenders.append(noun)

    print("Gender information from {} nouns collected.\n\
        {} nouns with genders included.\n\
        {} nouns with no genders omitted.".format(counter, len(nounsandgenders), len(nounswithnogenders)))

    return nounsandgenders
        

## In case we want to save dictionary info:
def save_dic_to_csv(dic, path=r'.\data\dic.csv', encoding='utf-8'):
    '''Write every (key, value) pair of ``dic`` as one two-column CSV row.

    dic: mapping whose items are written in iteration order.
    path: target file; it is overwritten if it already exists.
        NOTE(review): the default uses Windows-style backslashes, so on other
        platforms it names a single file rather than a file inside ./data.
    encoding: text encoding of the output file.
    '''
    # The csv module requires files to be opened with newline=''; without it
    # the writer's own '\r\n' row terminator is translated a second time on
    # Windows, which produces an empty line after every row.
    with open(path, 'w', encoding=encoding, newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerows(dic.items())
    print("Dictionary saved successfully to {}.".format(path))

Generate gender info for German common nouns:

In [7]:
nounsandgenders = gender_onehot_encode(commons)
# save_dic_to_csv(nounsandgenders, path=r'.\data\nounsandgenders.csv')


Collecting gender information...
Gender information from 69558 nouns collected.
        69272 nouns with genders included.
        286 nouns with no genders omitted.


An example:

In [8]:
print("[male, female, neuter] genders in wiktionary for 'Reis':\n m, f, n\n{}".format(nounsandgenders['Reis']))


[male, female, neuter] genders in wiktionary for 'Reis':
 m, f, n
[1, 0, 1]


In [9]:
print(nounsandgenders['Bild'])

[0, 0, 1]


### Create character-to-integer encoder

#### Collect all characters in common nouns:

In [0]:
def collect_chars(dic):
    '''Gather every distinct character appearing in the string keys of ``dic``.

    Prints a short progress message and the number of distinct characters
    found, then returns those characters as a sorted list.'''
    print("\nCollecting characters...")
    alphabet = {symbol for key in dic for symbol in key}
    print("{} characters collected.".format(len(alphabet)))
    return sorted(alphabet)

In [11]:
nounchars = collect_chars(nounsandgenders)


Collecting characters...
113 characters collected.


#### Integer encoder for characters from scikit-learn

In [12]:
# Assign an integer label to every character occurring in the common nouns
char_encoder = LabelEncoder()
integer_encoded_chars = char_encoder.fit_transform(nounchars)

# ordered list of characters; the position of a character is its integer label
char_list = list(char_encoder.classes_)

# for our information only: a character -> integer label lookup table
char_dic = dict(zip(nounchars, integer_encoded_chars))

  if diff:


In [13]:
# the character-integer dictionary
print(char_dic)

{' ': 0, '&': 1, "'": 2, ',': 3, '-': 4, '.': 5, '/': 6, '0': 7, '1': 8, '2': 9, '3': 10, '4': 11, '5': 12, '6': 13, '7': 14, '8': 15, '9': 16, 'A': 17, 'B': 18, 'C': 19, 'D': 20, 'E': 21, 'F': 22, 'G': 23, 'H': 24, 'I': 25, 'J': 26, 'K': 27, 'L': 28, 'M': 29, 'N': 30, 'O': 31, 'P': 32, 'Q': 33, 'R': 34, 'S': 35, 'T': 36, 'U': 37, 'V': 38, 'W': 39, 'X': 40, 'Y': 41, 'Z': 42, 'a': 43, 'b': 44, 'c': 45, 'd': 46, 'e': 47, 'f': 48, 'g': 49, 'h': 50, 'i': 51, 'j': 52, 'k': 53, 'l': 54, 'm': 55, 'n': 56, 'o': 57, 'p': 58, 'q': 59, 'r': 60, 's': 61, 't': 62, 'u': 63, 'v': 64, 'w': 65, 'x': 66, 'y': 67, 'z': 68, 'À': 69, 'Ä': 70, 'Å': 71, 'É': 72, 'Ö': 73, 'Ü': 74, 'ß': 75, 'à': 76, 'á': 77, 'â': 78, 'ã': 79, 'ä': 80, 'å': 81, 'ç': 82, 'è': 83, 'é': 84, 'ê': 85, 'í': 86, 'ï': 87, 'ñ': 88, 'ó': 89, 'ô': 90, 'õ': 91, 'ö': 92, 'ú': 93, 'û': 94, 'ü': 95, 'ā': 96, 'č': 97, 'ē': 98, 'ī': 99, 'ł': 100, 'ō': 101, 'Œ': 102, 'œ': 103, 'Š': 104, 'ū': 105, 'ǃ': 106, 'ǧ': 107, 'α': 108, 'β': 109, 'γ': 110,

A sample encoding:

In [14]:
char_encoder.transform(['H', 'a', 'l', 'l', 'o'])

array([24, 43, 54, 54, 57])

Some nouns with weird characters:

In [15]:
for noun in nounsandgenders:
    if 'Ḫ' in noun or 'β' in noun or 'ǃ' in noun or 'ū' in noun:
        print(noun)

β-Ketobuttersäure
Kyū
ǃXóõ
Ḫāriǧit


### Creating dataframe with the features to be used by the NN

In [16]:
print("Creating df with info on gender...")
# Build one row per noun (the noun itself becomes the index label) from the
# noun -> [m, f, n] lists held in nounsandgenders.
df = pd.DataFrame.from_dict(nounsandgenders, 
                            orient='index', 
# NOTE(review): the columns= argument is left disabled and the names are
# assigned on the next statement instead -- presumably for compatibility with
# a pandas version that did not accept it here; confirm before re-enabling.
#                            columns=['m', 'f', 'n']
                           )
# Name the three one-hot gender columns.
df.columns = ['m', 'f', 'n']

# Now get the last 4 characters and, for each possible character, encode whether it is the character in the relevant position.
print("Adding information on last 4 characters (this can take a while)...")

def is_char_in_pos(char, pos, noun):
    '''Return 1 when ``noun[pos]`` equals ``char`` and 0 otherwise.

    A ``pos`` that lies outside ``noun`` (e.g. -4 for a 2-letter noun) also
    yields 0 instead of raising.'''
    try:
        found = noun[pos]
    except IndexError:
        return 0
    return 1 if found == char else 0
        
# The nouns are the index labels of df. Read them once up front: they do not
# change while columns are added, and Index.format() (used previously, once
# per column) is deprecated in recent pandas releases.
nouns = [str(noun) for noun in df.index]

# We iterate through each of the last 4 positions (4th-from-last first) ...
for pos in range(4, 0, -1):
    # ... and through all characters used in common nouns
    for char in char_list:
        # create a new column for the (pos, char) pair and, for each noun, assign 0 or 1 to it
        df[str(pos) + '_' + str(char)] = [is_char_in_pos(char, -1 * pos, noun) for noun in nouns]


# the first five entries
df.head()

Creating df with info on gender...
Adding information on last 4 characters (this can take a while)...


Unnamed: 0,m,f,n,4_,4_&,4_',"4_,",4_-,4_.,4_/,...,1_œ,1_Š,1_ū,1_ǃ,1_ǧ,1_α,1_β,1_γ,1_Ḫ,1_’
Hallo,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Subfamilia,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Subregnum,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Subdivisio,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Phylum,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# to check that no rows have NaNs
df[df.isnull().any(axis=1)]

Unnamed: 0,m,f,n,4_,4_&,4_',"4_,",4_-,4_.,4_/,...,1_œ,1_Š,1_ū,1_ǃ,1_ǧ,1_α,1_β,1_γ,1_Ḫ,1_’


#### Add syllable count information

* Note that **digraphs** come first in the regex pattern so that they are counted as one.
* Note that this syllable count is only an approximation, as "ie" sometimes stands for 2 syllables ("*Familie*"), sometimes 1 ("*Allergie*").

In [18]:
# the syllabic vowels/vowel combinations in German
# (multi-letter combinations are listed first so that each is matched as one unit)
vowelregex = re.compile(r"(aa|ee|ie|oo|au|eu|äu|ei|ai|ey|ay|a|ä|e|i|o|ö|u|ü|y)")

# adding syllable count information: the number of vowel groups in the lower-cased noun.
# The nouns are taken straight from the index; Index.format() is deprecated in recent pandas.
df['syll-count'] = [len(vowelregex.findall(str(noun).lower())) for noun in df.index]

df.head()

Unnamed: 0,m,f,n,4_,4_&,4_',"4_,",4_-,4_.,4_/,...,1_Š,1_ū,1_ǃ,1_ǧ,1_α,1_β,1_γ,1_Ḫ,1_’,syll-count
Hallo,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
Subfamilia,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
Subregnum,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
Subdivisio,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
Phylum,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2


The first 5 samples, showing only the columns that are nonzero for at least one of them:

In [19]:
df.head().loc[:,(df.head() != 0).any(axis=0)]

Unnamed: 0,f,n,4_a,4_g,4_i,4_y,3_l,3_n,3_s,2_i,2_l,2_u,1_a,1_m,1_o,syll-count
Hallo,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1,2
Subfamilia,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,5
Subregnum,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,3
Subdivisio,1,0,0,0,1,0,0,0,1,1,0,0,0,0,1,5
Phylum,0,1,0,0,0,1,1,0,0,0,0,1,0,1,0,2


### To export the dataframe into a csv

In [0]:
# df.to_csv(r'.\data\allnoungenderdf.csv', encoding='utf-8')
# print("allnoungenderdf.csv saved successfully in data folder.")


## Setting the model parameters

#### Metrics

In [0]:
## can be used as keras model metrics

def mean_pred(y_true, y_pred):
    '''Return the mean of the predicted values; ``y_true`` is not used.'''
    prediction_mean = K.mean(y_pred)
    return prediction_mean


## precision and recall definitions from an older version of keras
## https://github.com/keras-team/keras/commit/a56b1a55182acf061b1eb2e2c86b48193a0e88f7
def precision(y_true, y_pred):
    """Batch-wise precision: the share of selected items that are relevant.

    Values are clipped to [0, 1] and rounded before counting, so the result
    is computed on the thresholded predictions of the current batch only.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    selected = K.round(K.clip(y_pred, 0, 1))
    # epsilon keeps the ratio defined when nothing was selected
    return K.sum(hits) / (K.sum(selected) + K.epsilon())

def recall(y_true, y_pred):
    """Batch-wise recall: the share of relevant items that were selected.

    Values are clipped to [0, 1] and rounded before counting, so the result
    is computed on the thresholded predictions of the current batch only.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    relevant = K.round(K.clip(y_true, 0, 1))
    # epsilon keeps the ratio defined when the batch holds no relevant item
    return K.sum(hits) / (K.sum(relevant) + K.epsilon())

def fbeta_score(y_true, y_pred, beta=1):
    """Computes the F-beta score for the current batch.

    The F score is the weighted harmonic mean of precision and recall,
    ``(1 + beta^2) * p * r / (beta^2 * p + r)``. Like ``precision`` and
    ``recall`` it is only computed batch-wise, not globally.

    Neither component is sufficient alone: selecting every item gives perfect
    recall, while selecting almost nothing can give perfect precision. With
    beta = 1 both are weighted equally (the F-measure); beta < 1 puts more
    weight on precision, beta > 1 puts more weight on recall, and beta = 0
    reduces to precision.

    Raises ValueError when ``beta`` is negative.

    When the batch contains no positive targets the result is 0: recall is
    then 0 / epsilon = 0, so the numerator vanishes while epsilon keeps the
    denominator positive. No explicit Python-level test is needed (comparing
    a backend tensor with ``== 0`` inside an ``if`` is not a reliable way to
    branch on its value).
    """
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    return (1 + bb) * (p * r) / (bb * p + r + K.epsilon())

def fmeasure(y_true, y_pred):
    """Batch-wise F-measure: the F-beta score with beta fixed to 1, i.e. the
    harmonic mean of precision and recall.
    """
    f1 = fbeta_score(y_true, y_pred, beta=1)
    return f1



#### The parameters

In [0]:
# Sizes of the two hidden Dense layers shared by the three gender classifiers.
hidden_layer_1_nodes = 200
hidden_layer_2_nodes = 60

# Activations: ReLU in the hidden layers, sigmoid on the single output unit.
activation_hidden_1 = 'relu'
activation_hidden_2 = 'relu'
activation_output = 'sigmoid'
loss='binary_crossentropy'
# loss='mean_squared_error'
# NOTE(review): `sgd` is created here, but the models below are compiled with
# `optimizer` ('adadelta'); it is only an alternative to plug in by hand.
sgd = keras.optimizers.SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False)
## (The default value for learning rate for stochastic gradient descent is 0.01)
optimizer='adadelta'

## since this is a binary classification task, 'accuracy' is here equal to 'binary_accuracy' (predicted value >= 0.5 equals 1)
metrics=['accuracy', precision, recall, fmeasure]

# Training schedule used for every model.
epochs = 8
batch_size = 20

# Number of nouns drawn from df; use the commented-out line to train on everything.
#sample_size = len(df)
sample_size = 20000
# 10% of the sample_size will be used for testing, 90% for training

# fix random seed for reproducibility
seed = 5

# NOTE(review): this seeds NumPy's global generator only; whether the Keras
# backend's weight initialisation is also deterministic is not established
# here -- confirm before relying on exact reproducibility.
np.random.seed(seed)

## Loading the dataset for training and testing

In [0]:
# if load from file
# df = pd.read_csv(r'.\data\allnoungenderdf.csv', encoding='utf-8')

In [23]:
# draw the random working sample of nouns used for both training and testing
ngsample = df.sample(n=sample_size, random_state=seed)
# hold out 10% of the sample as test items ...
ngtest = ngsample.sample(frac=0.1, random_state=seed)
# ... and train on everything in the sample that was not held out
ngtrain = ngsample.drop(index=ngtest.index)

for split_name, split in (("ngtrain", ngtrain), ("ngtest", ngtest)):
    print("Number of {} items:{}".format(split_name, len(split)))

print("\nFirst 5 ngtrain items:\n")
ngtrain.iloc[:5]


Number of ngtrain items:18000
Number of ngtest items:2000

First 5 ngtrain items:



Unnamed: 0,m,f,n,4_,4_&,4_',"4_,",4_-,4_.,4_/,...,1_Š,1_ū,1_ǃ,1_ǧ,1_α,1_β,1_γ,1_Ḫ,1_’,syll-count
Abzweig,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
Abgasmanipulation,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,8
Kolonialimperium,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,8
Käuferin,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
Busfahrer,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3


In [24]:
# Convert each split to a numpy array once, then slice it:
# columns 0, 1, 2 hold the gender info (outputs), the remaining columns are the features (inputs)
train_values = ngtrain.values
test_values = ngtest.values

input_train = train_values[:, 3:]
output_train_m, output_train_f, output_train_n = (train_values[:, col] for col in range(3))

input_test = test_values[:, 3:]
output_test_m, output_test_f, output_test_n = (test_values[:, col] for col in range(3))

train_shapes = [np.shape(out) for out in (output_train_m, output_train_f, output_train_n)]
test_shapes = [np.shape(out) for out in (output_test_m, output_test_f, output_test_n)]

print("Shape of input data:\t{}".format(np.shape(input_train)))
print("Shape of male, female, neuter output data:\t{}, {}, {}".format(*train_shapes))

print("\nShape of input test data:\t{}".format(np.shape(input_test)))
print("Shape of male, female, neuter output test data:\t{}, {}, {}".format(*test_shapes))


Shape of input data:	(18000, 453)
Shape of male, female, neuter output data:	(18000,), (18000,), (18000,)

Shape of input test data:	(2000, 453)
Shape of male, female, neuter output test data:	(2000,), (2000,), (2000,)


In [25]:
input_train[:5]

array([[0, 0, 0, ..., 0, 0, 2],
       [0, 0, 0, ..., 0, 0, 8],
       [0, 0, 0, ..., 0, 0, 8],
       [0, 0, 0, ..., 0, 0, 3],
       [0, 0, 0, ..., 0, 0, 3]])

In [26]:
output_train_m[:5]

array([1, 0, 0, 0, 1])

## Creating the m, f, n gender classifier models

Input dimensions: 4 * 113 + 1 = 453 = `len(input_train[0])`

Output: 0 or 1

In [0]:
# Classifier for the 'm' column: two hidden layers and one sigmoid output unit.
# The input width is the number of feature columns in the training matrix.
model_m = Sequential([
    Dense(hidden_layer_1_nodes, activation=activation_hidden_1, input_dim=input_train.shape[1]),
    Dense(hidden_layer_2_nodes, activation=activation_hidden_2),
    Dense(1, activation=activation_output),
])

# compile the model
model_m.compile(optimizer=optimizer, loss=loss, metrics=metrics)


In [0]:
# Classifier for the 'f' column: two hidden layers and one sigmoid output unit.
# The input width is the number of feature columns in the training matrix.
model_f = Sequential([
    Dense(hidden_layer_1_nodes, activation=activation_hidden_1, input_dim=input_train.shape[1]),
    Dense(hidden_layer_2_nodes, activation=activation_hidden_2),
    Dense(1, activation=activation_output),
])

# compile the model
model_f.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [0]:
# Classifier for the 'n' column: two hidden layers and one sigmoid output unit.
# The input width is the number of feature columns in the training matrix.
model_n = Sequential([
    Dense(hidden_layer_1_nodes, activation=activation_hidden_1, input_dim=input_train.shape[1]),
    Dense(hidden_layer_2_nodes, activation=activation_hidden_2),
    Dense(1, activation=activation_output),
])

# compile the model
model_n.compile(optimizer=optimizer, loss=loss, metrics=metrics)

### Training the models

In [30]:
model_m.fit(input_train, output_train_m, epochs=epochs, batch_size = batch_size)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f043e7865c0>

In [31]:
model_f.fit(input_train, output_train_f, epochs=epochs, batch_size = batch_size)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f043e7864e0>

In [32]:
model_n.fit(input_train, output_train_n, epochs=epochs, batch_size = batch_size)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f0437d26f60>

## Saving the models

In [0]:
## Save complete models (architecture, weights, training config):
# model_m.save("models/model_m.h5")
# model_f.save("models/model_f.h5")
# model_n.save("models/model_n.h5")

## Save only architecture
model_arch = model_m.to_json()
# model reconstruction from JSON:
# new_model = model_from_json(model_arch)


## Model summary

In [56]:
# model_m.output_shape
model_m.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 200)               90800     
_________________________________________________________________
dense_2 (Dense)              (None, 60)                12060     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 61        
Total params: 102,921
Trainable params: 102,921
Non-trainable params: 0
_________________________________________________________________


In [55]:
model_m_weights = model_m.get_weights()
model_m_config = model_m.get_config()

from pprint import pprint
pprint(model_m_config)

{'layers': [{'class_name': 'Dense',
             'config': {'activation': 'relu',
                        'activity_regularizer': None,
                        'batch_input_shape': (None, 453),
                        'bias_constraint': None,
                        'bias_initializer': {'class_name': 'Zeros',
                                             'config': {}},
                        'bias_regularizer': None,
                        'dtype': 'float32',
                        'kernel_constraint': None,
                        'kernel_initializer': {'class_name': 'VarianceScaling',
                                               'config': {'distribution': 'uniform',
                                                          'mode': 'fan_avg',
                                                          'scale': 1.0,
                                                          'seed': None}},
                        'kernel_regularizer': None,
                        'name': 'dense_1',
 

## Evaluating the model

In [35]:
# Evaluate each classifier on the held-out nouns; every result is the loss
# followed by the compiled metrics, in the order given by metrics_names.
scores_m, scores_f, scores_n = [
    model.evaluate(input_test, targets)
    for model, targets in (
        (model_m, output_test_m),
        (model_f, output_test_f),
        (model_n, output_test_n),
    )
]

print("\n-----------\nMetrics of the models:\n-----------")
print("         \tmale\tfemale\tneuter")
# one table row per metric, one column per model
for metric_name, male, female, neuter in zip(model_m.metrics_names, scores_m, scores_f, scores_n):
    print("%s:    \t%.2f\t%.2f\t%.2f" % (metric_name, male, female, neuter))
    


-----------
Metrics of the models:
-----------
         	male	female	neuter
loss:    	0.31	0.20	0.27
acc:    	0.87	0.92	0.89
precision:    	0.82	0.90	0.77
recall:    	0.83	0.92	0.74
fmeasure:    	0.82	0.90	0.74


#### Scikit-learn classification report

In [36]:
from sklearn.metrics import classification_report, precision_recall_fscore_support

# Raw sigmoid outputs of each classifier on the held-out nouns
pred_m = model_m.predict(input_test)
pred_f = model_f.predict(input_test)
pred_n = model_n.predict(input_test)

# Round the outputs to hard 0/1 class predictions and flatten to 1-D
class_pred_m = np.round(pred_m).astype(int).flatten()
class_pred_f = np.round(pred_f).astype(int).flatten()
class_pred_n = np.round(pred_n).astype(int).flatten()

print("Classification report for male classifier:")
print(classification_report(output_test_m, class_pred_m))
print("\nClassification report for female classifier:")
print(classification_report(output_test_f, class_pred_f))
print("\nClassification report for neuter classifier:")
print(classification_report(output_test_n, class_pred_n))

# Per-class (precision, recall, f-score, support) arrays for each classifier.
# labels=[0, 1] pins the layout so that index 1 is always the positive class,
# even if one of the classes happens to be absent from the test split.
precrecf1_m = precision_recall_fscore_support(output_test_m, class_pred_m, labels=[0, 1])
precrecf1_f = precision_recall_fscore_support(output_test_f, class_pred_f, labels=[0, 1])
precrecf1_n = precision_recall_fscore_support(output_test_n, class_pred_n, labels=[0, 1])

# Combine the positive-class scores of the three classifiers into one figure
# per metric, weighting each classifier by its number of positive test items.
positive = 1
per_model_scores = [precrecf1_m, precrecf1_f, precrecf1_n]
supports = [float(scores[3][positive]) for scores in per_model_scores]
total_support = sum(supports)

prec_rec_f1_support_mfn = []
for i in range(3):
    # Dividing NumPy scalars by zero yields nan rather than raising
    # ZeroDivisionError, so the empty case is checked explicitly.
    if total_support == 0:
        val = 0
    else:
        val = sum(scores[i][positive] * support
                  for scores, support in zip(per_model_scores, supports)) / total_support
    prec_rec_f1_support_mfn.append(val)


basic_metrics_labels = ["precision", "recall", "f1-score", "support"]
print("\n------------------------------------\nWeighted mean metrics of the three gender classifier models:\n------------------------------------")
# only precision, recall and f1-score have a weighted mean; support is the weight
for i in range(3):
    print("{}  \t{}".format(basic_metrics_labels[i], round(prec_rec_f1_support_mfn[i], 4)))
print("------------------------------------")

Classification report for male classifier:
             precision    recall  f1-score   support

          0       0.90      0.89      0.89      1229
          1       0.82      0.84      0.83       771

avg / total       0.87      0.87      0.87      2000


Classification report for female classifier:
             precision    recall  f1-score   support

          0       0.94      0.92      0.93      1165
          1       0.89      0.92      0.91       835

avg / total       0.92      0.92      0.92      2000


Classification report for neuter classifier:
             precision    recall  f1-score   support

          0       0.93      0.94      0.93      1560
          1       0.77      0.74      0.76       440

avg / total       0.89      0.89      0.89      2000


------------------------------------
Weighted mean metrics of the three gender classifier models:
------------------------------------
precision  	0.8399
recall  	0.85
f1-score  	0.8448
---------------------------------


#### For checking prediction of model for a single noun

Encoding a single noun as input to the models

In [0]:
def encode_noun(noun):
    '''Build the model input vector for a single noun.

    The vector holds, for each of the last 4 positions (4th-from-last first)
    and each character in char_list, a 0/1 flag telling whether that character
    sits at that position, followed by the vowel-group (syllable) count.
    Returns the features as a 1-D np array.'''
    position_flags = [
        is_char_in_pos(char, -1 * pos, noun)
        for pos in range(4, 0, -1)
        for char in char_list
    ]
    syllable_count = len(vowelregex.findall(noun.lower()))
    return np.array(position_flags + [syllable_count])

In [38]:
encode_noun('Hallo')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [0]:
def single_input(noun, model):
    '''Return ``model``'s prediction for one noun, fed in as a batch of size one.'''
    batch = np.array([encode_noun(noun)])
    return model.predict(batch)

def multiple_input(nounlist, model):
    '''Return a list with ``model``'s prediction for each noun in ``nounlist``,
    every noun being predicted separately as a batch of size one.'''
    predictions = []
    for noun in nounlist:
        batch = np.array([encode_noun(noun)])
        predictions.append(model.predict(batch))
    return predictions



In [40]:

# Raw sigmoid output of the female classifier for a single noun
print("Female model output for 'Katze':", single_input('Katze', model_f))

# Raw outputs of the male classifier for several nouns (the last is a made-up word)
print("Male model output for 'Hund', 'Bär', 'Wasimmer':")
print(multiple_input(['Hund', 'Bär', 'Wasimmer'], model_m))

Female model output for 'Katze': [[0.9800426]]
Male model output for 'Hund', 'Bär', 'Wasimmer:
[array([[0.9406437]], dtype=float32), array([[0.9392669]], dtype=float32), array([[0.5297212]], dtype=float32)]


#### User-friendly version for guessing at the gender of a noun

In [0]:
def guess_gender(noun):
    '''Print the three classifiers' estimates for ``noun`` as whole percentages.

    The outputs of model_m, model_f and model_n are independent of each other,
    so the three percentages need not add up to 100. If the noun is a key of
    nounsandgenders, the gender(s) recorded there are printed as well.
    Returns None.'''
    gender_labels = ['male', 'female', 'neuter']

    # Convert each model output to a percentage with a single rounding step;
    # rounding to 4 decimals first and then again could shift a borderline
    # value to the neighbouring percent.
    percentages = [int(round(float(single_input(noun, model)[0][0]) * 100))
                   for model in [model_m, model_f, model_n]]

    print("The estimated probabilities for '{}' having the gender...".format(noun))
    # 'neuter' is the label used for this gender everywhere else in the notebook
    print("male:\t{}%\nfemale:\t{}%\nneuter:\t{}%\n".format(*percentages))

    if noun in nounsandgenders:
        noungenders = [label for label, flag in zip(gender_labels, nounsandgenders[noun]) if flag > .5]
        genderstr = ', '.join(noungenders)
        print("The actual gender(s) of {} based on Wiktionary:\t{}".format(noun, genderstr))

In [42]:
guess_gender('Dielung')

The estimated probabilities for 'Dielung' having the gender...
male:	1%
female:	99%
neutral:	0%



In [43]:
guess_gender('Wiederholung')

The estimated probabilities for 'Wiederholung' having the gender...
male:	0%
female:	100%
neutral:	0%

The actual gender(s) of Wiederholung based on Wiktionary:	female


In [44]:
guess_gender('Mädchen')

The estimated probabilities for 'Mädchen' having the gender...
male:	9%
female:	1%
neutral:	97%

The actual gender(s) of Mädchen based on Wiktionary:	neuter


In [45]:
guess_gender('Sälwiemerchen')

The estimated probabilities for 'Sälwiemerchen' having the gender...
male:	8%
female:	2%
neutral:	95%



In [46]:
guess_gender('Bild')

The estimated probabilities for 'Bild' having the gender...
male:	19%
female:	0%
neutral:	97%

The actual gender(s) of Bild based on Wiktionary:	neuter


In [57]:
guess_gender('Anything')

The estimated probabilities for 'Anything' having the gender...
male:	40%
female:	3%
neutral:	49%

