# Identifying the predominant gender

This example shows how to use language features to identify the predominant gender in a text.

In [1]:
# Labels assigned to a sentence according to the gendered words it contains.
UNKNOWN = "unknown"  # no gendered word found
MALE = "male"        # only male-associated words found
FEMALE = "female"    # only female-associated words found
BOTH = "both"        # male- and female-associated words both found

In [2]:
# Lowercase words treated as male indicators. Tokens are matched by exact
# set membership, so every inflection that should count must be listed.
# 'men' is listed alongside "men's" to mirror 'women' / "women's" in the
# female list.
# NOTE(review): NLTK's word_tokenize appears to split possessives and
# contractions ("men's" -> "men", "'s"), so the apostrophe forms may never
# match tokens produced that way -- confirm against the tokenizer in use.
MALE_WORDS = {
    'guy', 'spokesman', 'chairman', "men's", 'men', 'him', "he's", 'his',
    'boy', 'boyfriend', 'boyfriends', 'boys', 'brother', 'brothers', 'dad',
    'dads', 'dude', 'father', 'fathers', 'fiance', 'gentleman', 'gentlemen',
    'god', 'grandfather', 'grandpa', 'grandson', 'groom', 'he', 'himself',
    'husband', 'husbands', 'king', 'male', 'man', 'mr', 'nephew', 'nephews',
    'priest', 'prince', 'son', 'sons', 'uncle', 'uncles', 'waiter', 'widower',
    'widowers',
}

In [3]:
# Lowercase words treated as female indicators. Tokens are matched by exact
# set membership, so a misspelled entry silently never matches; the entries
# 'girls', 'goddess' and 'herself' are spelled out correctly for that reason.
FEMALE_WORDS = {
    'heroine', 'spokeswoman', 'chairwoman', "women's", 'actress', 'women',
    "she's", 'her', 'aunt', 'aunts', 'bride', 'daughter', 'daughters', 'female',
    'fiancee', 'girl', 'girlfriend', 'girlfriends', 'girls', 'goddess',
    'granddaughter', 'grandma', 'grandmother', 'herself', 'ladies', 'lady',
    'mom', 'moms', 'mother', 'mothers', 'mrs', 'ms', 'niece', 'nieces',
    'priestess', 'princess', 'queens', 'she', 'sister', 'sisters', 'waitress',
    'widow', 'widows', 'wife', 'wives', 'woman',
}

In [4]:
def genderize(words):
    """Label a collection of tokens by the gendered words it contains.

    Args:
        words: Iterable of tokens, compared against MALE_WORDS and
            FEMALE_WORDS by exact match.

    Returns:
        BOTH if tokens from both word sets are present, MALE or FEMALE if
        only one set is represented, and UNKNOWN if neither is.
    """
    has_male = bool(MALE_WORDS.intersection(words))
    has_female = bool(FEMALE_WORDS.intersection(words))

    if has_male and has_female:
        return BOTH
    if has_male:
        return MALE
    if has_female:
        return FEMALE
    return UNKNOWN

In [6]:
from collections import Counter

def count_gender(sentences):
    """Tally sentences and tokens per gender label.

    Args:
        sentences: Iterable of token lists, one list per sentence.

    Returns:
        A pair of Counters keyed by the label genderize() returns: the
        first counts sentences, the second sums the token counts
        (len of each sentence) for that label.
    """
    sentence_tally = Counter()
    token_tally = Counter()

    for tokens in sentences:
        label = genderize(tokens)
        sentence_tally.update([label])
        token_tally.update({label: len(tokens)})

    return sentence_tally, token_tally

In [64]:
import nltk

def parse_gender(text):
    """Print, per gender label, its share of all tokens in *text*.

    The text is split into sentences with nltk.sent_tokenize, each
    sentence into tokens with nltk.word_tokenize, and every token is
    lowercased before the sentences are handed to count_gender().

    Args:
        text: The raw string to analyse.

    Returns:
        None. One line per label is printed, giving the percentage of
        tokens under that label and the number of sentences with it.
    """
    tokenized = []
    for raw_sentence in nltk.sent_tokenize(text):
        tokens = nltk.word_tokenize(raw_sentence)
        tokenized.append([token.lower() for token in tokens])

    sentence_counts, token_counts = count_gender(tokenized)
    grand_total = sum(token_counts.values())
    line_format = "{:.3f}% {} ({} sentences)"

    for label in token_counts:
        share = (token_counts[label] / grand_total) * 100
        print(line_format.format(share, label, sentence_counts[label]))

In [65]:
# Sample input: a three-paragraph English passage about the film
# "Weathering with You", used below as the argument to parse_gender().
text = '''Weathering with You (Japanese: 天気の子, Hepburn: Tenki no Ko, lit. "Child of Weather") is a 2019 Japanese animated romance/fantasy film written and directed by Makoto Shinkai. Set in Japan during a period of exceptionally rainy weather, the film tells the story of a high-school boy who runs away from his rural home to Tokyo and befriends an orphan girl who has the ability to manipulate the weather. The film was produced by Genki Kawamura, and the music was composed by Radwimps.

The film was released in Japan on July 19, 2019; the previous day, a novel adaptation written by Shinkai—one of his original works—was published. A manga adaptation illustrated by Watari Kubota was first serialized in Kodansha's Afternoon on July 25 the same year. It was released in 140 countries throughout the world, earning over US$193.1 million worldwide and ¥14.06 billion in Japan. The film was selected as the Japanese entry for Best International Feature Film at the 92nd Academy Awards. It also received four Annie Award nominations including Best Independent Animated Feature.

The film received generally positive reviews. On the review aggregator website Rotten Tomatoes, the film has a 91% rating, with the critics' consensus saying; it is beautifully animated and narratively engaged. On Metacritic, the film also got generally favorable reactions. Individual reviewers have also praised the film for its animation, plot, music, visuals, and use of weather to convey the story's metaphor. Some compared the film with Shinkai's previous work, Your Name, criticizing it for its lack of clarity of vision and unresolved plot threads.'''

In [66]:
# Print the per-label token percentage and sentence count for the sample text.
parse_gender(text)

76.568% unknown (12 sentences)
14.191% both (1 sentences)
9.241% male (1 sentences)
