# Lemmatization & Stemming of All Words in Dictionaries
Since the website's code was modified such that it is able to highlight any word in text that matches a word in any of the arrays/dictionaries plus any **suffix** inflections of the words, we will now reduce the current dictionary words to their stems. Many of the words currently in the dictionaries appear only in their inflected forms, meaning we risk not detecting the stems and other inflections of these words. For instance, if we only have the word 'collapsing' in our dictionary, ASTRSC will not be able to detect the words 'collapse', 'collapses', and 'collapsed'. Hence, in reducing 'collapsing' to its stem, ASTRSC would be able to detect all listed words. 

Author: Anabelle Colmenares

## Import Libraries
We will first import all necessary libraries

In [1]:
from nltk.stem.snowball import SnowballStemmer

## Process Arrays
We will then insert all current arrays into a dictionary and process them to extract the stems of all words in them. 

In [2]:
whitelistArray = [
    "pan-african",
    "pan african",
    "african studies",
    "african union",
    "south africa",
    "west africa",
    "north africa",
    "east africa",
    "southern africa",
    "northern africa",
    "eastern africa",
    "western africa",
    "african american",
  ]

generalizationArray = [
    "africa",
    "subsaharan",
    "continent",
    "sub-saharan",
  ]

tribalismArray = [
    "ethnic",
    "nomadic",
    "bushmen",
    "tribe",
    "pygmy",
    "tribal",
  ]

natureArray = [
    "wildlife",
    "fauna",
    "habitat",
    "habitats",
    "conservation",
    "animals",
    "birds",
    "animal",
    "flora",
    "livestock",
    "hunting",
    "parks",
    "biota",
    "geese",
    "egyptian geese",
    "egyptian goose",
    "goose",
    "wilderness",
    "wild",
    "park",
    "natural",
    "nature",
    "hunt",
    "camping",
    "wildcat",
    "sauvage",
    "savage",
    "wild-type",
    "waterfowl",
    "deer",
    "barbary stag",
    "ecology",
    "biodiversity",
    "forest",
    "conservancy",
    "wildfowl",
    "endangered species",
    "conservationists",
    "raptors",
    "birdlife",
    "wetlands",
    "otters",
    "spotted necked otter",
    "african clawless otter",
    "congo clawless otter",
    "eurasian otter",
    "fisheries",
    "waterbirds",
    "poachers",
    "bird sanctuary",
    "ecosystems",
    "reptiles",
    "naturalists",
    "marine",
    "waterways",
    "antelope",
    "wildflower",
    "ecotourism",
    "seabird",
    "turtles",
    "vegetation",
    "tortoises",
    "african spurred tortoise",
    "amphibians",
    "wolves",
    "stone curlew",
    "foxes",
    "sacred ibis",
    "african sacred ibis",
    "monarch butterfly",
    "african monarch butterfly",
    "plain tiger",
    "african monarch",
    "goshawk",
    "african goshawk",
    "cirl bunting",
    "leopards",
    "panther",
    "safari park",
    "seafowl",
    "bird of passage",
    "zoography",
    "zoological garden",
    "shore bird",
    "african black oystercatcher",
    "water thick-knee",
    "white-fronted plover",
    "blacksmith plover",
    "common sandpiper",
    "zoo",
    "mammal",
    "bird",
    "conservationist",
    "gallery forest",
    "zoographer",
    "wetland",
    "zoophagy",
    "bird table",
    "sea cow",
    "manatee",
    "tropical rain forest",
    "sea elephant",
    "elephant seal",
    "sea leopard",
    "aquatic bird",
    "birdwatch",
    "zoopathology",
    "wild dog",
    "african wild dog",
    "marine animal",
    "african elephant",
    "sanctuary",
    "miombo woodland",
    "woodland",
    "elephant bird",
    "birdling",
    "birdcatching",
    "rangership",
    "zoological",
    "wildgrave",
    "exotic",
    "indigenous",
    "aquatic",
    "terrestrial",
    "endangered",
    "migratory",
    "coastal",
    "captive",
    "vertebrate",
    "threatened",
    "fascinating",
    "nocturnal",
    "mammalian",
    "scenic",
    "endemic",
    "ocean",
    "underwater",
    "winged",
    "predatory",
    "fish",
    "plants",
    "forests",
    "forestry",
    "soil",
    "timber",
    "cattle",
    "insects",
    "soils",
    "wildlands",
    "wildflowers",
    "pesticides",
    "fishing",
    "extinction",
    "woods",
    "sanctuaries",
    "ecosystem",
    "elephants",
    "mammals",
    "outdoors",
    "ducks",
    "poaching",
    "harvesting",
    "watersheds",
    "decimation",
    "waterfalls",
    "forage",
    "invertebrates",
    "feeding",
    "extermination",
    "pastoralists",
    "shellfish",
    "whales",
    "buffalo",
    "african buffalo",
    "songbirds",
    "eagles",
    "seals",
    "squirrels",
    "hunted",
    "depleted",
    "inhabit",
    "exterminated",
    "forest ranger",
    "game warden",
    "gamekeeper",
    "park",
    "natural resources",
    "preservationist",
    "preserved",
    "safari",
    "serengeti",
    "agricultural",
    "biosphere",
    "environmental",
    "freshwater",
    "herbivores",
    "wildland",
    "pygmy hippopotamus",
    "ladybug",
    "golden eagle",
    "oranda goldfish",
    "oryxes",
    "cheetah",
    "rabbit",
    "scorpion",
    "crocodile",
    "critically endangered",
    "patas monkies",
    "mambas",
    "crocodiles",
    "comet moth",
    "bongos",
    "eagle",
    "white rhinoceroses",
    "madagascar cockroach",
    "dolphins",
    "eastern green mamba",
    "lizard",
    "night heron",
    "twig snake",
    "ostrich",
    "flycatcher",
    "vervet monkey",
    "northern white rhinoceros",
    "parrot",
    "shark",
    "angelshark",
    "olive baboon",
    "green bee eater",
    "flamingos",
    "bat",
    "shrimp",
    "fiddler crab",
    "doves",
    "jumping spider",
    "carpet viper",
    "horsefly",
    "jackson’s chameleon",
    "gorillas",
    "donkeys",
    "hyena",
    "mosquito",
    "sparrow",
    "macaque",
    "short-eared owl",
    "dragonfly",
    "chimps",
    "birds of prey",
    "suni",
    "cape buffalo",
    "mountain",
    "striped hyenas",
    "hyenas",
    "mountain gorilla",
    "striped hyena",
    "duck",
    "madagascar hissing cockroach",
    "african civet",
    "cuckoo",
    "nile crocodiles",
    "horned adder",
    "butterflies",
    "cape elephant shrew",
    "hippo",
    "moth",
    "guinea fowls",
    "geckos",
    "brown hyena",
    "shrew",
    "yellow cobra",
    "pigs",
    "warbler",
    "carpenter ant",
    "woodpecker",
    "wild boars",
    "chimpanzees",
    "snakes",
    "hammerheads",
    "liger",
    "extirpated",
    "bumblebee",
    "jellyfish",
    "oryx",
    "southeastern black rhino",
    "catfish",
    "peacock",
    "radiated tortoise",
    "lion",
    "cockroach",
    "elephant",
    "eurasian jay",
    "sardines",
    "wild cat",
    "cod",
    "springbok",
    "horses",
    "jackal",
    "sea eagle",
    "frog",
    "hyrax",
    "cross river gorilla",
    "heron",
    "hoopoe",
    "water buffalo",
    "common furniture beetle",
    "black rhino",
    "zebu",
    "scimitar-horned oryx",
    "hippos",
    "eel",
    "warthogs",
    "kingfishers",
    "african tree toad",
    "impala",
    "gerbils",
    "vinegaroon",
    "biscuit beetle",
    "insect",
    "wolf",
    "devil’s coach horse beetle",
    "lesser kudu",
    "gnat",
    "bush",
    "amphibian",
    "bird snake",
    "ring-tailed lemur",
    "leaf-tailed gecko",
    "huntsman spider",
    "bichir",
    "common wildebeest",
    "egyptian cobra",
    "egyptian asp",
    "linnet",
    "rhinoceroses",
    "zebras",
    "gazelles",
    "griffon vulture",
    "porcupine",
    "forest cobra",
    "egyptian tortoise",
    "spider ball python",
    "lions",
    "forest hog",
    "african penguin",
    "dusky shark",
    "barb",
    "mole snake",
    "madagascar jacana",
    "donkey",
    "termite",
    "chickens",
    "moorhen",
    "african grey parrot",
    "graylag goose",
    "red deer",
    "hippopotamuses",
    "wryneck",
    "lionfish",
    "leopard",
    "nile crocodile",
    "kudu",
    "tuna",
    "golden oriole",
    "stick insect",
    "aardvarks",
    "european polecat",
    "seal",
    "african bush elephant",
    "red kite",
    "grey mouse lemur",
    "snake",
    "false widow spider",
    "guinea fowl",
    "lechwe antelope",
    "crested crane",
    "blue whale",
    "impalas",
    "stork",
    "rat",
    "forest elephant",
    "puku",
    "civets",
    "bald eagle",
    "orb weaver",
    "mongoose",
    "vine snake",
    "baboon",
    "skink lizard",
    "king cobra",
    "sheep",
    "rats",
    "gerbil",
    "tortoise",
    "pheasant",
    "horned viper",
    "bat-eared fox",
    "silky shark",
    "dolphin",
    "dik-dik",
    "fossa",
    "tigers",
    "tiger beetle",
    "goat",
    "mamba",
    "chicken",
    "manta rays",
    "xerus",
    "camel",
    "sharks",
    "quail",
    "chimpanzee",
    "okapi",
    "senegal parrot",
    "marabou stork",
    "achrioptera manga",
    "brown bear",
    "desert rain frog",
    "monkey",
    "river turtle",
    "african wild dogs",
    "african palm civet",
    "rhinoceros",
    "african gray parrot",
    "stiletto snake",
    "african wild dogs",
    "bush viper",
    "fox",
    "caracals",
    "wild ass",
    "honey badger",
    "rhinos",
    "nigerian goat",
    "rock hyrax",
    "pangolin",
    "peregrine falcon",
    "crane",
    "vulnerable",
    "hippopotamus",
    "crow",
    "song thrush",
    "fire ball python",
    "dinopithecus",
    "electric catfish",
    "lappet-face vulture",
    "banana spider",
    "squirrel",
    "bush baby",
    "marlin",
    "hamadryas baboon",
    "nyala",
    "cat",
    "bushbaby",
    "galago",
    "woodlouse spider",
    "spotted hyena",
    "kestrel",
    "black widow",
    "satanic leaf-tailed gecko",
    "goldcrest",
    "patas",
    "scimitar oryx",
    "desert warthog",
    "barracuda",
    "grasshopper",
    "coton de tulear",
    "egyptian mau",
    "red spitting cobra",
    "camels",
    "hedgehog",
    "duiker",
    "bonobos",
    "housefly",
    "sand viper",
    "mozambique spitting cobra",
    "common hippos",
    "vultures",
    "goliath beetle",
    "porpoise",
    "vulture",
    "oribi",
    "hyraxes",
    "hamster",
    "black rhinoceros",
    "bearded vulture",
    "green bee-eater",
    "fennec fox",
    "gembsok oryx",
    "woodlouse",
    "buffaloes",
    "waterbuck",
    "dumeril’s boa",
    "indri",
    "brown dog tick",
    "black rhinoceroses",
    "common carp",
    "galapagos shark",
    "grant’s gazelle",
    "axanthic ball python",
    "bonobo",
    "fly",
    "buffalos",
    "fennec foxes",
    "african lion",
    "grey heron",
    "panthers",
    "rooster",
    "parakeet",
    "african golden cat",
    "otter",
    "worm",
    "lappet-faced vulture",
    "strawberry hermit crab",
    "redstart",
    "black widow spider",
    "wildebeest",
    "gnu",
    "lichtenstein’s hartebeest",
    "fruit bat",
    "burkina faso",
    "orange baboon tarantula",
    "patas monkey",
    "weasel",
    "termites",
    "cobra",
    "uromastyx",
    "spotted hyenas",
    "sea turtle",
    "agama lizard",
    "turaco",
    "horse",
    "egyptian vulture",
    "sea turtles",
    "giant african land snail",
    "wildebeest",
    "gnu",
    "mongooses",
    "parrots",
    "gorilla",
    "saturniidae moth",
    "sunset ball python",
    "flamingoes",
    "african buffalos",
    "flamingo",
    "addra gazelle",
    "thomson’s gazelle",
    "blind snake",
    "lavender albino ball python",
    "abyssinian",
    "goliath tigerfish",
    "lappet-faced vulture",
    "eastern lowland gorilla",
    "sable bull",
    "sable antelope",
    "goats",
    "european goldfinch",
    "warthog",
    "tick",
    "civet",
    "cichlid",
    "whinchat",
    "jacana",
    "banana cinnamon ball python",
    "bush elephant",
    "mouse",
    "western lowland gorillas",
    "eland",
    "giant ground pangolin",
    "brown-banded cockroach",
    "african elephants",
    "frogs",
    "wild dogs",
    "zonkey",
    "sei whale",
    "slug",
    "scorpions",
    "african jacana",
    "african bullfrog",
    "dugongs",
    "sand cat",
    "hawk moth caterpillar",
    "yellowfin tuna",
    "roan",
    "ibex",
    "madagascar tree boa",
    "african forest elephants",
    "orange dream ball python",
    "viper",
    "elephant shrew",
    "ethiopian wolf",
    "yellow mongoose",
    "bontebok",
    "aidi",
    "sperm whale",
    "kitefin shark",
    "eastern lowland gorillas",
    "tarantula hawk",
    "rodents",
    "white rhinoceros",
    "praying mantis",
    "monkeys",
    "meerkats",
    "gecko",
    "black panther",
    "grasshoppers",
    "minke whale",
    "coryphodon",
    "wildebeests",
    "cheetahs",
    "savannah elephants",
    "honey bee",
    "cicada",
    "eel catfish",
    "hedgehogs",
    "forest elephants",
    "white ferret",
    "albino ferrets",
    "bee-eater",
    "greater kudu",
    "cranes",
    "guinea fowl",
    "africanized bee",
    "killer bee",
    "mallard",
    "gerenuk",
    "waller’s gazelle",
    "kori bustard",
    "dwarf crocodile",
    "african lions",
    "fin whale",
    "pufferfish",
    "bongo",
    "manatees",
    "dormouse",
    "spiny bush viper",
    "tsetse fly",
    "glow worm",
    "bale mountain vervet",
    "vanga",
    "black rhinos",
    "sable antelope",
    "civets",
    "european robin",
    "chameleon",
    "rock python",
    "anteaters",
    "african bush elephants",
    "red fox",
    "owls",
    "peringuey’s adder",
    "bloodhound",
    "snail",
    "european bee-eater",
    "steinbok",
    "herons",
    "goliath frog",
    "fulvous whistling duck",
    "western lowland gorilla",
    "hornbill",
    "crab spider",
    "bed bugs",
    "spider wasp",
    "snouted cobra",
    "humpback whale",
    "avocets",
    "flea",
    "meerkat",
    "wildebeest",
    "myna bird",
    "african savanna elephant",
    "hare",
    "reedbuck",
    "chimpanzees",
    "the blue monkey",
    "gazelle",
    "shoebill stork",
    "woodpeckers",
    "caecilian",
    "pike",
    "dung beetle",
    "zebra",
    "codling moth",
    "beisa oryx",
    "brahminy blindsnake",
    "western gorilla",
    "african forest elephant",
    "bushbuck",
    "cape bushbuck",
    "basenji dog",
    "ant",
    "kudus",
    "mealybug",
    "deathwatch beetle",
    "yellow belly ball python",
    "sitatunga",
    "bale monkey",
    "aardvark",
    "aye-aye",
    "zebra spitting cobra",
    "humpback whales",
    "sable ferret",
    "african fish eagle",
    "rhino",
    "maggot",
    "no see ums",
    "no-see-ums",
    "hyenas",
    "leech",
    "bats",
    "eastern gorilla",
    "thrush",
    "klipspringer",
    "german cockroach",
    "common hippopotamus",
    "colobus monkey",
    "beetle",
    "fruit fly",
    "great dane",
    "nightingale",
    "bees",
    "magpie",
    "mantella frog",
    "sand crab",
    "american cockroach",
    "genet",
    "hognose snake",
    "spotted hyenas",
    "cricket",
    "willow warbler",
    "quelea quelea",
    "beetles",
    "nguni cattle",
    "invertebrate",
    "banana ball python",
    "falcon",
    "antelopes",
    "stingray",
    "earwig",
    "red-billed quelea bird",
    "lemur",
    "barn swallow",
    "african clawed frog",
    "zebra snake",
    "sulcata tortoise",
    "centipede",
    "pythons",
    "osprey",
    "african civets",
    "giraffe",
    "lungfish",
    "monitor lizard",
    "freeway ball python",
    "gaboon viper",
    "stallion",
    "robin",
    "lizards",
    "puss moth",
    "wasp",
    "lesser jacana",
    "mule",
    "mandrill",
    "zorse",
    "peafowls",
    "false cobra",
    "barn owl",
    "smokybrown cockroach",
    "moths",
    "shoebill",
    "whalehead",
    "whale-headed stork",
    "whalebill",
    "shoebill stork",
    "ostriches",
    "salmon",
    "crab",
    "bushpig",
    "mole cricket",
    "wolf spider",
    "baboons",
    "albacore tuna",
    "burchell’s zebra",
    "albatrosses",
    "barbary boar",
    "lemurs",
    "boomslang",
    "seahorse",
    "ortolan bunting",
    "gypsy moth",
    "topi",
    "jackals",
    "desert locust",
    "killer clown ball python",
    "blue monkey",
    "pelicans",
    "spitting cobra",
    "butterfly",
    "leopard tortoise",
    "brazilian treehopper",
    "grey-crowned crane",
    "portuguese man-of-war",
    "freshwater eel",
    "mayfly",
    "blister beetle",
    "madora moth",
    "firefly",
    "common house spider",
    "dog tick",
    "beecroft’s flying squirrels",
    "shrews",
    "black mamba",
    "armyworm",
    "owl",
    "grevy’s zebra",
    "hoopoes",
    "mole",
    "common raven",
    "ibis",
    "tree frog",
    "aardwolf",
    "extinct",
    "lobsters",
    "cape hyrax",
    "locust",
    "serval",
    "rhino viper",
    "caracal",
    "pigeon",
    "needlefish",
    "ground squirrel",
    "pangolins",
    "common buzzard",
    "ligers",
    "cow",
    "reptilian",
    "earthworm",
    "reptile",
    "green mamba",
    "kingfisher",
    "bee",
    "jerboas",
    "addax",
    "white rhino",
    "kenyan sand boa",
    "brookesia micra",
    "spotted hyaena",
    "boas",
    "aye aye",
    "naked mole rat",
    "millipede",
    "caterpillar",
    "senepol cattle",
    "devils coach horse beetle",
    "giraffes",
    "puff adder",
    "dragonflies",
    "glass lizard",
    "west african wild dog",
    "cows",
    "western green mamba",
    "lionfish",
    "cuckoos",
    "goliath frogs",
    "grysbok",
    "jerboa",
    "swan",
    "desert ghost ball python",
    "pelican",
  ];

conflictAndViolenceArray = [
    "violence",
    "conflict",
    "war",
    "atrocious",
    "atrocity",
    "crime",
    "violent",
    "weapon",
    "insurgency",
    "insurgent",
    "insurgence",
    "chaos",
    "chaotic",
    "arm",
    "terror",
    "flee",
    "militia",
    "outbreak",
    "crisis",
    "brutal",
    "collapse",
    "tension",
    "rebel",
    "genocide",
    "tension",
    "clash",
    "dead",
    "eruption",
    "criminal",
    "terrorism",
    "extremist",
    "abuse",
    "escalation",
    "escalate",
    "rape",
    "sanction",
    "corruption",
    "corrupt",
    "radical",
    "ban",
    "bomb",
    "blood",
    "cease-fire",
    "coup",
    "assault",
    "kidnap",
    "battle",
    "abducted",
    "abuses",
    "aggressor",
    "ambushed",
    "anarchy",
    "assassinated",
    "assaults",
    "atrocities",
    "attack",
    "authoritarian",
    "bombing",
    "brigade",
    "carnage",
    "casualties",
    "counterterrorism",
    "decimated",
    "destabilize",
    "destabilized",
    "destabilizing",
    "destroyed",
    "destroying",
    "destroys",
    "destruction",
    "dictator",
    "dictatorship",
    "died",
    "death",
    "disaster",
    "disasters",
    "emergency",
    "enemy",
    "exiles",
    "exploded",
    "explosion",
    "explosions",
    "explosive",
    "extremism",
    "extremists",
    "famine",
    "fascist",
    "fatal",
    "fomenting",
    "fighter",
    "fighters",
    "gang",
    "gangs",
    "gbv",
    "gender-based violence",
    "gender based violence",
    "grenade",
    "grim",
    "gun",
    "gunfire",
    "gunmen",
    "harassment",
    "harrowing",
    "horrific",
    "horrifyingly",
    "horror",
    "horrors",
    "hostage",
    "hostilities",
    "hostility",
    "imprisoned",
    "instability",
    "insurgents",
    "jail",
    "kidnapped",
    "kidnappings",
    "killed",
    "killers",
    "killing",
    "looted",
    "looting",
    "lynch",
    "massacre",
    "massacres",
    "militant",
    "militants",
    "militaries",
    "military",
    "militiamen",
    "militias",
    "missile",
    "murdered",
    "murderous",
    "murders",
    "overthrew",
    "overthrow",
    "paramilitary",
    "paratroopers",
    "persecution",
    "pillage",
    "prison",
    "raided",
    "raids",
    "ravaged",
    "rebellion",
    "rebellious",
    "rebels",
    "retaliation",
    "retaliations",
    "retaliatory",
    "rioting",
    "shooting",
    "slaughter",
    "slaughtered",
    "smuggling",
    "soldiers",
    "suffered",
    "terrorist",
    "trafficked",
    "trafficking",
    "torture",
    "troops",
    "unrest"
]

arrays = {'whitelistArray': whitelistArray, 'generalizationArray': generalizationArray,
          'tribalismArray': tribalismArray, 'natureArray': natureArray, 'conflictAndViolenceArray': conflictAndViolenceArray}
stem_arrays = {'whitelistArray': [], 'generalizationArray': [],
          'tribalismArray': [], 'natureArray': [], 'conflictAndViolenceArray': []}
arrays_length = {'whitelistArray': [], 'generalizationArray': [],
          'tribalismArray': [], 'natureArray': [], 'conflictAndViolenceArray': []}

In [4]:
# create stemmer
stemmer = SnowballStemmer(language='english')
for array_name in arrays.keys():
    # store length of arrays before stemming
    arrays_length[array_name].append('before:' + str(len(arrays[array_name])))
    
    for word in arrays[array_name]:
        stem_wrd = stemmer.stem(word)
        print(word + ' --> ' + stem_wrd)
        if stem_wrd not in stem_arrays[array_name]:
            stem_arrays[array_name].append(stem_wrd)
            
    # store length of arrays after stemming
    arrays_length[array_name].append('after:' + str(len(stem_arrays[array_name])))

# print stemmed arrays and the dict storing the lengths before and after stemming
# print(stem_arrays)
print(stem_arrays['conflictAndViolenceArray'])



pan-african --> pan-african
pan african --> pan african
african studies --> african studi
african union --> african union
south africa --> south africa
west africa --> west africa
north africa --> north africa
east africa --> east africa
southern africa --> southern africa
northern africa --> northern africa
eastern africa --> eastern africa
western africa --> western africa
african american --> african american
africa --> africa
subsaharan --> subsaharan
continent --> contin
sub-saharan --> sub-saharan
ethnic --> ethnic
nomadic --> nomad
bushmen --> bushmen
tribe --> tribe
pygmy --> pygmi
tribal --> tribal
wildlife --> wildlif
fauna --> fauna
habitat --> habitat
habitats --> habitat
conservation --> conserv
animals --> anim
birds --> bird
animal --> anim
flora --> flora
livestock --> livestock
hunting --> hunt
parks --> park
biota --> biota
geese --> gees
egyptian geese --> egyptian gees
egyptian goose --> egyptian goos
goose --> goos
wilderness --> wilder
wild --> wild
park --> park


In [7]:
print(arrays_length)
con_arr_b4_media_cloud =  ["violence",
    "conflict",
    "war",
    "atrocious",
    "atrocity",
    "crime",
    "violent",
    "weapon",
    "insurgency",
    "insurgent",
    "insurgence",
    "chaos",
    "chaotic",
    "arm",
    "terror",
    "flee",
    "militia",
    "outbreak",
    "crisis",
    "brutal",
    "collapse",
    "tension",
    "rebel",
    "genocide",
    "tension",
    "clash",
    "dead",
    "eruption",
    "criminal",
    "terrorism",
    "extremist",
    "abuse",
    "escalation",
    "escalate",
    "rape",
    "sanction",
    "corruption",
    "corrupt",
    "radical",
    "ban",
    "bomb",
    "blood",
    "cease-fire",
    "coup",
    "assault",
    "kidnap",
    "battle"]
print(len(array))

{'whitelistArray': ['before:13', 'after:13', 'before:13', 'after:13'], 'generalizationArray': ['before:4', 'after:4', 'before:4', 'after:4'], 'tribalismArray': ['before:6', 'after:6', 'before:6', 'after:6'], 'natureArray': ['before:866', 'after:724', 'before:866', 'after:724'], 'conflictAndViolenceArray': ['before:163', 'after:125', 'before:163', 'after:125']}
47
