In [1]:
import wptools
from SPARQLWrapper import SPARQLWrapper2, SPARQLWrapper, JSON
import unicodedata
import pandas as pd
import numpy as np
import json
import requests
import time

In [2]:
def get_insee(commune):
    """
    returns the insee code of the commune
    """
    commune_ = unicodedata.normalize('NFD', commune.lower()).encode('ascii', 'ignore').decode("utf-8")
    url = 'https://geo.api.gouv.fr/communes?nom={c}&fields=nom,code&format=json&geometry=centre'\
              .format(c=commune_)
    exists = len(json.loads(requests.get(url).text))
    if exists>0 :
        codes = json.loads(requests.get(url).text)
        result = [code["code"] for code in codes \
                if  unicodedata.normalize('NFD', code["nom"].lower()).encode('ascii', 'ignore').decode("utf-8") == commune_]
        if len(result)>0 :
            return result
        
        return -1
    return -1

In [11]:
def dbPedia_get_wplabel(insee):
    d = SPARQLWrapper2("http://fr.dbpedia.org/sparql")

    d.setQuery(
    """
    PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>

    SELECT distinct ?label
    WHERE
    {
      ?ville rdfs:label ?label.
      ?ville dbo:country  <http://fr.dbpedia.org/resource/France>.
      ?ville <http://fr.dbpedia.org/property/nom> ?insee.
      filter(regex(?insee,\"^""" +insee+ """\", "i") && lang(?label) = 'fr')
      
    }
    """
    ) 
    communes = set()
    res = d.query().bindings
    for result in res:
        communes.add(result["label"].value)
        
    return communes

#### Getting the wikipedia label for each commune

In [12]:
# labels = {}
# for commune in communes:
#     insee = get_insee(commune)
#     if insee!=-1:
#         result = set()
#         for insee_ in insee:
#             label = dbPedia_get_wplabel(insee_)
#             result = result.union(label)
        
#         if(len(result)>0):
#             labels[commune] = list(result)
#         print(commune, result)
#         print("________________________________________________")
        
# with open("Data/wk_labels2.json", "w") as fp:
#     json.dump(labels,fp)

#from file
json_file = open("Data/wk_labels2.json")
labels = json.load(json_file)

### for each commune label, get the infobox of wikipedia page and extract infobox["gentilé"] 

In [87]:
gentiles = {}
for label in labels :
    wks = labels[label]
    result = set()
    for wk in wks :
        try:
            page = wptools.page(wk, lang='fr')
            parse = page.get_parse()
            infobox = parse.data["infobox"]
            if infobox is not None and "gentilé" in infobox:
                g = infobox["gentilé"]
                result.add(g)
        except:
            print("page does not exist")
            
        if len(result)>0:
            gentiles[label] = list(result)
        print(wks, result)
        
with open("Data/infobox_extracted_demonyms___.json", "w") as fp:
    json.dump(gentiles,fp)

In [64]:
page = wptools.page(wikibase='Q21979655', lang='fr').get()
page.data["infobox"]

www.wikidata.org (wikidata) Q21979655
www.wikidata.org (labels) Q1007427|P910|P242|P17|Q1007523|P1082|Q...
fr.wikipedia.org (query) Val_de_Virvée
fr.wikipedia.org (parse) 9646962
fr.wikipedia.org (restbase) /page/summary/Val de Virvée
fr.wikipedia.org (imageinfo) File:Aubie-et-Espessas Mairie.JPG
Val de Virvée (fr) data
{
  assessments: <dict(1)> Communes de France
  claims: <dict(25)> P17, P31, P571, P374, P2046, P1365, P625, P13...
  description: commune française du département de la Gironde
  exhtml: <str(395)> <p><b>Val de Virvée</b> est, depuis le <time ...
  exrest: <str(294)> Val de Virvée est, depuis le 1er janvier 2016...
  extext: <str(313)> **Val de Virvée** est, depuis le 1er janvier ...
  extract: <str(1641)> <p class="mw-empty-elt"></p><p><b>Val de Vi...
  image: <list(6)> {'file': 'File:Aubie-et-Espessas Mairie.JPG', '...
  infobox: <dict(25)> nom, image, légende, région, département, ar...
  iwlinks: <list(1)> https://fr.wiktionary.org/wiki/Val_de_Virv%C3%A9e
  label: 

{'nom': 'Val de Virvée',
 'image': 'Aubie-et-Espessas Mairie.JPG',
 'légende': 'La mairie.',
 'région': '[[Nouvelle-Aquitaine]]',
 'département': '[[Gironde (département)|Gironde]]',
 'arrondissement': '[[Arrondissement de Blaye|Blaye]]',
 'canton': '[[Canton du Nord-Gironde]]',
 'circonscription législative': '[[Onzième circonscription de la Gironde|Onzième circonscription]]',
 'insee': '33018',
 'cp': '33240',
 'maire': 'Christophe Martial',
 'mandat maire': '[[Élections municipales de 2020 en Gironde|2020]]-2026',
 'intercomm': '[[Communauté de communes du Grand Cubzaguais]]',
 'latitude': '45.02',
 'longitude': '-0.405555555556',
 'alt mini': '13',
 'alt maxi': '64',
 'superficie': '20.77',
 'type': 'Commune rurale',
 'unité urbaine': '[[Unité urbaine de Bordeaux|Bordeaux]] <br><small>([[banlieue]])</small>',
 "aire d'attraction": "[[Aire d'attraction de Bordeaux|Bordeaux]] <br><small>(commune de la couronne)</small>",
 'population': '{{Population de France/dernière_pop}}',
 'année

In [151]:
def wikidata_get_wplabel(name):
    d = SPARQLWrapper("https://query.wikidata.org/sparql", agent="hubeauBot/1.0 (https://github.com/BRGM/hubeau)")
    d.setQuery(
    """
    SELECT distinct ?ville
    WHERE
    { 
      ?ville rdfs:label ?label.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "fr" }.
      ?ville wdt:P17 wd:Q142.
      ?ville wdt:P1448 ?insee.     
      filter(regex(?insee,"^"""+name+"""$", "i")).      
      
    }
    """
    ) 
    communes = set()
    d.setReturnFormat(JSON)
    try:
        res = d.query().convert()
        print(res)
        for result in res["results"]["bindings"]:
            communes.add(result["ville"]["value"].split("/")[-1])
        return list(communes)
    
    except KeyboardInterrupt:
        print("interrupted")
        res = d.query().convert()
        print(res)
        for result in res["results"]["bindings"]:
            communes.add(result["ville"]["value"].split("/")[-1])
        return list(communes)
    
    except:
        print("error")
        time.sleep(120)
        res = d.query().convert()
        print(res)
        for result in res["results"]["bindings"]:
            communes.add(result["ville"]["value"].split("/")[-1])
        return list(communes)
                
    

In [248]:
wikidata_get_wplabel("île-de-batz")

{'head': {'vars': ['ville']}, 'results': {'bindings': [{'ville': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q421927'}}]}}


['Q421927']

In [210]:
df=pd.read_csv("Data/commune2021.csv")
communes_ = np.array(df["LIBELLE"])
communes, counts = np.unique(communes_, return_counts=True)
exists = json.load(open("Data/wikidata_ids1.json"))
exists2 = json.load(open("Data/wikidata_ids2.json"))
exists3 = json.load(open("Data/wikidata_ids3.json"))
exists4 = json.load(open("Data/wikidata_ids4.json"))
exists5 = json.load(open("Data/wikidata_ids5.json"))
exists6 = json.load(open("Data/wikidata_ids6.json"))
exists7 = json.load(open("Data/wikidata_ids7.json"))
exists8 = json.load(open("Data/wikidata_ids8.json"))
exists9 = json.load(open("Data/wikidata_ids9.json"))
exists10 = json.load(open("Data/wikidata_ids10.json"))
exists11 = json.load(open("Data/wikidata_ids11.json"))
exists12 = json.load(open("Data/wikidata_ids12.json"))
exists13 = json.load(open("Data/wikidata_ids13.json"))
exists14 = json.load(open("Data/wikidata_ids14.json"))
exists15 = json.load(open("Data/wikidata_ids15.json"))
exists16 = json.load(open("Data/wikidata_ids16.json"))
exists17 = json.load(open("Data/wikidata_ids17.json"))

In [211]:
def cond(commune):
    if commune not in exists and commune not in exists2 and commune not in exists3 and commune not in exists4 \
    and commune not in exists5 and commune not in exists6 and commune not in exists7 and commune not in exists8 \
    and commune not in exists9 and commune not in exists10 and commune not in exists11 and commune not in exists12\
    and commune not in exists13 and commune not in exists14 and commune not in exists15 and commune not in exists16\
    and commune not in exists17:
        return True
    return False


In [220]:
import re
communes2 = []
for commune in communes:
    if cond(commune):
        print(commune)
        c = re.sub("\s\(.*\)", "", commune)
        c = re.sub("\s[0-9]+(e|er) Arrondissement", "", c)
        print(c)
        communes2.append(c)
        print("________________________________________________")

communes2 = list(set(communes2))
print(communes2)

Allemagne-en-Provence
Allemagne-en-Provence
________________________________________________
Bors (Canton de Charente-Sud)
Bors
________________________________________________
Bors (Canton de Tude-et-Lavalette)
Bors
________________________________________________
Castillon (Canton d'Arthez-de-Béarn)
Castillon
________________________________________________
Castillon (Canton de Lembeye)
Castillon
________________________________________________
Château-Chinon (Campagne)
Château-Chinon
________________________________________________
Château-Chinon (Ville)
Château-Chinon
________________________________________________
Hattonchâtel
Hattonchâtel
________________________________________________
Hautecourt-lès-Broville
Hautecourt-lès-Broville
________________________________________________
Longefoy
Longefoy
________________________________________________
Lyon 1er Arrondissement
Lyon
________________________________________________
Lyon 2e Arrondissement
Lyon
___________________________

In [221]:
wikidata_ids = {}
k = 0
for commune in communes2:
    if cond(commune):
        k=k+1
        ids = wikidata_get_wplabel(commune)      
        if(len(ids)>0):
            wikidata_ids[commune] = ids
        print(commune, ids)
        print("________________________________________________")
        
print(k)   

{'head': {'vars': ['ville']}, 'results': {'bindings': []}}
Prangey []
________________________________________________
{'head': {'vars': ['ville']}, 'results': {'bindings': []}}
Marbotte []
________________________________________________
{'head': {'vars': ['ville']}, 'results': {'bindings': []}}
Château-Chinon []
________________________________________________
{'head': {'vars': ['ville']}, 'results': {'bindings': []}}
Hautecourt-lès-Broville []
________________________________________________
{'head': {'vars': ['ville']}, 'results': {'bindings': []}}
Hattonchâtel []
________________________________________________
{'head': {'vars': ['ville']}, 'results': {'bindings': []}}
Longefoy []
________________________________________________
{'head': {'vars': ['ville']}, 'results': {'bindings': []}}
Moulins-en-Bessin []
________________________________________________
{'head': {'vars': ['ville']}, 'results': {'bindings': []}}
Tessens []
________________________________________________
{'head':

In [222]:
wikidata_ids

{}

In [201]:
with open("Data/wikidata_ids17.json", "w") as fp:
    json.dump(wikidata_ids,fp)

In [None]:
ids = ....
gentiles = {}
for label, ids in wikidata_ids.items() :
    result = set()
    for id in ids :
        try:
            page = wptools.page(wikibase=id, lang='fr').get()
            infobox = page.data["infobox"]
            if infobox is not None and "gentilé" in infobox:
                g = infobox["gentilé"]
                result.add(g)
        except:
            print("page does not exist")
          
    if len(result)>0:
        gentiles[label] = list(result)
        
    print(label, result)
        
with open("Data/wikidata_infobox_extracted_demonyms.json", "w") as fp:
    json.dump(gentiles,fp)

### Preprocess extracted text

In [36]:
exists = json.load(open("Data/wikidata_infobox_extracted_demonyms1.json"))
exists2 = json.load(open("Data/wikidata_infobox_extracted_demonyms2.json"))
exists3 = json.load(open("Data/wikidata_infobox_extracted_demonyms3.json"))
exists4 = json.load(open("Data/wikidata_infobox_extracted_demonyms4.json"))
exists5 = json.load(open("Data/wikidata_infobox_extracted_demonyms6.json"))

In [38]:
len(exists)+ len(exists2)+len(exists3)+len(exists4)+len(exists5)

24094

In [39]:
l = [exists, exists2 , exists3 , exists4, exists5]

In [41]:
gentiles  = {k:v for d in l for k,v in d.items() }

In [43]:
with open("Data/wikidata_infobox_extracted_demonyms.json", "w") as fp:
    json.dump(gentiles,fp)

In [44]:
gentiles = json.load(open("Data/wikidata_infobox_extracted_demonyms.json"))

In [46]:
len(gentiles)

24094

In [52]:
import re

gentiles1 = {}
gentiles2 = {}
for com in gentiles:
    list_g = gentiles[com]
    result = set()
    print(com, list_g)
    contains = False
    for g in list_g:
        
        g1 = re.sub("<.*?>", ",", g)
        g2 = re.sub("\sou\s|\set\s|;|/", ",", g1)
        g3 = re.sub("\(.*?\)|\.", "", g2)
        split_g = re.split("\s?,,?\s?", g3)
        print(split_g)
        for split_g_ in split_g:  
            result.add(split_g_)
            if re.search("-", split_g_):
                contains = True
    print("________________________________________________________________")
    result = list(result)
    if contains :
        gentiles1[com] = result
    else:
        gentiles2[com] = result

Aast ['Aastais, Aastaises']
['Aastais', 'Aastaises']
________________________________________________________________
Abainville ['Abainvillois, Abainvilloises']
['Abainvillois', 'Abainvilloises']
________________________________________________________________
Abancourt ['Abancourtois, Abancourtoises']
['Abancourtois', 'Abancourtoises']
________________________________________________________________
Abaucourt ['Abaucourtois, Abaucourtoises']
['Abaucourtois', 'Abaucourtoises']
________________________________________________________________
Abaucourt-Hautecourt ['Abaucourtois, Abaucourtoises']
['Abaucourtois', 'Abaucourtoises']
________________________________________________________________
Abbans-Dessous ['Abbanais, Abbanaises']
['Abbanais', 'Abbanaises']
________________________________________________________________
Abbans-Dessus ['Abbanais, Abbanaises']
['Abbanais', 'Abbanaises']
________________________________________________________________
Abbaretz ['Abbarois, Abbaroises']
[

Baillestavy ['Baillestavyen(ne)s']
['Baillestavyens']
________________________________________________________________
Baillet-en-France ['Baillotais']
['Baillotais']
________________________________________________________________
Bailleul ['Bailleulois', 'Baillotins', 'Bailleulais']
['Bailleulois']
['Baillotins']
['Bailleulais']
________________________________________________________________
Bailleul-Neuville ['Bailleulais']
['Bailleulais']
________________________________________________________________
Bailleul-Sir-Berthoult ['Bailleulois']
['Bailleulois']
________________________________________________________________
Bailleul-la-Vallée ['Bailleullois']
['Bailleullois']
________________________________________________________________
Bailleul-le-Soc ['Bailleulois, Bailleuloises']
['Bailleulois', 'Bailleuloises']
________________________________________________________________
Bailleul-sur-Thérain ['Bailleulois, Bailleuloises']
['Bailleulois', 'Bailleuloises']
___________________

['Bondouflois']
________________________________________________________________
Bondues ['Bonduois']
['Bonduois']
________________________________________________________________
Bondy ['Bondynois, Bondynoise(s)']
['Bondynois', 'Bondynoise']
________________________________________________________________
Bongheat ['Bongheatois']
['Bongheatois']
________________________________________________________________
Bonlieu ['Chiettards, Chiettardes']
['Chiettards', 'Chiettardes']
________________________________________________________________
Bonlieu-sur-Roubion ['Bonilociens, Boniliciennes']
['Bonilociens', 'Boniliciennes']
________________________________________________________________
Bonloc ['Lekuindar']
['Lekuindar']
________________________________________________________________
Bonnac ['Bonnacois', 'Bonnacois, Bonnacoises']
['Bonnacois']
['Bonnacois', 'Bonnacoises']
________________________________________________________________
Bonnat ['Bonnachons, Bonnachonnes']
['Bonnachons', 

Campitello ['Campitellais']
['Campitellais']
________________________________________________________________
Camplong ['Camplonnais']
['Camplonnais']
________________________________________________________________
Camplong-d'Aude ['Camplonnais']
['Camplonnais']
________________________________________________________________
Campneuseville ['Campneusevillois']
['Campneusevillois']
________________________________________________________________
Campouriez ['Campouriézois']
['Campouriézois']
________________________________________________________________
Campremy ['Camprenois, Camprenoises']
['Camprenois', 'Camprenoises']
________________________________________________________________
Camprond ['Campronnais']
['Campronnais']
________________________________________________________________
Camps-la-Source ['Campsois']
['Campsois']
________________________________________________________________
Camps-sur-l'Agly ['Campois, Campoises']
['Campois', 'Campoises']
_________________________

['Castelmeillantais']
________________________________________________________________
Châteauneuf ['Castelnoviens', 'Castelneuvien(ne)', 'Madonencs', 'Castelneuviens', 'Castelneuvois', 'Castelneuvois, Castelneuvoises', 'Castelneuviens et Castelneuviennes']
['Castelnoviens']
['Castelneuvien']
['Madonencs']
['Castelneuviens']
['Castelneuvois']
['Castelneuvois', 'Castelneuvoises']
['Castelneuviens', 'Castelneuviennes']
________________________________________________________________
Châteauneuf-Grasse ['Châteauneuvois (ou Castelnevois)']
['Châteauneuvois ']
________________________________________________________________
Châteauneuf-Miravail ['Castelnovins']
['Castelnovins']
________________________________________________________________
Châteauneuf-Val-Saint-Donat ['Chabannais']
['Chabannais']
________________________________________________________________
Châteauneuf-Val-de-Bargis ['Castelneuvien(ne)']
['Castelneuvien']
________________________________________________________________

________________________________________________________________
Crozes-Hermitage ['Crozois']
['Crozois']
________________________________________________________________
Crozet ['Crozatis']
['Crozatis']
________________________________________________________________
Crozon ['Crozonnais']
['Crozonnais']
________________________________________________________________
Crozon-sur-Vauvre ['Crozonnais']
['Crozonnais']
________________________________________________________________
Cruas ['Cruassiens']
['Cruassiens']
________________________________________________________________
Crucheray ['Cruchérois']
['Cruchérois']
________________________________________________________________
Crugny ['Crugnatiens, Crugnatiennes']
['Crugnatiens', 'Crugnatiennes']
________________________________________________________________
Cruguel ['Cruguellois, Cruguelloise']
['Cruguellois', 'Cruguelloise']
________________________________________________________________
Cruis ['Crussiens']
['Crussiens']
_____

Fontaine-Denis-Nuisy ['Fontenat']
['Fontenat']
________________________________________________________________
Fontaine-Henry ['Fontenois']
['Fontenois']
________________________________________________________________
Fontaine-Lavaganne ['Lavagannais, Lavagannaises']
['Lavagannais', 'Lavagannaises']
________________________________________________________________
Fontaine-Milon ['Milonnais']
['Milonnais']
________________________________________________________________
Fontaine-Mâcon ['Mâconais, Mâconaises']
['Mâconais', 'Mâconaises']
________________________________________________________________
Fontaine-Notre-Dame ['Fontainois(es)', 'Fontenois']
['Fontainois']
['Fontenois']
________________________________________________________________
Fontaine-au-Bois ['Fontagnards, Fontagnardes']
['Fontagnards', 'Fontagnardes']
________________________________________________________________
Fontaine-au-Pire ['Fontenois, Fontenoises']
['Fontenois', 'Fontenoises']
_____________________________

Guyencourt-Saulcourt ['Les Guyancourtois<br/>Les Guyancourtoises']
['Les Guyancourtois', 'Les Guyancourtoises']
________________________________________________________________
Guyencourt-sur-Noye ['Guyencourtois']
['Guyencourtois']
________________________________________________________________
Guyonvelle ['Guyonvellois']
['Guyonvellois']
________________________________________________________________
Guzargues ['Guzarguois']
['Guzarguois']
________________________________________________________________
Guébestroff ['Guébestroffois, Guébestroffoises']
['Guébestroffois', 'Guébestroffoises']
________________________________________________________________
Guéblange-lès-Dieuze ['Guéblangeois, Guéblangeoises']
['Guéblangeois', 'Guéblangeoises']
________________________________________________________________
Guébling ['Guéblingeois, Guéblingeoises']
['Guéblingeois', 'Guéblingeoises']
________________________________________________________________
Guécélard ['Guécélardais']
['Guécélard

La Chapelle-Saint-Aubin ['Capellaubinois']
['Capellaubinois']
________________________________________________________________
La Chapelle-Saint-Florent ['Capello-Florentais']
['Capello-Florentais']
________________________________________________________________
La Chapelle-Saint-Fray ['Capellofrayen']
['Capellofrayen']
________________________________________________________________
La Chapelle-Saint-Jean ['Chapelais']
['Chapelais']
________________________________________________________________
La Chapelle-Saint-Laud ['Capellaudains']
['Capellaudains']
________________________________________________________________
La Chapelle-Saint-Laurian ['Chappelais']
['Chappelais']
________________________________________________________________
La Chapelle-Saint-Luc ['Chapelains']
['Chapelains']
________________________________________________________________
La Chapelle-Saint-Martin ['Chapellans']
['Chapellans']
________________________________________________________________
La Chapelle-Sa

Lassay-les-Châteaux ['Lasséens']
['Lasséens']
________________________________________________________________
Lassay-sur-Croisne ['Lasséens']
['Lasséens']
________________________________________________________________
Lasse ['Lassois', 'Lasar']
['Lassois']
['Lasar']
________________________________________________________________
Lasserre ['Lasserrois, Lasserroises', 'Lasserois']
['Lasserrois', 'Lasserroises']
['Lasserois']
________________________________________________________________
Lasserre-Pradère ['Lasserro-Pradérois']
['Lasserro-Pradérois']
________________________________________________________________
Lasserre-de-Prouille ['Lasserois']
['Lasserois']
________________________________________________________________
Lassicourt ['Lassicourtois, Lassicourtoises']
['Lassicourtois', 'Lassicourtoises']
________________________________________________________________
Lassigny ['Lachenois, Lachenoises']
['Lachenois', 'Lachenoises']
_________________________________________________

Liévin ['Liévinois']
['Liévinois']
________________________________________________________________
Liézey ['Pouhas']
['Pouhas']
________________________________________________________________
Llo ['Llotois']
['Llotois']
________________________________________________________________
Lobsann ['Lobsannais, Lobsannaises']
['Lobsannais', 'Lobsannaises']
________________________________________________________________
Loc-Brévalaire ['Brévalairiens']
['Brévalairiens']
________________________________________________________________
Loc-Eguiner ['Loguisiens']
['Loguisiens']
________________________________________________________________
Loc-Eguiner-Saint-Thégonnec ['Éguinériens']
['Éguinériens']
________________________________________________________________
Loc-Envel ['Locenvellois, Locenvelloise']
['Locenvellois', 'Locenvelloise']
________________________________________________________________
Locarn ['Locarnois']
['Locarnois']
________________________________________________________

Mesgrigny ['Mesgrignons, Mesgrignonnes']
['Mesgrignons', 'Mesgrignonnes']
________________________________________________________________
Meslan ['Meslannais, Meslannaises']
['Meslannais', 'Meslannaises']
________________________________________________________________
Meslay ['Meslaisiens', 'Meslayens']
['Meslaisiens']
['Meslayens']
________________________________________________________________
Meslay-du-Maine ['Meslinois']
['Meslinois']
________________________________________________________________
Meslin ['Meslinois, Meslinoise']
['Meslinois', 'Meslinoise']
________________________________________________________________
Mesmay ['Taquins']
['Taquins']
________________________________________________________________
Mesmont ['Mesmontois, Mesmontoises']
['Mesmontois', 'Mesmontoises']
________________________________________________________________
Mesnac ['Mesnacais']
['Mesnacais']
________________________________________________________________
Mesnard-la-Barotière ['Mesnardais'

['Ménétréolois']
________________________________________________________________
Ménévillers ['Ménévillerois, Ménévilleroises']
['Ménévillerois', 'Ménévilleroises']
________________________________________________________________
Méobecq ['Méobécquois']
['Méobécquois']
________________________________________________________________
Méon ['Méonais']
['Méonais']
________________________________________________________________
Méras ['Mérasiens']
['Mérasiens']
________________________________________________________________
Mérens-les-Vals ['Mérengois']
['Mérengois']
________________________________________________________________
Mérenvielle ['Mérenviellois']
['Mérenviellois']
________________________________________________________________
Mérey-sous-Montrond ['Mérymontois, Mérymontoises']
['Mérymontois', 'Mérymontoises']
________________________________________________________________
Mérial ['Mérialais']
['Mérialais']
________________________________________________________________


Peumerit ['Peumeritois']
['Peumeritois']
________________________________________________________________
Peumerit-Quintin ['Peumeritois, Peumeritoises']
['Peumeritois', 'Peumeritoises']
________________________________________________________________
Peuplingues ['Peuplinguois']
['Peuplinguois']
________________________________________________________________
Peuton ['Peutonnais']
['Peutonnais']
________________________________________________________________
Peux-et-Couffouleux ['Peuleussien(ne)']
['Peuleussien']
________________________________________________________________
Pexiora ['Pexioranais']
['Pexioranais']
________________________________________________________________
Pexonne ['Pexonnois, Pexonnoises']
['Pexonnois', 'Pexonnoises']
________________________________________________________________
Pey ['Pireutch']
['Pireutch']
________________________________________________________________
Peymeinade ['Peymeinadois']
['Peymeinadois']
________________________________________

['Rannéen']
________________________________________________________________
Ranrupt ['Ranruptois(es)']
['Ranruptois']
________________________________________________________________
Rans ['Rantiers, Rantières']
['Rantiers', 'Rantières']
________________________________________________________________
Rantigny ['Rantignysiens, Rantignysiennes']
['Rantignysiens', 'Rantignysiennes']
________________________________________________________________
Rantzwiller ['Rantzwillerois']
['Rantzwillerois']
________________________________________________________________
Ranville ['Ranvillais']
['Ranvillais']
________________________________________________________________
Ranville-Breuillaud ['Ranvillois']
['Ranvillois']
________________________________________________________________
Ranzières ['Ranzièrois']
['Ranzièrois']
________________________________________________________________
Rançonnières ['Rancenais,Rancenaises']
['Rancenais', 'Rancenaises']
___________________________________________

Saint-Chef ['Saint-Cheffois, Saint-Cheffoises']
['Saint-Cheffois', 'Saint-Cheffoises']
________________________________________________________________
Saint-Chinian ['Saint-Chinianais']
['Saint-Chinianais']
________________________________________________________________
Saint-Christaud ['Saint-Christophins, Saint-Christophines', 'Saint-Christaudains']
['Saint-Christophins', 'Saint-Christophines']
['Saint-Christaudains']
________________________________________________________________
Saint-Christol ['Saint-Christolais', 'Saint-Christolain ou Saint-Christolais', 'Christolains, Christolaines']
['Saint-Christolais']
['Saint-Christolain', 'Saint-Christolais']
['Christolains', 'Christolaines']
________________________________________________________________
Saint-Christol-lez-Alès ['Saint-Christolens']
['Saint-Christolens']
________________________________________________________________
Saint-Christoly-Médoc ['Saint-Christolyens']
['Saint-Christolyens']
__________________________________

Saint-Michel-de-Fronsac ['Saint-Michelais']
['Saint-Michelais']
________________________________________________________________
Saint-Michel-de-Lanès ['Saint-Michelais']
['Saint-Michelais']
________________________________________________________________
Saint-Michel-de-Lapujade ['Lapujadais']
['Lapujadais']
________________________________________________________________
Saint-Michel-de-Livet ['Saint-Michelois']
['Saint-Michelois']
________________________________________________________________
Saint-Michel-de-Maurienne ['Saint-Michelains']
['Saint-Michelains']
________________________________________________________________
Saint-Michel-de-Montaigne ['Montaignois']
['Montaignois']
________________________________________________________________
Saint-Michel-de-Montjoie ['Montois']
['Montois']
________________________________________________________________
Saint-Michel-de-Plélan ['Michelois, Micheloise']
['Michelois', 'Micheloise']
__________________________________________________

Sapogne-sur-Marche ['Sapognards, Sapognardes']
['Sapognards', 'Sapognardes']
________________________________________________________________
Sapois ['Sapoisiens, Sapoisiennes', 'Sapoitiers, Sapoitières.']
['Sapoisiens', 'Sapoisiennes']
['Sapoitiers', 'Sapoitières']
________________________________________________________________
Saponay ['Saponéen(ne)s']
['Saponéens']
________________________________________________________________
Saramon ['Saramonais, Saramonaises']
['Saramonais', 'Saramonaises']
________________________________________________________________
Saran ['Saranais']
['Saranais']
________________________________________________________________
Sarceaux ['Sarcellien']
['Sarcellien']
________________________________________________________________
Sarcelles ['Sarcellois']
['Sarcellois']
________________________________________________________________
Sarcey ['Sarceyrois', 'Sarceyots, Sarceyottes']
['Sarceyrois']
['Sarceyots', 'Sarceyottes']
________________________________

['Til-Châtelois']
________________________________________________________________
Tilh ['Tilhois']
['Tilhois']
________________________________________________________________
Tilhouse ['Tilhousais']
['Tilhousais']
________________________________________________________________
Tillay-le-Péneux ['Tiglétins, Tiglétines']
['Tiglétins', 'Tiglétines']
________________________________________________________________
Tilleul-Dame-Agnès ['Tillolais']
['Tillolais']
________________________________________________________________
Tillières ['Tillièrois']
['Tillièrois']
________________________________________________________________
Tillières-sur-Avre ['Tilliérois']
['Tilliérois']
________________________________________________________________
Tilloy-et-Bellay ['Tillotins, Tillotines']
['Tillotins', 'Tillotines']
________________________________________________________________
Tilloy-lez-Cambrai ['Tilloysiens']
['Tilloysiens']
________________________________________________________________


Villeneuve-sur-Cher ['Villeneuvois, Villeneuvoises']
['Villeneuvois', 'Villeneuvoises']
________________________________________________________________
Villeneuve-sur-Conie ['Villeneuvois']
['Villeneuvois']
________________________________________________________________
Villeneuve-sur-Vère ['Villeneuvois, Villeneuvoises']
['Villeneuvois', 'Villeneuvoises']
________________________________________________________________
Villeneuvette ['Villeneuvettois']
['Villeneuvettois']
________________________________________________________________
Villenouvelle ['Villenouvellois, Villenouvelloises']
['Villenouvellois', 'Villenouvelloises']
________________________________________________________________
Villenoy ['Villenoisiens ou Villenoyens']
['Villenoisiens', 'Villenoyens']
________________________________________________________________
Villentrois ['Villentroyens']
['Villentroyens']
________________________________________________________________
Villepail ['Villepaillais']
['Villepaillais

La Seyne-sur-Mer ['Seynois']
['Seynois']
________________________________________________________________
La Souche ['Souchois']
['Souchois']
________________________________________________________________
La Teste-de-Buch ['Testerins/Cazalins/Pylatais']
['Testerins', 'Cazalins', 'Pylatais']
________________________________________________________________
La Tombe ['Tombiers']
['Tombiers']
________________________________________________________________
La Ville-aux-Dames ['Gynépolitains']
['Gynépolitains']
________________________________________________________________
Lalandelle ['Landellois, Landelloises']
['Landellois', 'Landelloises']
________________________________________________________________
Landéda ['Landédaens']
['Landédaens']
________________________________________________________________
Languidic ['Languidiciens']
['Languidiciens']
________________________________________________________________
Lannemezan ['Lannemezanais']
['Lannemezanais']
________________________

In [53]:
len(gentiles1), len(gentiles2)

(1863, 22231)

In [57]:
len(gentiles_)

24094

In [63]:
set1 = json.load(open("Data/temp/gentile1_2_processed2.json"))
set2 = json.load(open("Data/temp/gentile2_2_processed2.json"))

In [64]:
len(set1)+len(set2)

24089

In [65]:
gentiles  = {k:v for d in [set1, set2] for k,v in d.items() }

In [68]:
import re
gentiles_

for com in gentiles:
    list_g = gentiles[com]
    result = set()
    print(com, list_g)
    for g in list_g:
        split_g = re.split("\s?,,?\s?", g)
        for split_g_ in split_g:  
            result.add(split_g_.lower())
    print(result)
    print("________________________________________________________________")
    result = list(result)
    gentiles_[com] = result

Saint-Génis-des-Fontaines ['Saint-Génisiennes', 'Saint-Génisiens']
{'saint-génisiennes', 'saint-génisiens'}
________________________________________________________________
Saint-Jean-Soleymieux ['Saint-Joinards', 'Saint-Joinardes']
{'saint-joinards', 'saint-joinardes'}
________________________________________________________________
Saint-Jeure-d'Ay ["Saint-Jeure-d'Ois"]
{"saint-jeure-d'ois"}
________________________________________________________________
Saint-Nauphary ['Saint-Nauphariens']
{'saint-nauphariens'}
________________________________________________________________
Saint-Privat-de-Vallongue ['Saint-Privatois']
{'saint-privatois'}
________________________________________________________________
Abbévillers ['Coucous']
{'coucous'}
________________________________________________________________
Ailleville ['Aillevillois']
{'aillevillois'}
________________________________________________________________
Ancerville ['Chevreuils', 'Ancervillois']
{'chevreuils', 'ancervillois'}

Saint-Masmes ['Saint-Masmaises', 'Saint-Masmais']
{'saint-masmais', 'saint-masmaises'}
________________________________________________________________
Saint-Matré ['Saint-Matréens']
{'saint-matréens'}
________________________________________________________________
Saint-Maur ['Saint-Mauriens', 'Saint-Mauriennes', 'Saint-Maurois']
{'saint-mauriens', 'saint-mauriennes', 'saint-maurois'}
________________________________________________________________
Saint-Maur-des-Bois ['Saint-Mauréen']
{'saint-mauréen'}
________________________________________________________________
Saint-Maur-des-Fossés ['Saint-Mauriens']
{'saint-mauriens'}
________________________________________________________________
Saint-Maurice-Montcouronne ['Saint-Mauriciens']
{'saint-mauriciens'}
________________________________________________________________
Saint-Maurice-Navacelles ['Saint-Mauriciens']
{'saint-mauriciens'}
________________________________________________________________
Saint-Maurice-d'Ardèche ['Saint-M

________________________________________________________________
Arboras ['Arborassiens']
{'arborassiens'}
________________________________________________________________
Arbot ['Arbeisiens']
{'arbeisiens'}
________________________________________________________________
Arbouet-Sussaute ['Arbotiar']
{'arbotiar'}
________________________________________________________________
Arboussols ['Arboussolois', 'Arboussoloises']
{'arboussoloises', 'arboussolois'}
________________________________________________________________
Arboys en Bugey ['Arbignolans']
{'arbignolans'}
________________________________________________________________
Arbrissel ['Arbrisselois']
{'arbrisselois'}
________________________________________________________________
Arbus ['Arbusiens']
{'arbusiens'}
________________________________________________________________
Arbéost ['Arbéostois']
{'arbéostois'}
________________________________________________________________
Arbérats-Sillègue ['Arberaztar']
{'arberaztar'}
_

________________________________________________________________
Bellenaves ['Bellenavois']
{'bellenavois'}
________________________________________________________________
Bellencombre ['Bellencombrais']
{'bellencombrais'}
________________________________________________________________
Bellenglise ['Bellenglisois']
{'bellenglisois'}
________________________________________________________________
Bellengreville ['Bellengrevillais']
{'bellengrevillais'}
________________________________________________________________
Bellentre ['Bellentrais']
{'bellentrais'}
________________________________________________________________
Belleray ['Bellerois']
{'bellerois'}
________________________________________________________________
Bellerive-sur-Allier ['Bellerivois']
{'bellerivois'}
________________________________________________________________
Belleroche ['Bellerochons']
{'bellerochons'}
________________________________________________________________
Belleserre ['Belleserriens']
{'belleser

{'brivezacois', 'brivezacoise'}
________________________________________________________________
Brix ['Brions']
{'brions'}
________________________________________________________________
Brizambourg ['Brizambourgeois']
{'brizambourgeois'}
________________________________________________________________
Brizeaux ['Brizeautins']
{'brizeautins'}
________________________________________________________________
Brizon ['Brisonniers']
{'brisonniers'}
________________________________________________________________
Brières-les-Scellés ['Briolins']
{'briolins'}
________________________________________________________________
Brié-et-Angonnes ['Briataux']
{'briataux'}
________________________________________________________________
Broc ['Brocois']
{'brocois'}
________________________________________________________________
Brocas ['Brocassais']
{'brocassais'}
________________________________________________________________
Brocourt ['Brocourtois']
{'brocourtois'}
____________________________

{'chartrongeais'}
________________________________________________________________
Chartuzac ['Chatuzacais']
{'chatuzacais'}
________________________________________________________________
Chartèves ['Chartévois']
{'chartévois'}
________________________________________________________________
Chas ['Chassois']
{'chassois'}
________________________________________________________________
Chasnais ['Chanaisien']
{'chanaisien'}
________________________________________________________________
Chasné-sur-Illet ['Chasnéens']
{'chasnéens'}
________________________________________________________________
Chassagne-Saint-Denis ['Chassagnaises', 'Chassagnais']
{'chassagnais', 'chassagnaises'}
________________________________________________________________
Chassaignes ['Cassanois']
{'cassanois'}
________________________________________________________________
Chassant ['chassantaise', 'chassantaises', 'Chassantais']
{'chassantaise', 'chassantaises', 'chassantais'}
______________________________

Creysse ['Creyssois']
{'creyssois'}
________________________________________________________________
Creysseilles ['Creysseilloux']
{'creysseilloux'}
________________________________________________________________
Cricquebœuf ['Cricquebœuvien']
{'cricquebœuvien'}
________________________________________________________________
Cricqueville-en-Bessin ['Cricquevillais']
{'cricquevillais'}
________________________________________________________________
Criel-sur-Mer ['Criélois']
{'criélois'}
________________________________________________________________
Crillon [': Crillonais', 'Crillonaises']
{': crillonais', 'crillonaises'}
________________________________________________________________
Crillon-le-Brave ['Crillonnaises', 'Crillonnais']
{'crillonnais', 'crillonnaises'}
________________________________________________________________
Criquebeuf-en-Caux ['Criquebeuviennes', 'Criquebeuviens']
{'criquebeuviennes', 'criquebeuviens'}
_______________________________________________________

Fauch ['Fauchoises', 'Fauchois']
{'fauchois', 'fauchoises'}
________________________________________________________________
Faucogney-et-la-Mer ['Falconiens']
{'falconiens'}
________________________________________________________________
Faucompierre ['Falconois']
{'falconois'}
________________________________________________________________
Faucon ['Fauconnais']
{'fauconnais'}
________________________________________________________________
Faucon-de-Barcelonnette ['Fauconnais']
{'fauconnais'}
________________________________________________________________
Faucon-du-Caire ['Fauconniers']
{'fauconniers'}
________________________________________________________________
Fauconcourt ['Falconicuriens']
{'falconicuriens'}
________________________________________________________________
Faucoucourt ['Foucaucourtois', 'Fucoldcurtissiens']
{'foucaucourtois', 'fucoldcurtissiens'}
________________________________________________________________
Faudoas ['Faudoasiens']
{'faudoasiens'}
________

Guermantes ['Guermantais']
{'guermantais'}
________________________________________________________________
Guern ['Guernate']
{'guernate'}
________________________________________________________________
Guernanville ['Guernanvillais']
{'guernanvillais'}
________________________________________________________________
Guerny ['Guernysien']
{'guernysien'}
________________________________________________________________
Guerpont ['Guerpontoises', 'Guerpontois']
{'guerpontoises', 'guerpontois'}
________________________________________________________________
Guerquesalles ['Guerquesallais']
{'guerquesallais'}
________________________________________________________________
Guerstling ['Guerslingeois', 'Guerslingeoises']
{'guerslingeoises', 'guerslingeois'}
________________________________________________________________
Guerting ['Guertingeois']
{'guertingeois'}
________________________________________________________________
Guerville ['Guervilloises', 'Guervillois']
{'guervillois', 'gu

________________________________________________________________
La Chapelle-Urée ['Chapelains']
{'chapelains'}
________________________________________________________________
La Chapelle-Vicomtesse ['Vicomtois']
{'vicomtois'}
________________________________________________________________
La Chapelle-Villars ['Chapelards']
{'chapelards'}
________________________________________________________________
La Chapelle-au-Mans ['Capellimansois']
{'capellimansois'}
________________________________________________________________
La Chapelle-au-Moine ['Capellois']
{'capellois'}
________________________________________________________________
La Chapelle-au-Riboul ['Chapellois']
{'chapellois'}
________________________________________________________________
La Chapelle-aux-Brocs ['Chapellois']
{'chapellois'}
________________________________________________________________
La Chapelle-aux-Choux ['Chapellois']
{'chapellois'}
________________________________________________________________
La C

Le Folgoët ['Folgoatiens', 'Folgoétiens']
{'folgoétiens', 'folgoatiens'}
________________________________________________________________
Le Fossat ['Fossatoises', 'Fossatois']
{'fossatoises', 'fossatois'}
________________________________________________________________
Le Fossé ['Fosséens']
{'fosséens'}
________________________________________________________________
Le Fouilloux ['Fouillousains']
{'fouillousains'}
________________________________________________________________
Le Fousseret ['Fousseretoises', 'Fousseretois']
{'fousseretois', 'fousseretoises'}
________________________________________________________________
Le Fraysse ['Frayssols']
{'frayssols'}
________________________________________________________________
Le Fresne ['Fresnois', 'Fresnoises']
{'fresnois', 'fresnoises'}
________________________________________________________________
Le Fresne-Camilly ['Fresnecamillien']
{'fresnecamillien'}
________________________________________________________________
Le Fresne-P

{'luppéens'}
________________________________________________________________
Lupsault ['Supisultins', 'Lupsaultiens']
{'supisultins', 'lupsaultiens'}
________________________________________________________________
Lupstein ['Lupsteinoises', 'Lupsteinois']
{'lupsteinoises', 'lupsteinois'}
________________________________________________________________
Lupé ['Lupéennes', 'Lupéens']
{'lupéennes', 'lupéens'}
________________________________________________________________
Luquet ['Luquetois']
{'luquetois'}
________________________________________________________________
Lurais ['Luraisiens']
{'luraisiens'}
________________________________________________________________
Lurcy ['Luperciens']
{'luperciens'}
________________________________________________________________
Lurcy-Lévis ['Lurcycois', 'Lurcyquois', 'Lurcyquoises', 'Lurcycoises']
{'lurcyquois', 'lurcyquoises', 'lurcycois', 'lurcycoises'}
________________________________________________________________
Lure ['Lurons']
{'lurons'}

________________________________________________________________
Moncetz-l'Abbaye ['Moncéens', 'Moncéennes']
{'moncéens', 'moncéennes'}
________________________________________________________________
Monchaux-Soreng ['Monchaliens', 'Monchaliennes']
{'monchaliennes', 'monchaliens'}
________________________________________________________________
Monchaux-sur-Écaillon ['Moncalciennes', 'Moncalciens']
{'moncalciens', 'moncalciennes'}
________________________________________________________________
Moncheux ['Moncheux']
{'moncheux'}
________________________________________________________________
Monchy-Breton ['Monchois']
{'monchois'}
________________________________________________________________
Monchy-Saint-Éloi ['Monchyssoises', 'Monchyssois']
{'monchyssoises', 'monchyssois'}
________________________________________________________________
Monchy-au-Bois ['Monciaquois']
{'monciaquois'}
________________________________________________________________
Monchy-sur-Eu ['Monchoises', 'Mon

Naussac ['Naussacois']
{'naussacois'}
________________________________________________________________
Naussannes ['Naussannais']
{'naussannais'}
________________________________________________________________
Nauviale ['Nauvialois']
{'nauvialois'}
________________________________________________________________
Naveil ['Naveillois']
{'naveillois'}
________________________________________________________________
Navenne ['Navennois', 'Navennoise']
{'navennois', 'navennoise'}
________________________________________________________________
Naves ['Navarois', 'Navaroises', 'Navois', 'Navoises']
{'navois', 'navaroises', 'navoises', 'navarois'}
________________________________________________________________
Nay ['Nayons', 'Nayais']
{'nayons', 'nayais'}
________________________________________________________________
Nayemont-les-Fosses ['Nayemontais', 'Nayemontaises']
{'nayemontais', 'nayemontaises'}
________________________________________________________________
Nazelles-Négron ['Nazel

{'pleumeurienne', 'pleumeurien'}
________________________________________________________________
Pleurs ['Pleuriots']
{'pleuriots'}
________________________________________________________________
Pleurtuit ['Pleurtuisien']
{'pleurtuisien'}
________________________________________________________________
Pleuven ['Pleuvennois']
{'pleuvennois'}
________________________________________________________________
Pleuvezain ['Pluvuisiens', 'Pluvuisiennes']
{'pluvuisiennes', 'pluvuisiens'}
________________________________________________________________
Pleuville ['Pleuvillois']
{'pleuvillois'}
________________________________________________________________
Pleyben ['Pleybennois']
{'pleybennois'}
________________________________________________________________
Pleyber-Christ ['Pleybériens']
{'pleybériens'}
________________________________________________________________
Plichancourt ['plichancurtiens']
{'plichancurtiens'}
________________________________________________________________
Pliv

{'ronquerollais', 'ronquerollaises'}
________________________________________________________________
Ronsenac ['Ronsenacois']
{'ronsenacois'}
________________________________________________________________
Ronssoy ['Ronssoyens', 'Ronssoyennes']
{'ronssoyens', 'ronssoyennes'}
________________________________________________________________
Ronthon ['Ronthonais']
{'ronthonais'}
________________________________________________________________
Ronvaux ['Ronvalois']
{'ronvalois'}
________________________________________________________________
Roppe ['Roppois']
{'roppois'}
________________________________________________________________
Roppenheim ['Roppenheimois', 'Roppenheimoises']
{'roppenheimois', 'roppenheimoises'}
________________________________________________________________
Roppeviller ['Roppevillerois']
{'roppevillerois'}
________________________________________________________________
Roquebillière ['Roquebilliérois']
{'roquebilliérois'}
_______________________________________

Saint-Suliac ['Suliaçais']
{'suliaçais'}
________________________________________________________________
Saint-Sulpice-d'Excideuil ['Saint Sulpiçois']
{'saint sulpiçois'}
________________________________________________________________
Saint-Sulpice-de-Grimbouville ['Grimbaldien']
{'grimbaldien'}
________________________________________________________________
Saint-Sulpice-de-Guilleragues ['Sulpiciens']
{'sulpiciens'}
________________________________________________________________
Saint-Sulpice-de-Mareuil ['Sulpiciens']
{'sulpiciens'}
________________________________________________________________
Saint-Sulpice-de-Pommeray ['Saint Sulpiciens']
{'saint sulpiciens'}
________________________________________________________________
Saint-Sulpice-de-Roumagnac ['Romagnosulpiciens']
{'romagnosulpiciens'}
________________________________________________________________
Saint-Sulpice-des-Rivoires ['Rivoirins']
{'rivoirins'}
________________________________________________________________
Sa

________________________________________________________________
Taussac-la-Billière ['Taussacois']
{'taussacois'}
________________________________________________________________
Tauxières-Mutry ['Tauxièrois']
{'tauxièrois'}
________________________________________________________________
Tavant ['Tavantais']
{'tavantais'}
________________________________________________________________
Tavaux ['Tavelloise', 'Tavelois', 'Tavellois', 'Taveloises']
{'tavellois', 'tavelloise', 'taveloises', 'tavelois'}
________________________________________________________________
Tavaux-et-Pontséricourt ['Tavelois']
{'tavelois'}
________________________________________________________________
Tavel ['Tavelois']
{'tavelois'}
________________________________________________________________
Taverny ['Tabernaciens']
{'tabernaciens'}
________________________________________________________________
Tavers ['Taversois']
{'taversois'}
________________________________________________________________
Tavey ['Ta

Vias ['Viassois']
{'viassois'}
________________________________________________________________
Vibersviller ['Vibersvillérois', 'Vibersvilléroises']
{'vibersvilléroises', 'vibersvillérois'}
________________________________________________________________
Vibeuf ['Vibeufais', 'Vibeufaises']
{'vibeufais', 'vibeufaises'}
________________________________________________________________
Vibrac ['Vibracois', 'Vibracais']
{'vibracois', 'vibracais'}
________________________________________________________________
Vibraye ['Vibraysien']
{'vibraysien'}
________________________________________________________________
Vic-Fezensac ['Vicois', 'Vicoises']
{'vicois', 'vicoises'}
________________________________________________________________
Vic-en-Bigorre ['Vicquois']
{'vicquois'}
________________________________________________________________
Vic-la-Gardiole ['Vicois']
{'vicois'}
________________________________________________________________
Vic-le-Comte ['Vicomtois']
{'vicomtois'}
___________

________________________________________________________________
Évry ['Évrytons']
{'évrytons'}
________________________________________________________________
Évry-Grégy-sur-Yerre ['Évryciens']
{'évryciens'}
________________________________________________________________
Ézanville ['Ézanvillois']
{'ézanvillois'}
________________________________________________________________
Ézy-sur-Eure ['Ézéen']
{'ézéen'}
________________________________________________________________
Île-Molène ['Molénais']
{'molénais'}
________________________________________________________________
Île-aux-Moines ['Îlois', 'Îloises']
{'îlois', 'îloises'}
________________________________________________________________
Île-d'Aix ['Aixois']
{'aixois'}
________________________________________________________________
Île-d'Arz ['Ildaraise', 'Ildarais']
{'ildaraise', 'ildarais'}
________________________________________________________________
Île-d'Houat ['Houatais']
{'houatais'}
__________________________________

In [69]:
with open("Data/wikidata_infobox_extracted_demonyms_final.json", "w") as fp:
    json.dump(gentiles_,fp)

In [30]:
sett= json.load(open("Data/temp/gentile1_2_processed.json"))

JSONDecodeError: Unexpected UTF-8 BOM (decode using utf-8-sig): line 1 column 1 (char 0)

In [61]:
import codecs

x = json.load(codecs.open('Data/temp/gentiles2_2_processed.json', 'r', 'utf-8-sig'))

In [62]:
with open("Data/temp/gentile2_2_processed2.json", "w") as fp:
    json.dump(x,fp)