Skip to content

Commit

Permalink
Faster routine
Browse files Browse the repository at this point in the history
  • Loading branch information
geoffreyaldebert committed Mar 23, 2023
2 parents d2c5ea2 + 509ec22 commit fb3309d
Show file tree
Hide file tree
Showing 25 changed files with 310 additions and 150 deletions.
169 changes: 82 additions & 87 deletions csv_detective/detect_fields/FR/geo/adresse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,100 +5,95 @@

# Lower-cased street-type markers looked up as substrings of the normalised
# value. Single-word markers end with a space so that they cannot match the
# beginning of a longer word (e.g. 'cour ' is not found in 'courbe').
# Very short abbreviations ('r', 'che', 'pro', 'rte', 'car', 'lot', 'qu')
# are deliberately left out: they match far too many ordinary words.
_VOIES = frozenset({
    'aire ',
    'allee ',
    'avenue ',
    'base ',
    'boulevard ',
    'cami ',
    'carrefour ',
    'chemin ',
    'cheminement ',
    'chaussee ',
    'cite ',
    'clos ',
    'coin ',
    'corniche ',
    'cote ',
    'cour ',
    'cours ',
    'domaine ',
    'descente ',
    'ecart ',
    'esplanade ',
    'faubourg ',
    'gare ',
    'grande rue',
    'hameau ',
    'halle ',
    'ilot ',
    'impasse ',
    'lieu dit',
    'lotissement ',
    'marche ',
    'montee ',
    'parc ',
    'passage ',
    'place ',
    'plan ',
    'plaine ',
    'plateau ',
    'pont ',
    'port ',
    'promenade ',
    'parvis ',
    'quartier ',
    'quai ',
    'residence ',
    'ruelle ',
    'rocade ',
    'rond point',
    'route ',
    'rue ',
    'sente - sentier',
    'square ',
    'tour ',
    'terre-plein',
    'traverse ',
    'villa ',
    'village ',
    'voie ',
    'zone artisanale',
    'zone d’amenagement concerte',
    'zone d’amenagement differe',
    'zone industrielle',
    'zone ',
    'av ',
    'pl ',
    'bd ',
    'chs ',
    'dom ',
    'ham ',
    'ld ',
    'vlge ',
    'za ',
    'zac ',
    'zad ',
    'zi ',
    'fg ',
    'imp ',
    # Trailing space added: the bare 'mte' also matched inside unrelated
    # words such as 'comte'.
    'mte ',
})


def _is(val):
    '''Return True when val looks like a French street address.

    val is normalised with _process_text and is considered an address as
    soon as it contains one of the street-type markers of _VOIES.
    NOTE(review): the markers are lower-case and unaccented, so
    _process_text presumably lower-cases and strips accents - confirm.
    '''
    val = _process_text(val)
    return any(voie in val for voie in _VOIES)
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# NOTE(review): presumably the share of values that must match for the
# column to be tagged - confirm against the caller.
PROPORTION = 0.75
# Load the reference codes once at import time; the context manager closes
# the file even if reading fails.
with open(join(dirname(__file__), 'code_commune_insee.txt'), 'r') as f:
    codes_insee = set(f.read().split('\n'))
# A trailing newline in the file yields an empty entry. Discarding it (rather
# than deleting the last element) never drops a real code when the file has
# no trailing newline.
codes_insee.discard('')


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

def _is(val):
'''Renvoie True si val peut être un code_département, False sinon'''
liste_des_dep = [str(x).zfill(2) for x in range(1, 20)] + \
['2A', '2B', '971', '972', '973', '974', '976', '2a', '2b'] + \
[str(x) for x in range(21, 96)]
liste_des_dep = {str(x).zfill(2) for x in range(1, 20)} | \
{'2A', '2B', '984', '986', '987', '988', '989', '2a', '2b'} | \
{str(x) for x in range(21, 96)} | \
{str(x) for x in range(971, 979)}
return val in liste_des_dep
7 changes: 5 additions & 2 deletions csv_detective/detect_fields/FR/geo/code_postal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@

# NOTE(review): presumably the share of values that must match for the
# column to be tagged - confirm against the caller.
PROPORTION = 0.9
# Load the postal codes once at import time; the context manager closes the
# file even if reading fails.
with open(join(dirname(__file__), 'code_postal.txt'), 'r') as f:
    codes_postaux = set(f.read().split('\n'))
# A trailing newline in the file yields an empty entry. Discarding it (rather
# than deleting the last element) never drops a real code when the file has
# no trailing newline.
codes_postaux.discard('')
# Former name of the collection, kept so existing references keep working.
codes_postal = codes_postaux


Expand All @@ -14,4 +17,4 @@ def _is(val):
if not bool(re.match(regex, val)):
return False

return val in codes_postal
return val in codes_postaux
4 changes: 2 additions & 2 deletions csv_detective/detect_fields/FR/geo/code_region/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

def _is(val):
'''Renvoie True si val peut être un code_région, False sinon'''
liste_regions = [
liste_regions = {
'01',
'02',
'03',
Expand All @@ -23,5 +23,5 @@ def _is(val):
'84',
'93',
'94'
]
}
return val in liste_regions
3 changes: 3 additions & 0 deletions csv_detective/detect_fields/FR/geo/commune/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# NOTE(review): presumably the share of values that must match for the
# column to be tagged - confirm against the caller.
PROPORTION = 0.9
# Load the reference entries once at import time; the context manager closes
# the file even if reading fails.
with open(join(dirname(__file__), 'commune.txt'), 'r') as f:
    codes_commune = set(f.read().split('\n'))
# A trailing newline in the file yields an empty entry. Discarding it (rather
# than deleting the last element) never drops a real entry when the file has
# no trailing newline.
codes_commune.discard('')


Expand Down
3 changes: 3 additions & 0 deletions csv_detective/detect_fields/FR/geo/departement/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# NOTE(review): presumably the share of values that must match for the
# column to be tagged - confirm against the caller.
PROPORTION = 0.9
# Load the reference entries once at import time; the context manager closes
# the file even if reading fails.
with open(join(dirname(__file__), 'departement.txt'), 'r') as f:
    codes_departement = set(f.read().split('\n'))
# A trailing newline in the file yields an empty entry. Discarding it (rather
# than deleting the last element) never drops a real entry when the file has
# no trailing newline.
codes_departement.discard('')


Expand Down
3 changes: 3 additions & 0 deletions csv_detective/detect_fields/FR/geo/insee_canton/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# NOTE(review): presumably the share of values that must match for the
# column to be tagged - confirm against the caller.
PROPORTION = 0.9
# Load the reference entries once at import time; the context manager closes
# the file even if reading fails.
with open(join(dirname(__file__), 'cantons.txt'), 'r') as f:
    cantons = set(f.read().split('\n'))
# A trailing newline in the file yields an empty entry. Discarding it (rather
# than deleting the last element) never drops a real entry when the file has
# no trailing newline.
cantons.discard('')


Expand Down
1 change: 1 addition & 0 deletions csv_detective/detect_fields/FR/geo/pays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# NOTE(review): presumably the share of values that must match for the
# column to be tagged - confirm against the caller.
PROPORTION = 0.6
# Load the reference entries once at import time; the context manager closes
# the file even if reading fails.
with open(join(dirname(__file__), 'pays.txt'), 'r') as f:
    pays = set(f.read().split('\n'))
# Drop the empty entry produced by a trailing newline or blank line, so that
# an empty value is never reported as a match. This is a no-op when the file
# has no such line.
pays.discard('')


Expand Down
11 changes: 7 additions & 4 deletions csv_detective/detect_fields/FR/geo/region/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
from csv_detective.process_text import _process_text

# NOTE(review): presumably the share of values that must match for the
# column to be tagged - confirm against the caller.
PROPORTION = 1
# Load the region names once at import time instead of re-reading the file on
# every call; the context manager closes the file even if reading fails.
with open(join(dirname(__file__), 'region.txt'), 'r') as f:
    regions = set(f.read().split('\n'))
# A trailing newline in the file yields an empty entry. Discarding it (rather
# than deleting the last element) never drops a real name when the file has
# no trailing newline.
regions.discard('')


def _is(val):
    '''Return True when val, once normalised, is one of the region names.'''
    val = _process_text(val)
    return val in regions
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
def _is(val):
'''Repère les code csp telles que définies par l'INSEE'''
val = _process_text(val)
if not len(val) == 4:
if len(val) != 4:
return False
a = bool(re.match(r'^[123456][0-9]{2}[abcdefghijkl]$', val))
b = val in [
b = val in {
'7100',
'7200',
'7400',
Expand All @@ -22,5 +22,5 @@ def _is(val):
'8400',
'8500',
'8600'
]
}
return a or b
3 changes: 3 additions & 0 deletions csv_detective/detect_fields/FR/other/csp_insee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# NOTE(review): presumably the share of values that must match for the
# column to be tagged - confirm against the caller.
PROPORTION = 1
# Load the reference codes once at import time; the context manager closes
# the file even if reading fails.
with open(join(dirname(__file__), 'csp_insee.txt'), 'r') as f:
    codes_insee = set(f.read().split('\n'))
# A trailing newline in the file yields an empty entry. Discarding it (rather
# than deleting the last element) never drops a real code when the file has
# no trailing newline.
codes_insee.discard('')


Expand Down
3 changes: 3 additions & 0 deletions csv_detective/detect_fields/FR/other/insee_ape700/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# NOTE(review): presumably the share of values that must match for the
# column to be tagged - confirm against the caller.
PROPORTION = 1
# Load the reference codes once at import time; the context manager closes
# the file even if reading fails.
# NOTE(review): 'condes' looks like a typo for 'codes'; the name is kept
# because code outside this excerpt may reference it.
with open(join(dirname(__file__), 'insee_ape700.txt'), 'r') as f:
    condes_insee_ape = set(f.read().split('\n'))
# A trailing newline in the file yields an empty entry. Discarding it (rather
# than deleting the last element) never drops a real code when the file has
# no trailing newline.
condes_insee_ape.discard('')


Expand Down
2 changes: 1 addition & 1 deletion csv_detective/detect_fields/FR/other/sexe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
# Accepted labels, built once at import time.
_SEXES = frozenset({'homme', 'femme', 'h', 'f', 'm', 'masculin', 'feminin'})


def _is(val):
    '''Return True when val, once normalised, is a gender label.'''
    val = _process_text(val)
    return val in _SEXES
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
def _is(val):
'''Renvoie True si les champs peuvent être des jours de la semaine'''
val = val.lower()
jours = [
jours = {
'lundi',
'mardi',
'mercredi',
Expand All @@ -19,5 +19,5 @@ def _is(val):
'ven',
'sam',
'dim'
]
}
return val in jours
4 changes: 2 additions & 2 deletions csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def _is(val):
'''Renvoie True si les champs peuvent être des mois de l'année'''
val = unidecode(val.lower())
mois = [
mois = {
'janvier',
'fevrier',
'mars',
Expand All @@ -33,5 +33,5 @@ def _is(val):
'oct',
'nov',
'dec'
]
}
return val in mois
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

# Read the alpha-2 country code file once at import time and keep its lines
# in a set for constant-time membership tests.
with open(join(dirname(__file__), 'iso_country_code_alpha2.txt'), 'r') as iofile:
    liste_pays = set(iofile.read().split('\n'))


def _is(val):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ def _is(val):
regex = r'[A-Z]{3}$'
if not bool(re.match(regex, val)):
return False
return val in liste_pays
return val in set(liste_pays)
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

# Read the numeric country code file once at import time and keep its lines
# in a set for constant-time membership tests.
with open(join(dirname(__file__), 'iso_country_code_numeric.txt'), 'r') as iofile:
    liste_pays = set(iofile.read().split('\n'))


def _is(val):
Expand Down

0 comments on commit fb3309d

Please sign in to comment.