In [1]:
import logging
import pandas as pd
from iso3166 import countries

from csv import QUOTE_NONNUMERIC
from unicef_schools_attribute_cleaning.pandas.standardize_column_names import standardize_column_names
from unicef_schools_attribute_cleaning.pandas.dataframe_cleaner import dataframe_cleaner

# The cleaning library reports its progress through the `logging` module at
# INFO level; lower the root logger threshold so those messages are shown
# in the notebook output.
LOG_LEVEL = logging.INFO
logging.basicConfig(level=LOG_LEVEL)

In [2]:
# Raw input for this notebook, addressed relative to the notebook's directory.
# Alternative input (not used here):
#   ../../data/unicef_fixed/honduras_original_uuid.csv
RAW_CSV_PATH = '../../data/UNICE_schools_raw_2020_Jun/HN-UNICEF-0-government-0.csv'

# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, so column dtypes are inferred from the whole column.
src_df = pd.read_csv(RAW_CSV_PATH, low_memory=False)

# Show the (truncated) raw frame as the cell's output.
src_df

Unnamed: 0,admin2,admin3,school_id,name,educ_level,environment,address,lon,lat,admin4,speed_connectivity
0,Atlántida,La Ceiba,10100001,ESCUELA GUADALUPE DE QUEZADA,Básica,Urbano,"BARRIO EL CENTRO, AVENIDA SAN ISIDRO",-91.48873,0.00001,,
1,Atlántida,La Ceiba,10100002,JOSE CECILIO DEL VALLE,Básica,Urbano,BELLA VISTA,-86.78323,15.77059,,
2,Atlántida,La Ceiba,10100004,AUGUSTO C COELLO,Básica,Urbano,B INGLES,-86.79567,15.78602,LA CEIBA,0.0
3,Atlántida,La Ceiba,10100005,JOSE TRINIDAD CABAÑAS,Básica,Urbano,LA MERCED,-86.78438,15.77758,LA CEIBA,0.0
4,Atlántida,La Ceiba,10100006,GUSTAVO A CASTAÑEDA,Básica,Urbano,B MEJIA,-86.79670,15.77411,LA CEIBA,0.0
...,...,...,...,...,...,...,...,...,...,...,...
17587,,,80100113,,,,,,,DISTRITO CENTRAL,4.0
17588,,,80100203,,,,,,,DISTRITO CENTRAL,4.0
17589,,,150300138,,,,,,,CATACAMAS,4.0
17590,,,150300038,,,,,,,LAS MESETAS,4.0


In [3]:
# Look up the iso3166 country record for Honduras by its alpha-2 code.
COUNTRY_ALPHA2 = 'HN'
country = countries.get(COUNTRY_ALPHA2)

# Run the cleaning pipeline on the raw frame. The INFO log shows it copies the
# dataframe, standardizes column names, generates a uuid column when none is
# present and adds the schema columns missing from the input.
# NOTE(review): the displayed result ends at index 17542 while the raw frame
# ends at index 17590, so the cleaner appears to drop or merge rows -- confirm
# this is expected.
# NOTE(review): every displayed row has is_private=True although the input
# file name contains "government" -- verify the is_private derivation.
df = dataframe_cleaner(country=country, dataframe=src_df)

# Show the (truncated) cleaned frame as the cell's output.
df

INFO:unicef_schools_attribute_cleaning.pandas.dataframe_cleaner:copying dataframe...
INFO:unicef_schools_attribute_cleaning.pandas.dataframe_cleaner:standardizing column names...
INFO:unicef_schools_attribute_cleaning.pandas.standardize_column_names:uuid column not found, generating uuid4
INFO:unicef_schools_attribute_cleaning.pandas.standardize_column_names:adding 33 columns from schema: ['admin0',
 'admin1',
 'admin_code',
 'admin_id',
 'address2',
 'phone_number',
 'person_contact',
 'email',
 'postal_code',
 'altitude',
 'gps_confidence',
 'date',
 'num_students',
 'num_teachers',
 'connectivity',
 'type_connectivity',
 'latency_connectivity',
 'availability_connectivity',
 'num_computers',
 'type_school',
 'num_classrooms',
 'num_sections',
 'water',
 'electricity',
 'num_latrines',
 'description',
 'last_update',
 'tower_dist',
 'tower_type_service',
 'tower_type',
 'tower_code',
 'tower_latitude',
 'tower_longitude']
INFO:unicef_schools_attribute_cleaning.pandas.standardize_colu

Unnamed: 0,country_code,admin0,admin1,admin2,admin3,admin4,admin_code,admin_id,name,address,...,description,last_update,tower_dist,tower_type_service,tower_type,tower_code,tower_latitude,tower_longitude,is_private,uuid
0,HN,,,,La Ceiba,,,"3.6,HND,GID_2=nan",ESCUELA GUADALUPE DE QUEZADA,"BARRIO EL CENTRO, AVENIDA SAN ISIDRO",...,,,,,,,,,True,e21221b8-24ea-42b8-a90c-e7dd13f70dfe
1,HN,Honduras,Atlántida,La Ceiba,La Ceiba,,HND.1.5_1,"3.6,HND,GID_2=HND.1.5_1",JOSE CECILIO DEL VALLE,BELLA VISTA,...,,,,,,,,,True,73a0a729-bc01-4acc-8f51-c6b1fed2a1bd
2,HN,Honduras,Atlántida,La Ceiba,La Ceiba,LA CEIBA,HND.1.5_1,"3.6,HND,GID_2=HND.1.5_1",AUGUSTO C COELLO,B INGLES,...,,,,,,,,,True,c69954da-47e3-4040-9b0b-3623249cb6b5
3,HN,Honduras,Atlántida,La Ceiba,La Ceiba,LA CEIBA,HND.1.5_1,"3.6,HND,GID_2=HND.1.5_1",JOSE TRINIDAD CABAÑAS,LA MERCED,...,,,,,,,,,True,69f76739-3366-4ea5-89ed-cfdee35acf8b
4,HN,Honduras,Atlántida,La Ceiba,La Ceiba,LA CEIBA,HND.1.5_1,"3.6,HND,GID_2=HND.1.5_1",GUSTAVO A CASTAÑEDA,B MEJIA,...,,,,,,,,,True,0e047fee-8e47-43ca-815c-9880eb72058f
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17539,HN,Honduras,Yoro,Yorito,Yorito,,HND.18.10_1,"3.6,HND,GID_2=HND.18.10_1",RENE ALFREDO CHACON,"ALDEA LA ESPERANZA, CASERIO LOS HIGUERITOS, BA...",...,,,,,,,,,True,e7509b78-a4f3-42be-a7bb-476576cb94cd
17540,HN,Honduras,Yoro,Yorito,Yorito,,HND.18.10_1,"3.6,HND,GID_2=HND.18.10_1",LEMPIRA,"ALDEA LUQUIGUE, CASERIO LAGUNITA",...,,,,,,,,,True,cdac804f-e4d7-4433-9b4a-b4c29084964f
17541,HN,Honduras,Yoro,Yorito,Yorito,,HND.18.10_1,"3.6,HND,GID_2=HND.18.10_1",RENACER,"ALDEA LA ESPERANZA, CASERIO HIGUERO QUEMADO",...,,,,,,,,,True,598cbf85-f0cc-4b88-828c-65e7d68925b1
17542,HN,Honduras,Yoro,Yorito,Yorito,,HND.18.10_1,"3.6,HND,GID_2=HND.18.10_1",MANUEL DE JESUS CANTILLANO,COL.NUEVO PARAISO,...,,,,,,,,,True,842330f5-2b8a-4c40-902c-1a8a56b23ce8


In [4]:
# Write the cleaned frame next to the notebook. QUOTE_NONNUMERIC wraps every
# non-numeric field in quotes, and index=False leaves the row index out of
# the file.
OUTPUT_CSV = 'honduras_cleaned.csv'
df.to_csv(OUTPUT_CSV, index=False, quoting=QUOTE_NONNUMERIC)

In [5]:
# Hand the exported CSV to the operating system so it opens in whatever
# application is associated with .csv files (LibreOffice, Excel, other).
# NOTE(review): `open` is the macOS launcher command; on Linux the usual
# equivalent is `xdg-open` and on Windows `start` -- adjust if this notebook
# is run elsewhere.
!open honduras_cleaned.csv