In [1]:
import logging
import os
import subprocess
import sys
from csv import QUOTE_NONNUMERIC

import pandas as pd
from iso3166 import countries

from unicef_schools_attribute_cleaning.pandas.dataframe_cleaner import dataframe_cleaner

# Make INFO-level log records visible: configure the root logger at INFO so the
# progress messages logged while dataframe_cleaner runs appear in the cell output.
logging.basicConfig(level=logging.INFO)

In [2]:
# Read the raw Liquid schools CSV into a DataFrame.
# low_memory=False has pandas process the file in one pass instead of in
# chunks, so each column's dtype is inferred from the whole file.
src_df = pd.read_csv(
    filepath_or_buffer='../../data/UNICE_schools_raw_2020_Jun/liquid_schools.csv',
    low_memory=False,
)

# Bare last expression: show the (truncated) raw frame as the cell output.
src_df

Unnamed: 0,Name of School,Province,District,County,Latitude,Longitude,Distance(Km)
0,SOUTHEND ACADEMY,WESTERN,BUNGOMA,BUNGOMA,0.562360,34.561880,0.000038
1,JOY SHIRU PRI SCH,NAIROBI,NAIROBI,NAIROBI,-1.265244,36.750676,0.000144
2,ST EDWARDS HIGH SCH,NAIROBI,STAREHE,,-1.272549,36.822084,0.000158
3,NAIVASHA PRI BOARDING,RIFT VALLEY,NAKURU,NAKURU,-0.704300,36.435070,0.000195
4,MLOLONGO PRI SCH,EASTERN,MACHAKOS,MACHAKOS,-1.388401,36.935328,0.000218
...,...,...,...,...,...,...,...
32480,DAVA INTEGRATED PRI SCH,NORTH EASTERN,MANDERA,MANDERA,3.953030,41.860900,401.815377
32481,DAVA SPECIAL UNIT,NORTH EASTERN,MANDERA,MANDERA,3.953030,41.860900,401.815377
32482,BORDER VIEW ACADEMY PRI SCH,NORTH EASTERN,MANDERA,MANDERA,3.942400,41.867270,402.275520
32483,BURUBURU PRI SCH,NORTH EASTERN,MANDERA,MANDERA,3.947800,41.868850,402.566618


In [3]:
# Look up the ISO 3166 entry for the alpha-2 code 'KE'.
country = countries.get('KE')

# Run the cleaning pipeline over the raw frame. The INFO log shows it copying
# the frame, standardizing column names, generating a uuid column and adding
# the remaining schema columns.
# NOTE(review): is_private=True results in every row being flagged private in
# the output below — confirm that is intended for this dataset.
df = dataframe_cleaner(
    dataframe=src_df, country=country,
    provider='LIQUID', provider_is_private=True,
    is_private=True,
)

# Bare last expression: show the cleaned frame as the cell output.
df

INFO:unicef_schools_attribute_cleaning.pandas.dataframe_cleaner:copying dataframe...
INFO:unicef_schools_attribute_cleaning.pandas.dataframe_cleaner:standardizing column names...
INFO:unicef_schools_attribute_cleaning.pandas.standardize_column_names:uuid column not found, generating uuid4
INFO:unicef_schools_attribute_cleaning.pandas.standardize_column_names:renaming columns: {'District': 'admin3',
 'Latitude': 'lat',
 'Longitude': 'lon',
 'Name of School': 'name',
 'Province': 'admin2'}
INFO:unicef_schools_attribute_cleaning.pandas.standardize_column_names:adding 38 columns from schema: ['admin0',
 'admin1',
 'admin4',
 'admin_code',
 'admin_id',
 'address',
 'address2',
 'phone_number',
 'person_contact',
 'email',
 'postal_code',
 'altitude',
 'gps_confidence',
 'date',
 'num_students',
 'num_teachers',
 'connectivity',
 'type_connectivity',
 'speed_connectivity',
 'latency_connectivity',
 'availability_connectivity',
 'num_computers',
 'type_school',
 'educ_level',
 'environment',


Unnamed: 0,country_code,admin0,admin1,admin2,admin3,admin4,admin_code,admin_id,name,address,...,description,last_update,tower_dist,tower_type_service,tower_type,tower_code,tower_latitude,tower_longitude,is_private,uuid
0,KE,Kenya,Bungoma,Kanduyi,Khalaba,,KEN.3.3.4_1,"3.6,KEN,GID_3=KEN.3.3.4_1",SOUTHEND ACADEMY,,...,,,,,,,,,True,7a7fe5d9-107c-43c2-9f27-2f0bb4c2c61d
1,KE,Kenya,Nairobi,Westlands,Kangemi,,KEN.30.17.1_1,"3.6,KEN,GID_3=KEN.30.17.1_1",JOY SHIRU PRI SCH,,...,,,,,,,,,True,d493c470-d941-4d92-896a-eb204d93a738
2,KE,Kenya,Nairobi,Starehe,Ngara,,KEN.30.16.4_1,"3.6,KEN,GID_3=KEN.30.16.4_1",ST EDWARDS HIGH SCH,,...,,,,,,,,,True,d4cf87ae-bbbd-4c4b-8acf-c7a13392adc2
3,KE,Kenya,Nakuru,Naivasha,Viwandani,,KEN.31.6.8_1,"3.6,KEN,GID_3=KEN.31.6.8_1",NAIVASHA PRI BOARDING,,...,,,,,,,,,True,5325fc41-1639-4eb0-8e03-9b3480b1b8fa
4,KE,Kenya,Machakos,Mavoko,Syokimau/Mulolongo,,KEN.22.6.4_1,"3.6,KEN,GID_3=KEN.22.6.4_1",MLOLONGO PRI SCH,,...,,,,,,,,,True,bcd6f786-cb21-40f8-8040-cca803583bf2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32480,KE,Kenya,Mandera,Mandera East,Township,,KEN.24.3.6_1,"3.6,KEN,GID_3=KEN.24.3.6_1",DAVA INTEGRATED PRI SCH,,...,,,,,,,,,True,8eff753a-2745-4dd7-b5ff-e594fd280ae4
32481,KE,Kenya,Mandera,Mandera East,Township,,KEN.24.3.6_1,"3.6,KEN,GID_3=KEN.24.3.6_1",DAVA SPECIAL UNIT,,...,,,,,,,,,True,5fbdd742-5a8a-458b-a1c1-58269077ff4c
32482,KE,Kenya,Mandera,Mandera East,Township,,KEN.24.3.6_1,"3.6,KEN,GID_3=KEN.24.3.6_1",BORDER VIEW ACADEMY PRI SCH,,...,,,,,,,,,True,21ab1c4a-c268-4355-a054-79d3fb39f270
32483,KE,Kenya,Mandera,Mandera East,Township,,KEN.24.3.6_1,"3.6,KEN,GID_3=KEN.24.3.6_1",BURUBURU PRI SCH,,...,,,,,,,,,True,3ccb1991-017a-47a2-9af0-ec75a8525a9e


In [4]:
# Write the cleaned frame to a CSV in the notebook's working directory.
# Every non-numeric field is quoted and the pandas index is not written.
df.to_csv(
    path_or_buf='kenya_liquid_cleaned.csv',
    index=False,
    quoting=QUOTE_NONNUMERIC,
)

In [5]:
# Open the exported CSV in the desktop's default application (LibreOffice,
# Excel, other). The previous `!open ...` shell-out relied on the macOS `open`
# command; choose the launcher that matches the platform running the kernel.
cleaned_csv = 'kenya_liquid_cleaned.csv'
try:
    if sys.platform == 'darwin':
        # macOS: same command the original cell invoked.
        subprocess.run(['open', cleaned_csv], check=False)
    elif os.name == 'nt':
        # Windows: os.startfile opens the file with its associated program.
        os.startfile(cleaned_csv)
    else:
        # Linux / other desktops: xdg-open is the usual launcher.
        subprocess.run(['xdg-open', cleaned_csv], check=False)
except OSError as exc:
    # This is a best-effort convenience step (e.g. headless servers have no
    # launcher installed); report the problem instead of failing the notebook.
    print(f'could not open {cleaned_csv}: {exc}')