In [1]:
import logging
import pandas as pd
from iso3166 import countries

from csv import QUOTE_NONNUMERIC
from unicef_schools_attribute_cleaning.pandas.dataframe_cleaner import dataframe_cleaner

# Lower the root logger threshold to INFO so that INFO-level messages emitted
# while the cleaner runs are shown in the notebook output.
logging.basicConfig(
    level=logging.INFO,
)

In [2]:
# Read the raw Liquid schools CSV into a DataFrame. low_memory=False tells
# pandas not to parse the file in chunks, which avoids mixed-dtype inference.
raw_csv_path = '../../data/UNICE_schools_raw_2020_Jun/liquid_schools.csv'
src_df = pd.read_csv(raw_csv_path, low_memory=False)

# Last expression of the cell: render the loaded frame.
src_df

Unnamed: 0,Name of School,Province,District,County,Latitude,Longitude,Distance(Km)
0,SOUTHEND ACADEMY,WESTERN,BUNGOMA,BUNGOMA,0.562360,34.561880,0.000038
1,JOY SHIRU PRI SCH,NAIROBI,NAIROBI,NAIROBI,-1.265244,36.750676,0.000144
2,ST EDWARDS HIGH SCH,NAIROBI,STAREHE,,-1.272549,36.822084,0.000158
3,NAIVASHA PRI BOARDING,RIFT VALLEY,NAKURU,NAKURU,-0.704300,36.435070,0.000195
4,MLOLONGO PRI SCH,EASTERN,MACHAKOS,MACHAKOS,-1.388401,36.935328,0.000218
...,...,...,...,...,...,...,...
32480,DAVA INTEGRATED PRI SCH,NORTH EASTERN,MANDERA,MANDERA,3.953030,41.860900,401.815377
32481,DAVA SPECIAL UNIT,NORTH EASTERN,MANDERA,MANDERA,3.953030,41.860900,401.815377
32482,BORDER VIEW ACADEMY PRI SCH,NORTH EASTERN,MANDERA,MANDERA,3.942400,41.867270,402.275520
32483,BURUBURU PRI SCH,NORTH EASTERN,MANDERA,MANDERA,3.947800,41.868850,402.566618


In [3]:
# Run the attribute cleaner on the raw frame for Kenya (ISO 3166 code 'KE').
# The open text file is handed to the cleaner as `removed_columns_report`;
# the `with` block guarantees it is closed once the cleaner returns.
country = countries.get('KE')
report_path = 'kenya_liquid_columns_report.txt'
cleaner_kwargs = {
    'dataframe': src_df,
    'country': country,
    'is_private': True,
    'provider': 'LIQUID',
    'provider_is_private': True,
}
with open(report_path, mode='w', encoding='utf-8') as filehandle:
    df = dataframe_cleaner(removed_columns_report=filehandle, **cleaner_kwargs)

# Last expression of the cell: render the cleaned frame.
df

INFO:unicef_schools_attribute_cleaning.pandas.dataframe_cleaner:copying dataframe...
INFO:unicef_schools_attribute_cleaning.pandas.dataframe_cleaner:standardizing column names...
INFO:unicef_schools_attribute_cleaning.pandas.standardize_column_names:uuid column not found, generating uuid4
INFO:unicef_schools_attribute_cleaning.pandas.standardize_column_names:renaming columns: {'District': 'admin3',
 'Latitude': 'lat',
 'Longitude': 'lon',
 'Name of School': 'name',
 'Province': 'admin2'}
INFO:unicef_schools_attribute_cleaning.pandas.standardize_column_names:adding 40 columns from schema: ['admin0',
 'admin1',
 'admin4',
 'admin_code',
 'admin_id',
 'address',
 'address2',
 'phone_number',
 'person_contact',
 'email',
 'postal_code',
 'altitude',
 'gps_confidence',
 'date',
 'num_students',
 'num_teachers',
 'connectivity',
 'type_connectivity',
 'speed_connectivity',
 'latency_connectivity',
 'availability_connectivity',
 'num_computers',
 'type_school',
 'educ_level',
 'environment',


Unnamed: 0,country_code,admin0,admin1,admin2,admin3,admin4,admin_code,admin_id,name,address,...,tower_dist,tower_type_service,tower_type,tower_code,tower_latitude,tower_longitude,is_private,is_invalid,is_invalid_reason,uuid
0,KE,Kenya,Bungoma,Kanduyi,Khalaba,,KEN.3.3.4_1,"3.6,KEN,GID_3=KEN.3.3.4_1",SOUTHEND ACADEMY,,...,,,,,,,True,False,,228efe38-e81b-42e0-a0eb-faac5b0147e7
1,KE,Kenya,Nairobi,Westlands,Kangemi,,KEN.30.17.1_1,"3.6,KEN,GID_3=KEN.30.17.1_1",JOY SHIRU PRI SCH,,...,,,,,,,True,False,,e4162e9e-9b5d-4170-9a29-612d71ed9cb4
2,KE,Kenya,Nairobi,Starehe,Ngara,,KEN.30.16.4_1,"3.6,KEN,GID_3=KEN.30.16.4_1",ST EDWARDS HIGH SCH,,...,,,,,,,True,False,,36609791-0663-4568-a303-93cb46f64198
3,KE,Kenya,Nakuru,Naivasha,Viwandani,,KEN.31.6.8_1,"3.6,KEN,GID_3=KEN.31.6.8_1",NAIVASHA PRI BOARDING,,...,,,,,,,True,False,,d081c932-c748-47d6-8c22-0b15aa931f89
4,KE,Kenya,Machakos,Mavoko,Syokimau/Mulolongo,,KEN.22.6.4_1,"3.6,KEN,GID_3=KEN.22.6.4_1",MLOLONGO PRI SCH,,...,,,,,,,True,False,,35f4c5e6-2bd1-4292-8b72-f86dbe156409
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32480,KE,Kenya,Mandera,Mandera East,Township,,KEN.24.3.6_1,"3.6,KEN,GID_3=KEN.24.3.6_1",DAVA INTEGRATED PRI SCH,,...,,,,,,,True,False,,ea343fe7-0fa3-4f84-a5a1-1bd0af2b58b6
32481,KE,Kenya,Mandera,Mandera East,Township,,KEN.24.3.6_1,"3.6,KEN,GID_3=KEN.24.3.6_1",DAVA SPECIAL UNIT,,...,,,,,,,True,False,,5f3a6c66-a20d-47aa-97bb-1d0d7eedaff3
32482,KE,Kenya,Mandera,Mandera East,Township,,KEN.24.3.6_1,"3.6,KEN,GID_3=KEN.24.3.6_1",BORDER VIEW ACADEMY PRI SCH,,...,,,,,,,True,False,,32d9f59c-91da-4199-bb01-2a742b987f50
32483,KE,Kenya,Mandera,Mandera East,Township,,KEN.24.3.6_1,"3.6,KEN,GID_3=KEN.24.3.6_1",BURUBURU PRI SCH,,...,,,,,,,True,False,,7bfbe04d-00b6-4f62-8bf3-188b04c756cc


In [4]:
# Write the cleaned frame to CSV without the index column; QUOTE_NONNUMERIC
# makes the writer quote every non-numeric field.
df.to_csv(
    path_or_buf='kenya_liquid_cleaned.csv',
    index=False,
    quoting=QUOTE_NONNUMERIC,
)

In [5]:
# Open the exported CSV in an external application (LibreOffice, Excel, or
# other) for a manual look. The leading `!` is an IPython shell escape, so this
# cell only works inside a notebook/IPython session.
# NOTE(review): `open` is presumably the macOS launcher here; other platforms
# likely need a different command (e.g. `xdg-open` on Linux) — confirm.
!open kenya_liquid_cleaned.csv