In [118]:
import numpy as np
import pandas as pd
from datetime import datetime

In [87]:
# Column -> dtype schemas, one per source table. The keys double as the list
# of columns to keep and the values as the dtype each column is parsed with.

# Contact details per facility.
dtype_contacts = dict(
    facility_uid=int,
    phone_number=str,
    alternate_number=str,
    email_address=str,
    website=str,
)

# Identifying / descriptive attributes per facility (all text except the uid).
dtype_identifiers = dict(
    facility_uid=int,
    facility_code=str,
    state_unique_id=str,
    registration_no=str,
    facility_name=str,
    alternate_name=str,
    start_date=str,
    ownership=str,
    ownership_type=str,
    facility_level=str,
    facility_level_option=str,
    days_of_operation=str,
    hours_of_operation=str,
)

# Administrative location plus coordinates.
dtype_locations = dict(
    facility_uid=int,
    state=str,
    lga=str,
    ward=str,
    physical_location=str,
    postal_address=str,
    longitude=np.float64,
    latitude=np.float64,
)

# Listing-page rows.
dtype_pages = dict(
    state=str,
    lga=str,
    ward=str,
    facility_uid=int,
    facility_code=str,
    facility_name=str,
    facility_level=str,
    ownership=str,
)

# Staff head-counts: every count column is parsed as float64.
dtype_personnel = {
    'facility_uid': int,
    **dict.fromkeys(
        (
            'num_of_docs',
            'num_of_pharms',
            'num_of_midwifes',
            'num_of_nurses',
            'num_of_nurse_midwife',
            'num_of_pharm_technicians',
            'num_of_dentists',
            'num_of_health_attendants',
            'num_of_env_health_officers',
            'num_of_him_officers',
            'num_of_community_health_officer',
            'num_of_jun_community_extension_worker',
            'num_of_community_extension_workers',
            'num_of_dental_technicians',
            'num_of_lab_technicians',
            'num_of_lab_scientists',
        ),
        np.float64,
    ),
}

# Service flags are kept as text; only the bed count is numeric.
dtype_services = dict(
    facility_uid=int,
    outpatient_service=str,
    ambulance_services=str,
    mortuary_services=str,
    onsite_imaging=str,
    onsite_pharmarcy=str,
    onsite_laboratory=str,
    tot_num_beds=int,
    special_service=str,
    dental_service=str,
    pediatrics_service=str,
    gynecology_service=str,
    surgical_service=str,
    medical_service=str,
    inpatient_service=str,
)

# Operational / regulatory status.
dtype_status = dict(
    facility_uid=int,
    operation_status=str,
    registration_status=str,
    license_status=str,
)

### Import some of the files and fill in missing values (NaN)

In [88]:
def _fill_missing_and_save(source_csv, column, target_csv):
    """Read ``source_csv``, zero-fill ``column``, and write the result to ``target_csv``.

    Missing values in ``column`` are replaced with 0 and the column is cast to
    int64. Without the cast, a column that contained NaN stays float and is
    written as e.g. ``14.0``, which a later ``pd.read_csv(..., dtype=int)``
    can reject. The file is written with ``index=False`` so the row index is
    not saved as an extra unnamed column.

    Parameters
    ----------
    source_csv : str
        Path of the raw CSV file to read.
    column : str
        Name of the integer-valued column whose gaps are filled with 0.
    target_csv : str
        Path the cleaned CSV file is written to.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame that was written to ``target_csv``.

    Raises
    ------
    ValueError
        If ``column`` holds values that cannot be cast to int64.
    """
    frame = pd.read_csv(source_csv)
    frame[column] = frame[column].fillna(0).astype('int64')
    frame.to_csv(target_csv, index=False)
    return frame


# NOTE(review): a missing facility_uid becomes the placeholder id 0 — confirm
# that such rows should be kept rather than dropped.
iden_df = _fill_missing_and_save('identifiers.csv', 'facility_uid', 'identifiers2.csv')

per_df = _fill_missing_and_save('personnel.csv', 'facility_uid', 'personnel2.csv')

serv_df = _fill_missing_and_save('services.csv', 'tot_num_beds', 'services2.csv')

## Import all files

In [89]:
def _read_with_schema(csv_path, schema):
    """Load ``csv_path`` keeping only the columns in ``schema``, typed as ``schema`` says."""
    return pd.read_csv(csv_path, usecols=schema.keys(), dtype=schema)


# Identifiers, personnel and services are read from the pre-cleaned "*2.csv"
# copies; the remaining tables come straight from their raw files.
contacts_df = _read_with_schema('contacts.csv', dtype_contacts)
identifiers_df = _read_with_schema('identifiers2.csv', dtype_identifiers)
locations_df = _read_with_schema('locations.csv', dtype_locations)
pages_df = _read_with_schema('page_rows.csv', dtype_pages)
personnel_df = _read_with_schema('personnel2.csv', dtype_personnel)
services_df = _read_with_schema('services2.csv', dtype_services)
status_df = _read_with_schema('status.csv', dtype_status)

## Adding some transformations

In [90]:
# Treat missing values as zero, then cast every column to int.
personnel_df = personnel_df.fillna(0).astype(int)

In [91]:
# Preview the first two rows of the contacts table.
contacts_df.head(2)

Unnamed: 0,facility_uid,phone_number,alternate_number,email_address,website
0,48319756,0080-338-0170,,,
1,26456575,0080-330-3474,0081-274-7084,,


In [92]:
# Preview the first two rows of the identifiers table.
identifiers_df.head(2)

Unnamed: 0,facility_uid,facility_code,state_unique_id,registration_no,facility_name,alternate_name,start_date,ownership,ownership_type,facility_level,facility_level_option,days_of_operation,hours_of_operation
0,48319756,01/01/1/1/2/0001,,,Aba Holy Wounds Hospital,,2003-10-05,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",
1,26456575,01/01/1/1/2/0025,,,Euna Hospital,,2008-12-03,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",


In [95]:
# Preview the first two rows of the locations table.
locations_df.head(2)

Unnamed: 0,facility_uid,state,lga,ward,physical_location,postal_address,longitude,latitude
0,48319756,Abia,Aba North,Ariaria,,,7.34134,5.11445
1,26456575,Abia,Aba North,Ariaria,,,7.35312,5.12019


In [93]:
# Preview the first two rows of the services table.
services_df.head(2)

Unnamed: 0,facility_uid,outpatient_service,ambulance_services,mortuary_services,onsite_imaging,onsite_pharmarcy,onsite_laboratory,tot_num_beds,special_service,dental_service,pediatrics_service,gynecology_service,surgical_service,medical_service,inpatient_service
0,48319756,Yes,Yes,,Yes,Yes,Yes,14,,,,,,,Yes
1,26456575,Yes,No,,,Yes,Yes,17,,,,,,,Yes


In [94]:
# Preview the first two rows of the personnel table (counts are ints by now).
personnel_df.head(2)

Unnamed: 0,facility_uid,num_of_docs,num_of_pharms,num_of_midwifes,num_of_nurses,num_of_nurse_midwife,num_of_pharm_technicians,num_of_dentists,num_of_health_attendants,num_of_env_health_officers,num_of_him_officers,num_of_community_health_officer,num_of_jun_community_extension_worker,num_of_community_extension_workers,num_of_dental_technicians,num_of_lab_technicians,num_of_lab_scientists
0,48319756,3,0,1,2,2,1,0,0,0,0,2,1,1,0,1,1
1,26456575,1,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0


In [96]:
# Preview the first two rows of the pages table.
pages_df.head(2)

Unnamed: 0,state,lga,ward,facility_uid,facility_code,facility_name,facility_level,ownership
0,Abia,Aba North,Ariaria,48319756,01/01/1/1/2/0001,Aba Holy Wounds Hospital,Secondary,Private
1,Abia,Aba North,Ariaria,26456575,01/01/1/1/2/0025,Euna Hospital,Secondary,Private


In [98]:
# (rows, columns) of the pages table.
pages_df.shape

(22368, 8)

In [97]:
# Preview the first two rows of the status table.
status_df.head(2)

Unnamed: 0,facility_uid,operation_status,registration_status,license_status
0,48319756,Operational,Registered,Licensed
1,26456575,Operational,Registered,Licensed


#### Adding a Region column (geo-political zone) to the locations dataframe

In [112]:
# Normalised (whitespace-collapsed, case-folded) state name -> geo-political
# zone. Built once so each call is a single dictionary lookup instead of a
# scan through freshly built lists.
_REGION_BY_STATE = {
    state_name.casefold(): zone
    for zone, state_names in {
        "North Central": ["Benue", "FCT", "Federal Capital Territory", "Kogi",
                          "Kwara", "Nasarawa", "Niger", "Plateau"],
        "North East": ["Adamawa", "Bauchi", "Borno", "Gombe", "Taraba", "Yobe"],
        "North West": ["Kaduna", "Katsina", "Kano", "Kebbi", "Sokoto", "Jigawa",
                       "Zamfara"],
        "South East": ["Abia", "Anambra", "Ebonyi", "Enugu", "Imo"],
        "South South": ["Akwa Ibom", "Bayelsa", "Cross River", "Delta", "Edo",
                        "Rivers"],
        "South West": ["Ekiti", "Lagos", "Osun", "Ondo", "Ogun", "Oyo"],
    }.items()
    for state_name in state_names
}


def get_region(state):
    """Return the geo-political zone of a Nigerian state.

    Matching ignores letter case and leading, trailing or repeated whitespace,
    so values such as ``" lagos "`` or ``"AKWA  IBOM"`` still resolve. The
    spelled-out ``"Federal Capital Territory"`` is accepted as well as ``"FCT"``.

    Parameters
    ----------
    state : object
        State name. Anything that is not a string (``None``, ``NaN``, ...) is
        treated as unknown.

    Returns
    -------
    str
        One of ``"North Central"``, ``"North East"``, ``"North West"``,
        ``"South East"``, ``"South South"``, ``"South West"``, or ``''`` when
        the state is not recognised.
    """
    if not isinstance(state, str):
        return ''
    # Collapse whitespace runs and fold case before the lookup.
    normalised = ' '.join(state.split()).casefold()
    return _REGION_BY_STATE.get(normalised, '')

In [113]:
# Derive each facility's geo-political zone from its state name.
locations_df['Region'] = locations_df['state'].map(get_region)

In [114]:
# Display the locations table, now including the new Region column.
# NOTE(review): this renders the whole frame; consider .head() or .sample() to keep the output small.
locations_df

Unnamed: 0,facility_uid,state,lga,ward,physical_location,postal_address,longitude,latitude,Region
0,48319756,Abia,Aba North,Ariaria,,,7.341340,5.114450,South East
1,26456575,Abia,Aba North,Ariaria,,,7.353120,5.120190,South East
2,73938255,Abia,Aba North,Ariaria,,,7.345520,5.115906,South East
3,45969224,Abia,Aba North,Ariaria,,,7.340881,5.113892,South East
4,61180169,Abia,Aba North,Ariaria,74 okwu avenue off faul k rd,,7.358399,5.118717,South East
...,...,...,...,...,...,...,...,...,...
22363,84345661,Zamfara,Kaura Namoda,Sarkin Mafara/Sarkin Barya,Opposite GSS Kaura Along Shinkafi Road,,6.591670,12.610040,North West
22364,18215453,Zamfara,Kaura Namoda,Sarkin Mafara/Sarkin Barya,Tudun Wada Bakin Kasuwa Kaura,,6.591150,12.589690,North West
22365,33362459,Zamfara,Kaura Namoda,Sarkin Mafara/Sarkin Barya,I.O Street Sabuwar Kaura Area,,6.589840,12.584850,North West
22366,12895283,Zamfara,Maradun,Faru/Magami,Sabon Fegi Faru,,6.287590,12.750880,North West


In [122]:
def _parse_start_date(value):
    """Convert one ``start_date`` cell to a datetime.

    Strings are parsed as ``YYYY-MM-DD``. Values that are already datetimes
    are returned unchanged, so the cell can be re-run safely. Missing values
    become ``pd.NaT`` instead of raising ``TypeError`` inside ``strptime``.

    Raises
    ------
    ValueError
        If a string does not match the ``%Y-%m-%d`` format.
    """
    if isinstance(value, datetime):
        return value
    if pd.isna(value):
        return pd.NaT
    return datetime.strptime(value, '%Y-%m-%d')


identifiers_df['start_date'] = identifiers_df['start_date'].apply(_parse_start_date)

In [123]:
# Check that start_date now holds parsed datetimes.
identifiers_df.head()

Unnamed: 0,facility_uid,facility_code,state_unique_id,registration_no,facility_name,alternate_name,start_date,ownership,ownership_type,facility_level,facility_level_option,days_of_operation,hours_of_operation
0,48319756,01/01/1/1/2/0001,,,Aba Holy Wounds Hospital,,2003-10-05 00:00:00,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",
1,26456575,01/01/1/1/2/0025,,,Euna Hospital,,2008-12-03 00:00:00,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",
2,73938255,01/01/1/1/2/0030,,,Goodness & Mercy hospital,,2009-02-06 00:00:00,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",
3,45969224,01/01/1/1/2/0041,,,Ivory Specialist Hospital Maternity,,1992-02-04 00:00:00,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",
4,61180169,01/01/1/1/2/0054,,,Nnadozie Hospital/ Maternity,,1984-08-08 00:00:00,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",


### Saving to CSV

In [125]:
# Output file -> transformed table. Written in this order, without the row index.
_exports = (
    ('contacts_t.csv', contacts_df),
    ('identifiers_t.csv', identifiers_df),
    ('locations_t.csv', locations_df),
    ('pages_t.csv', pages_df),
    ('personnel_t.csv', personnel_df),
    ('services_t.csv', services_df),
    ('status_t.csv', status_df),
)
for _target_csv, _table in _exports:
    _table.to_csv(_target_csv, index=False)