In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import math

In [2]:
# Column -> dtype maps handed to pd.read_csv; each map's keys also serve as
# the usecols selection for its file.
# NOTE(review): facility_uid is typed str for contacts but int for
# identifiers, locations and pages -- confirm this difference is intended.
dtype_contacts = dict.fromkeys(
    ['facility_uid', 'phone_number', 'alternate_number', 'email_address', 'website'],
    str,
)

dtype_identifiers = dict.fromkeys(
    [
        'facility_uid', 'facility_code', 'state_unique_id', 'registration_no',
        'facility_name', 'alternate_name', 'start_date', 'ownership',
        'ownership_type', 'facility_level', 'facility_level_option',
        'days_of_operation', 'hours_of_operation',
    ],
    str,
)
dtype_identifiers['facility_uid'] = int  # the only non-string column

dtype_locations = dict.fromkeys(
    [
        'facility_uid', 'state', 'lga', 'ward', 'physical_location',
        'postal_address', 'longitude', 'latitude',
    ],
    str,
)
dtype_locations['facility_uid'] = int  # longitude/latitude are kept as text

dtype_pages = dict.fromkeys(
    [
        'state', 'lga', 'ward', 'facility_uid', 'facility_code',
        'facility_name', 'facility_level', 'ownership',
    ],
    str,
)
dtype_pages['facility_uid'] = int
# read_csv schema for the personnel file: the facility id is read as text and
# every head-count column as float64.
dtype_personnel = {
    "facility_uid": str,
    **dict.fromkeys(
        [
            "num_of_docs",
            "num_of_pharms",
            "num_of_midwifes",
            "num_of_nurses",
            "num_of_nurse_midwife",
            "num_of_pharm_technicians",
            "num_of_dentists",
            "num_of_health_attendants",
            "num_of_env_health_officers",
            "num_of_him_officers",
            "num_of_community_health_officer",
            "num_of_jun_community_extension_worker",
            "num_of_community_extension_workers",
            "num_of_dental_technicians",
            "num_of_lab_technicians",
            "num_of_lab_scientists",
        ],
        np.float64,
    ),
}

# read_csv schema for the services file: every column is text except the bed
# count, which is read as an integer.
dtype_services = dict.fromkeys(
    [
        "facility_uid",
        "outpatient_service",
        "ambulance_services",
        "mortuary_services",
        "onsite_imaging",
        "onsite_pharmarcy",
        "onsite_laboratory",
        "tot_num_beds",
        "special_service",
        "dental_service",
        "pediatrics_service",
        "gynecology_service",
        "surgical_service",
        "medical_service",
        "inpatient_service",
    ],
    str,
)
dtype_services["tot_num_beds"] = int

# read_csv schema for the status file: all four columns are read as text.
dtype_status = dict.fromkeys(
    ["facility_uid", "operation_status", "registration_status", "license_status"],
    str,
)

### Import some of the files and perform some transformations

In [3]:
# Pre-clean three raw exports and write "*2.csv" copies; the typed
# pd.read_csv calls further down load these copies instead of the raw files.

# identifiers: a missing facility_uid becomes 0 (the column is later read
# with an integer dtype).
# The statement `iden_df.start_date = iden_df.start_date.dropna()` used to sit
# here. It was a no-op: assigning the shortened Series back realigns it on the
# frame's index, so the dropped positions are filled with NaN again. Rows
# without a start_date are instead filtered out later, after the typed reload.
iden_df = pd.read_csv('identifiers.csv')
iden_df['facility_uid'] = iden_df['facility_uid'].fillna(0)
iden_df.to_csv('identifiers2.csv', index=False)

# personnel: a missing facility_uid becomes 0.
per_df = pd.read_csv('personnel.csv')
per_df['facility_uid'] = per_df['facility_uid'].fillna(0)
per_df.to_csv('personnel2.csv', index=False)

# services: a missing bed count becomes 0 (tot_num_beds is later read with an
# integer dtype).
serv_df = pd.read_csv('services.csv')
serv_df['tot_num_beds'] = serv_df['tot_num_beds'].fillna(0)
serv_df.to_csv('services2.csv', index=False)

# loc_df = pd.read_csv('locations.csv')
# loc_df['longitude']= loc_df['longitude'].apply(lambda x: x.split(',')[0])
# loc_df['latitude']= loc_df['latitude'].apply(lambda x: x.split(',')[0])

In [4]:
# loc_df['longitude']= loc_df['longitude'].apply(lambda x: np.float64(x))
# loc_df['latitude']= loc_df['latitude'].apply(lambda x: np.float64(x))

# loc_df.to_csv('locations2.csv')

## Import all files

In [5]:
# Load every table against its schema: the keys of each dtype map select the
# columns to read and the values fix their dtypes.
contacts_df = pd.read_csv(
    'contacts.csv',
    usecols=list(dtype_contacts),
    dtype=dtype_contacts,
)
identifiers_df = pd.read_csv(
    'identifiers2.csv',
    usecols=list(dtype_identifiers),
    dtype=dtype_identifiers,
)
locations_df = pd.read_csv(
    'locations.csv',
    usecols=list(dtype_locations),
    dtype=dtype_locations,
)
pages_df = pd.read_csv(
    'page_rows.csv',
    usecols=list(dtype_pages),
    dtype=dtype_pages,
)
personnel_df = pd.read_csv(
    'personnel2.csv',
    usecols=list(dtype_personnel),
    dtype=dtype_personnel,
)
services_df = pd.read_csv(
    'services2.csv',
    usecols=list(dtype_services),
    dtype=dtype_services,
)
status_df = pd.read_csv(
    'status.csv',
    usecols=list(dtype_status),
    dtype=dtype_status,
)

## Adding some transformations

In [6]:
# Missing values become 0, then every column -- facility_uid included -- is
# cast to int.
personnel_df = personnel_df.fillna(0).astype(int)

In [7]:
contacts_df.head(2)

Unnamed: 0,facility_uid,phone_number,alternate_number,email_address,website
0,48319756,0080-338-0170,,,
1,63964073,,,,


In [8]:
identifiers_df.head(2)

Unnamed: 0,facility_uid,facility_code,state_unique_id,registration_no,facility_name,alternate_name,start_date,ownership,ownership_type,facility_level,facility_level_option,days_of_operation,hours_of_operation
0,48319756,01/01/1/1/2/0001,,,Aba Holy Wounds Hospital,,2003-10-05,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",
1,63964073,01/01/1/1/2/0012,,,Chidima Hospital,,,Private,,Primary,,,


In [9]:
locations_df.head(2)

Unnamed: 0,facility_uid,state,lga,ward,physical_location,postal_address,longitude,latitude
0,48319756,Abia,Aba North,Ariaria,,,7.34134,5.11445
1,63964073,Abia,Aba North,Ariaria,,,,


In [10]:
services_df.head(2)

Unnamed: 0,facility_uid,outpatient_service,ambulance_services,mortuary_services,onsite_imaging,onsite_pharmarcy,onsite_laboratory,tot_num_beds,special_service,dental_service,pediatrics_service,gynecology_service,surgical_service,medical_service,inpatient_service
0,48319756,Yes,Yes,,Yes,Yes,Yes,14,,,,,,,Yes
1,63964073,,No,,,,,0,,,,,,,


In [11]:
personnel_df.head(2)

Unnamed: 0,facility_uid,num_of_docs,num_of_pharms,num_of_midwifes,num_of_nurses,num_of_nurse_midwife,num_of_pharm_technicians,num_of_dentists,num_of_health_attendants,num_of_env_health_officers,num_of_him_officers,num_of_community_health_officer,num_of_jun_community_extension_worker,num_of_community_extension_workers,num_of_dental_technicians,num_of_lab_technicians,num_of_lab_scientists
0,48319756,3,0,1,2,2,1,0,0,0,0,2,1,1,0,1,1
1,63964073,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [12]:
pages_df.head(2)

Unnamed: 0,state,lga,ward,facility_uid,facility_code,facility_name,facility_level,ownership
0,Abia,Aba North,Ariaria,48319756,01/01/1/1/2/0001,Aba Holy Wounds Hospital,Secondary,Private
1,Abia,Aba North,Ariaria,63964073,01/01/1/1/2/0012,Chidima Hospital,Primary,Private


In [13]:
pages_df.shape

(41988, 8)

In [14]:
status_df.head(2)

Unnamed: 0,facility_uid,operation_status,registration_status,license_status
0,48319756,Operational,Registered,Licensed
1,63964073,Operational,Unknown,Unknown


#### Adding the region (geo-political zone) to the locations dataframe

In [15]:
# Geo-political zone -> member states; flattened below into a direct
# state -> zone lookup so get_region does a single dictionary access.
_REGION_STATES = {
    "North Central": ("Benue", "FCT", "Kogi", "Kwara", "Nasarawa", "Niger", "Plateau"),
    "North East": ("Adamawa", "Bauchi", "Borno", "Gombe", "Taraba", "Yobe"),
    "North West": ("Kaduna", "Katsina", "Kano", "Kebbi", "Sokoto", "Jigawa", "Zamfara"),
    "South East": ("Abia", "Anambra", "Ebonyi", "Enugu", "Imo"),
    "South South": ("Akwa Ibom", "Bayelsa", "Cross River", "Delta", "Edo", "Rivers"),
    "South West": ("Ekiti", "Lagos", "Osun", "Ondo", "Ogun", "Oyo"),
}
_STATE_TO_REGION = {
    member: zone for zone, members in _REGION_STATES.items() for member in members
}


def get_region(state):
    """Return the geo-political zone a state belongs to.

    Parameters
    ----------
    state : str
        State name, e.g. ``"Abia"``. Leading and trailing whitespace is
        ignored; matching is otherwise exact and case-sensitive.

    Returns
    -------
    str
        One of "North Central", "North East", "North West", "South East",
        "South South" or "South West". An empty string is returned for an
        unrecognised name or for any non-string value (e.g. NaN or None).
    """
    # Missing values reach this function as float NaN / None when it is
    # applied to a DataFrame column; they map to '' rather than raising.
    if not isinstance(state, str):
        return ''
    return _STATE_TO_REGION.get(state.strip(), '')

In [16]:
# Derive each facility's geo-political zone from its state.
locations_df['Region'] = locations_df['state'].map(get_region)

In [28]:
locations_df.head()

Unnamed: 0,facility_uid,state,lga,ward,physical_location,postal_address,longitude,latitude,Region
0,48319756,Abia,Aba North,Ariaria,,,7.34134,5.11445,South East
1,63964073,Abia,Aba North,Ariaria,,,,,South East
2,26456575,Abia,Aba North,Ariaria,,,7.35312,5.12019,South East
3,73938255,Abia,Aba North,Ariaria,,,7.34552002,5.115905762,South East
4,40844348,Abia,Aba North,Ariaria,,,,,South East


In [18]:
# Normalise start_date to stripped text; a missing value becomes the literal
# string 'nan' via str().
identifiers_df['start_date'] = identifiers_df['start_date'].map(
    lambda raw: str(raw).strip()
)

In [19]:
identifiers_df[identifiers_df['start_date'] == 'nan'].shape[0] # number with nan

5586

In [20]:
# Keep only facilities that have a start_date ('nan' is the text that str()
# produced for missing values in the previous step).
# .copy() makes the result an independent frame, so the start_date assignment
# in the following cell does not write into a slice of the original and does
# not trigger pandas' SettingWithCopyWarning.
identifiers_df = identifiers_df[identifiers_df['start_date'] != 'nan'].copy()  # remove nan rows

In [21]:
# Parse the YYYY-MM-DD text into datetime values; strptime raises ValueError
# on any value that does not match the format.
identifiers_df['start_date'] = identifiers_df['start_date'].map(
    lambda text: datetime.strptime(text, '%Y-%m-%d')
)

In [22]:
identifiers_df.head()

Unnamed: 0,facility_uid,facility_code,state_unique_id,registration_no,facility_name,alternate_name,start_date,ownership,ownership_type,facility_level,facility_level_option,days_of_operation,hours_of_operation
0,48319756,01/01/1/1/2/0001,,,Aba Holy Wounds Hospital,,2003-10-05 00:00:00,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",
2,26456575,01/01/1/1/2/0025,,,Euna Hospital,,2008-12-03 00:00:00,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",
3,73938255,01/01/1/1/2/0030,,,Goodness & Mercy hospital,,2009-02-06 00:00:00,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",
5,45969224,01/01/1/1/2/0041,,,Ivory Specialist Hospital Maternity,,1992-02-04 00:00:00,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",
8,61180169,01/01/1/1/2/0054,,,Nnadozie Hospital/ Maternity,,1984-08-08 00:00:00,Private,For Profit,Secondary,,"Monday,Tuesday,Wednesday,Thursday,Friday,Satur...",


In [23]:
pages_df.columns

Index(['state', 'lga', 'ward', 'facility_uid', 'facility_code',
       'facility_name', 'facility_level', 'ownership'],
      dtype='object')

In [24]:

# Same eight columns as pages_df, with facility_uid moved to the front.
pages_df1 = pages_df.loc[:, [
    'facility_uid',
    'state',
    'lga',
    'ward',
    'facility_code',
    'facility_name',
    'facility_level',
    'ownership',
]]

In [25]:
# Rebind pages_df to the reordered frame (facility_uid as the first column).
pages_df = pages_df1

### Saving to csv

In [26]:
# Write each transformed table to its "<name>_t.csv" file, omitting the index.
for _frame, _target in (
    (contacts_df, 'contacts_t.csv'),
    (identifiers_df, 'identifiers_t.csv'),
    (locations_df, 'locations_t.csv'),
    (pages_df, 'pages_t.csv'),
    (personnel_df, 'personnel_t.csv'),
    (services_df, 'services_t.csv'),
    (status_df, 'status_t.csv'),
):
    _frame.to_csv(_target, index=False)

In [27]:
pages_df.head()

Unnamed: 0,facility_uid,state,lga,ward,facility_code,facility_name,facility_level,ownership
0,48319756,Abia,Aba North,Ariaria,01/01/1/1/2/0001,Aba Holy Wounds Hospital,Secondary,Private
1,63964073,Abia,Aba North,Ariaria,01/01/1/1/2/0012,Chidima Hospital,Primary,Private
2,26456575,Abia,Aba North,Ariaria,01/01/1/1/2/0025,Euna Hospital,Secondary,Private
3,73938255,Abia,Aba North,Ariaria,01/01/1/1/2/0030,Goodness & Mercy hospital,Secondary,Private
4,40844348,Abia,Aba North,Ariaria,01/01/1/1/2/0031,Green Land Hospital,Primary,Private
