# Data Sources

- WHO

- ISS

- PLM

In [57]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fiona
import geopandas as gpd
import folium
import os
from folium.plugins import MarkerCluster
from math import radians, sin, cos, acos

In [2]:
pd.options.mode.chained_assignment = None
pd.set_option('display.max_columns', None)

# Import data

In [68]:
dataDir = r'C:\\Users\\DUANYUEYUN\\Documents\\ArcGIS\\Projects\\GRID3\\Sub-Saharan_health_facilities.gdb'
df = gpd.read_file(dataDir, driver='FileGDB', layer='HDX_WHO_sub_saharan_health_facilities')

# Re-project to WGS84
df['geometry'] = df['geometry'].to_crs(epsg=4326)
df_WHO = df[df['Country']=='Mozambique']
df_WHO.reset_index(inplace=True)

In [4]:
dataDir = r'C:\\Users\\DUANYUEYUN\\Documents\\ArcGIS\\Projects\\GRID3\\Sub-Saharan_health_facilities.gdb'
df = gpd.read_file(dataDir, driver='FileGDB', layer='ISS_sub_saharan')
# Re-project to WGS84
df['geometry'] = df['geometry'].to_crs(epsg=4326)
df_ISS = df[df['countries']=='MOZAMBIQUE']
df_ISS.reset_index(inplace=True)
df_ISS.drop(columns=['F27', 'name_of_facility_visited_other',
       'F28', 'F29', 'F_index', 'F_parent_table_name', 'F_parent_index',
       'F_tags', 'F_notes', 'F_version', 'F_duration', 'F_submitted_by',
       'F_xform_id'], inplace=True)

In [5]:
country = 'Mozambique'
dataDir = r"C:\Users\DUANYUEYUN\Documents\ArcGIS\Projects\GRID3\Healthsites"
filename = country + '-node.shp'
path = os.path.join(dataDir, country, filename)
df_HS = gpd.read_file(path)
# Re-project to WGS84
df_HS['geometry'] = df_HS['geometry'].to_crs(epsg=4326)
df_HS['country'] = country
df_HS.drop(columns=['healthcare',  'operator', 
       'speciality', 'operator_ty', 'contact_num', 'operational',
       'opening_hou', 'beds', 'staff_docto', 'staff_nurse', 'health_amen',
       'dispensing', 'wheelchair', 'emergency', 'insurance', 'water_sourc',
       'electricity', 'is_in_healt', 'url', 'addr_housen', 'addr_street',
       'addr_postco', 'addr_city'], inplace=True)

In [6]:
df_PLM = pd.read_excel(io=r"C:\Users\DUANYUEYUN\Documents\GRID3\Health facilities\PLM Data\PLM_Masterfile_v1_May_2020.xlsx",
                       sheet_name = 2, usecols = 'A:D,F:H,J:K')

In [50]:
# Common variables

clean_types = ['Centro de Saúde', 'Posto de Saúde', 'Hospital Rurais', 
               'Hospital Provincial', 'Hospital Distrital', 'Hospital Geral', 
               'Hospital Central', 'Hospital Especializado', 'Other']

saveDir = r"C:\Users\DUANYUEYUN\Documents\GRID3\Health facilities\Cleaned data\MOZ"

# Dataset overview

1. Number of data points

In [7]:
print("Dataset: WHO")
print("Number of data points:", df_WHO.shape[0])

print("Dataset: ISS")
print("Number of data points:", df_ISS.shape[0])

print("Dataset: HealthSites")
print("Number of data points:", df_HS.shape[0])

print("Dataset: Project last mile")
print("Number of data points:", df_PLM.shape[0])

Dataset: WHO
Number of data points: 1579
Dataset: ISS
Number of data points: 1430
Dataset: HealthSites
Number of data points: 836
Dataset: Project last mile
Number of data points: 1712


2. Number of missing values in name and type columns

In [8]:
print("WHO:")
df_WHO[['Facility_n', 'Facility_t']].isna().sum()

WHO:


Facility_n    0
Facility_t    0
dtype: int64

In [9]:
print("ISS:")
df_ISS[['name_of_facility_visited', 'type_of_facility_visited']].isna().sum()

ISS:


name_of_facility_visited    0
type_of_facility_visited    0
dtype: int64

In [10]:
print("Healthsites:")
df_HS[['name', 'amenity']].isna().sum()

Healthsites:


name       28
amenity     1
dtype: int64

In [11]:
print("PLM:")
df_PLM[['UNIT', 'TYPE OF US']].isna().sum()

PLM:


UNIT          0
TYPE OF US    0
dtype: int64

3. Unique facility type and counts

In [12]:
print("Dataset: WHO")
print("Facility types and the corresponding counts:")
print(df_WHO['Facility_t'].value_counts())

Dataset: WHO
Facility types and the corresponding counts:
Centro de Saúde Rural II    982
Posto de Saúde              262
Centro de Saúde Rural I     130
Centro de Saúde Urbano B     56
Centro de Saúde Urbano C     49
Centro de Saúde Urbano A     39
Hospital Rural               29
Hospital Distrital           16
Hospital Provincial           8
Hospital Geral                5
Hospital Central              3
Name: Facility_t, dtype: int64


In [13]:
print("Dataset: ISS")
print("Facility types and the corresponding counts:")
print(df_ISS['type_of_facility_visited'].value_counts())

Dataset: ISS
Facility types and the corresponding counts:
HEALTH_FACILITY     1205
HEALTH_POST           95
DISTRICT_HOSP         59
RURAL_HOSPITAL        23
GENERAL_HOSP          18
CENTRAL_HOSPITAL      16
others                 7
SPIRITUAL_HEALER       3
PROVINCIAL_HOSP        2
PRIVATE_FACILITY       2
Name: type_of_facility_visited, dtype: int64


In [14]:
print("Dataset: HealthSites")
print("Facility types and the corresponding counts:")
print(df_HS['amenity'].value_counts())

Dataset: HealthSites
Facility types and the corresponding counts:
clinic      699
pharmacy     72
doctors      47
hospital     15
dentist       2
Name: amenity, dtype: int64


In [15]:
print("Dataset: PLM")
print("Facility types and the corresponding counts:")
print(df_PLM['TYPE OF US'].value_counts())

Dataset: PLM
Facility types and the corresponding counts:
CS    1511
PS     129
HD      24
HR      23
HM       9
HP       7
HG       5
HC       2
HE       2
Name: TYPE OF US, dtype: int64


# WHO

## Name Cleaning

Format of facility names in WHO dataset: name followed by facility type. Extract the name by eliminating the facility type in `Facility_n`.

In [69]:
WHO_cols = ['Admin1', 'Facility_n', 'Facility_t']
df_WHO[WHO_cols].sample(5)

Unnamed: 0,Admin1,Facility_n,Facility_t
1176,Sofala,Nsituculo Centro de Saúde Rural II,Centro de Saúde Rural II
575,Maputo Provincia,Matalane Centro de Saúde Rural II,Centro de Saúde Rural II
252,Inhambane,Belane Centro de Saúde Rural II,Centro de Saúde Rural II
184,Gaza,Mamonho Centro de Saúde Rural II,Centro de Saúde Rural II
1281,Tete,Lifidzi Centro de Saúde Rural II,Centro de Saúde Rural II


In [70]:
# Name cleaning
short_names = []
for idx, row in df_WHO.iterrows():
    short_name = row['Facility_n'].replace(row['Facility_t'], '')
    short_names.append(short_name.strip())
df_WHO['name_short'] = short_names
df_WHO['name_short'] = df_WHO['name_short'].str.replace("^de", '', regex=True) \
.str.strip()

In [71]:
df_WHO[WHO_cols+['name_short']].sample(5)

Unnamed: 0,Admin1,Facility_n,Facility_t,name_short
622,Maputo Provincia,Zona Verde Posto de Saúde,Posto de Saúde,Zona Verde
272,Inhambane,Cruzamento Posto de Saúde,Posto de Saúde,Cruzamento
1305,Tete,M'Saua Centro de Saúde Rural II,Centro de Saúde Rural II,M'Saua
364,Inhambane,Teles Posto de Saúde,Posto de Saúde,Teles
281,Inhambane,Homoine Centro de Saúde Rural I,Centro de Saúde Rural I,Homoine


## Type Cleaning

In [72]:
# dictionary mapping WHO type to clean types
who_type_dict = {'Centro de Saúde Rural I': 'Centro de Saúde', 'Posto de Saúde':'Posto de Saúde',
            'Centro de Saúde Rural II':'Centro de Saúde', 'Centro de Saúde Urbano C':'Centro de Saúde',
            'Hospital Rural':'Hospital Rurais', 'Centro de Saúde Urbano B':'Centro de Saúde',
            'Hospital Provincial':'Hospital Provincial', 'Hospital Distrital':'Hospital Distrital',
            'Centro de Saúde Urbano A':'Centro de Saúde', 'Hospital Geral':'Hospital Geral', 
            'Hospital Central':'Hospital Central'}
df_WHO['type_cleaned'] = [who_type_dict[facility_t] for facility_t in df_WHO['Facility_t']]

In [73]:
print("results:")
df_WHO['type_cleaned'].value_counts()

results:


Centro de Saúde        1256
Posto de Saúde          262
Hospital Rurais          29
Hospital Distrital       16
Hospital Provincial       8
Hospital Geral            5
Hospital Central          3
Name: type_cleaned, dtype: int64

## Removing Duplicates

In [74]:
def distance(origin, destination):
    lat1, lon1 = origin
    lat2, lon2 = destination
    lat1 = radians(lat1)
    lat2 = radians(lat2)
    lon1 = radians(lon1)
    lon2 = radians(lon2)
    dist = 6371.01 * acos(sin(lat1)*sin(lat2) + cos(lat1)*cos(lat2)*cos(lon1 - lon2))
    return dist

In [75]:
# Facilities in the same admin1 region and with the same name 
counts = df_WHO.groupby(['Admin1', 'Facility_n']).agg(count=('index','count'))\
.sort_values(by='count',ascending = False)
counts = counts[counts['count']>1]
counts.reset_index(inplace=True)

# Calculate distance for each pair
distances = []
for idx, row in counts.iterrows():
    
    test_df = df_WHO[(df_WHO['Admin1']==row['Admin1']) & \
                     (df_WHO['Facility_n']==row['Facility_n'])]
    
    points = []
    for idx2, row2 in test_df.iterrows():
        lat = row2['Lat']
        lon = row2['Long']
        points.append((lat, lon))
    
    origin = points[0]
    destination = points[1]
    dist = distance(origin, destination)
    distances.append(dist)

In [76]:
print("Minimum distance between facilities in the same admin1 region and with the same name: ")
print(round(min(distances), 3), 'km')

Minimum distance between facilities in the same admin1 region and with the same name: 
17.309 km


In [77]:
df_WHO.head()

Unnamed: 0,index,Country,Admin1,Facility_n,Facility_t,Ownership,Lat,Long,LL_source,geometry,name_short,type_cleaned
0,49849,Mozambique,Cabo Delgado,Ancuabe Centro de Saúde Rural I,Centro de Saúde Rural I,,-12.9669,39.8572,GPS,POINT (39.85720 -12.96690),Ancuabe,Centro de Saúde
1,49850,Mozambique,Cabo Delgado,Balama Centro de Saúde Rural I,Centro de Saúde Rural I,,-13.3483,38.5669,GPS,POINT (38.56690 -13.34830),Balama,Centro de Saúde
2,49851,Mozambique,Cabo Delgado,Bilibiza Centro de Saúde Rural I,Centro de Saúde Rural I,,-12.5639,40.2853,GPS,POINT (40.28530 -12.56390),Bilibiza,Centro de Saúde
3,49852,Mozambique,Cabo Delgado,Bilibiza Posto de Saúde,Posto de Saúde,,-13.5795,39.7356,GPS,POINT (39.73560 -13.57950),Bilibiza,Posto de Saúde
4,49853,Mozambique,Cabo Delgado,Cagembe Centro de Saúde Rural II,Centro de Saúde Rural II,,-12.38,40.2642,GPS,POINT (40.26420 -12.38000),Cagembe,Centro de Saúde


## Change Column Names

In [66]:
cols = ['id', 'admin1', 'admin2', 'name_full', 'name_short', 'type_original', 'type_cleaned', 
        'latitude', 'longitude', 'source']

In [78]:
df_WHO.drop(columns='Country', inplace=True)
df_WHO.rename(columns={'index':'id', 'Admin1':'admin1',
                       'Facility_n':'name_full', 'Facility_t':'type_original'}, 
              inplace=True)

In [79]:
df_WHO['longitude'] = df_WHO['geometry'].x
df_WHO['latitude'] = df_WHO['geometry'].y

df_WHO['source'] = 'WHO'
df_WHO['admin2'] = None

In [80]:
df_WHO = df_WHO[cols]
df_WHO.head()

Unnamed: 0,id,admin1,admin2,name_full,name_short,type_original,type_cleaned,latitude,longitude,source
0,49849,Cabo Delgado,,Ancuabe Centro de Saúde Rural I,Ancuabe,Centro de Saúde Rural I,Centro de Saúde,-12.9669,39.8572,WHO
1,49850,Cabo Delgado,,Balama Centro de Saúde Rural I,Balama,Centro de Saúde Rural I,Centro de Saúde,-13.3483,38.5669,WHO
2,49851,Cabo Delgado,,Bilibiza Centro de Saúde Rural I,Bilibiza,Centro de Saúde Rural I,Centro de Saúde,-12.5639,40.2853,WHO
3,49852,Cabo Delgado,,Bilibiza Posto de Saúde,Bilibiza,Posto de Saúde,Posto de Saúde,-13.5795,39.7356,WHO
4,49853,Cabo Delgado,,Cagembe Centro de Saúde Rural II,Cagembe,Centro de Saúde Rural II,Centro de Saúde,-12.38,40.2642,WHO


In [81]:
#df_WHO.to_csv(saveDir + '\WHO_cleaned_0729.csv')

# ISS

## Name Cleaning

Facility name in ISS dataset are formatted as prefix (abbreviation of facility type) followed by name. Split `name_of_facility_visited` using the first whitespace to obtain prefix and name.

In [41]:
ISS_cols = ['states','districts', 'name_of_facility_visited', 'type_of_facility_visited']
df_ISS[ISS_cols].sample(5)

Unnamed: 0,states,districts,name_of_facility_visited,type_of_facility_visited
651,Zambezia,Mulevala,CS JAJO,HEALTH_FACILITY
1315,Zambezia,Derre,CS GUERRISSA,HEALTH_FACILITY
1174,Zambezia,Alto Molocue,CS NIVAVA,HEALTH_FACILITY
825,Niassa,Mecanhelas,CS SALE,HEALTH_FACILITY
1017,Zambezia,Pebane,CS MULIGODE,HEALTH_POST


In [42]:
# obtain the prefix and facility name
ISS_split = df_ISS['name_of_facility_visited'].str.split(expand=True,n=1)
df_ISS['prefix'] = ISS_split[0]
df_ISS['name_short'] = ISS_split[1]

In [43]:
# Obtain unique values of prefix
df_ISS['prefix'].unique()

array(['CS', 'HD', 'PS', 'HC', 'HR', 'HG', 'Cs', 'CS.'], dtype=object)

In [44]:
# replace prefix Cs, CS. with CS for consistency
df_ISS['prefix'] = df_ISS['prefix'].str.replace('CS.|Cs', 'CS')

In [47]:
# Capitalize 1st letter 
df_ISS['name_short'] = df_ISS['name_short'].str.title()
df_ISS['states'] = df_ISS['states'].str.title()
df_ISS['districts'] = df_ISS['districts'].str.title()

In [48]:
df_ISS[ISS_cols+['prefix', 'name_short']].sample(5)

Unnamed: 0,states,districts,name_of_facility_visited,type_of_facility_visited,prefix,name_short
53,Zambezia,Mopeia,CS SANGALAZA,RURAL_HOSPITAL,CS,Sangalaza
1303,Nampula,Malema,CS NATALEIA,HEALTH_FACILITY,CS,Nataleia
828,Zambezia,Chinde,CS MADAL,HEALTH_FACILITY,CS,Madal
494,Manica,Tambara,CS NHACAFULA,HEALTH_FACILITY,CS,Nhacafula
699,Zambezia,Pebane,CS 7 DE ABRIL,HEALTH_FACILITY,CS,7 De Abril


## Type Cleaning

In [121]:
iss_type_dict = {'CENTRAL_HOSPITAL':'Hospital Central', 'others':'Other', 
                 'DISTRICT_HOSP':'Hospital Distrital', 'HEALTH_FACILITY':'Centro de Saúde',
                 'HEALTH_POST':'Posto de Saúde', 'SPIRITUAL_HEALER':'Other', 
                 'PRIVATE_FACILITY':'Other', 'RURAL_HOSPITAL':'Hospital Rurais', 
                 'GENERAL_HOSP':'Hospital Geral', 'PROVINCIAL_HOSP':'Hospital Provincial'}
df_ISS['type_cleaned'] = [iss_type_dict[facility_t] for facility_t in df_ISS['type_of_facility_visited']]

In [122]:
print("results:")
df_ISS["type_cleaned"].value_counts()

results:


Centro de Saúde        1205
Posto de Saúde           95
Hospital Distrital       59
Hospital Rurais          23
Hospital Geral           18
Hospital Central         16
Other                    12
Hospital Provincial       2
Name: type_cleaned, dtype: int64

## Remove Duplicates

Use `date_of_visit` as the time when data is collected.

In [123]:
print("Date range: ")
df_ISS['date'] = pd.to_datetime(df_ISS['date_of_visit'])
print("start:", df_ISS['date'].min().date(), 
      "\nend:", df_ISS['date'].max().date())

Date range: 
start: 2017-12-04 
end: 2020-04-08


In [124]:
df_ISS.sort_values(by=['states', 'districts', 
                       'name_of_facility_visited', 'date'],
                  inplace=True)

In [125]:
df_ISS2 = df_ISS[~df_ISS.duplicated(
    subset=['states', 'districts', 'name_of_facility_visited'],
    keep = 'last')]

In [126]:
print("Number of data points after removing duplicates:", df_ISS2.shape[0])

Number of data points after removing duplicates: 382


In [127]:
counts = df_ISS2.groupby('name_short').agg(count=('index','count'))\
.sort_values('count', ascending=False)

counts = counts[counts['count']>1]
counts.reset_index(inplace=True)

In [128]:
# Calculate distance for each pair
distances = []
for idx, row in counts.iterrows():
    
    test_df = df_ISS2[df_ISS2['name_short']==row['name_short']]
    
    points = []
    for idx2, row2 in test_df.iterrows():
        lat = row2['geometry'].x
        lon = row2['geometry'].y
        points.append((lat, lon))
    
    origin = points[0]
    destination = points[1]
    dist = distance(origin, destination)
    distances.append(dist)

In [129]:
counts['distance'] = distances

# suspect duplicates
suspect_dups = counts[counts['distance']<0.1]

suspect_dups

Unnamed: 0,name_short,count,distance
4,Morrumbala,2,0.029245
8,Cuamba,2,0.045346
11,Gurue,2,0.082024


In [130]:
for name in suspect_dups['name_short']:
    print(name)
    print(df_ISS2[df_ISS2['name_short']==name]['type_cleaned'].unique())

Morrumbala
['Centro de Saúde' 'Hospital Distrital']
Cuamba
['Centro de Saúde' 'Hospital Distrital']
Gurue
['Centro de Saúde' 'Hospital Distrital']


## Change Column Names

In [135]:
df_ISS2['longitude'] = df_ISS2['geometry'].x
df_ISS2['latitude'] = df_ISS2['geometry'].y

In [140]:
df_ISS2.rename(columns={'index':'id', 'states':'admin1',
                        'districts':'admin2',
                       'name_of_facility_visited':'name_full', 
                        'type_of_facility_visited':'type_original'}, 
              inplace=True)
df_ISS2['source'] = 'ISS'

In [141]:
df_ISS = df_ISS2[cols]

In [142]:
df_ISS.head()

Unnamed: 0,id,admin1,admin2,name_full,name_short,type_original,type_cleaned,latitude,longitude,source
982,63644,Cabo Delgado,Meluco,CS MELUCO,Meluco,HEALTH_FACILITY,Centro de Saúde,-12.540123,39.640971,ISS
1382,64044,Cabo Delgado,Metuge,CS MIEZE,Mieze,HEALTH_FACILITY,Centro de Saúde,-13.10724,40.453321,ISS
80,62742,Cabo Delgado,Mocimboa Da Praia,CS NANDUADUA,Nanduadua,HEALTH_FACILITY,Centro de Saúde,-11.349539,40.351586,ISS
601,63263,Cabo Delgado,Montepuez,CS MONTEPUEZ,Montepuez,HEALTH_FACILITY,Centro de Saúde,-13.128291,38.99872,ISS
1381,64043,Cabo Delgado,Namuno,CS NAMUNO,Namuno,HEALTH_FACILITY,Centro de Saúde,-13.618362,38.816279,ISS


In [143]:
#df_ISS.to_csv(saveDir + '\ISS_cleaned_0729.csv')

# Project Last Mile

## Name Cleaning

Remove prefix in the name column to get short name.

In [144]:
df_PLM.head()

Unnamed: 0,PLM ID,PROVINCE,District Layer (Admin2),DISTRICT,US CODE,UNIT,TYPE OF US,Longitude,Latitude
0,HF1,Cabo Delgado,Ancuabe,Ancuabe,1020206,CS Ancuabe,CS,39.8572,-12.9669
1,HF2,Cabo Delgado,Ancuabe,Ancuabe,1020207,CS Mariri,CS,39.5931,-13.0917
2,HF3,Cabo Delgado,Ancuabe,Ancuabe,1020210,CS Mesa,CS,39.5522,-13.0319
3,HF4,Cabo Delgado,Ancuabe,Ancuabe,1020209,CS Metoro,CS,39.8742,-13.105
4,HF5,Cabo Delgado,Ancuabe,Ancuabe,1020211,CS Minhewene,CS,39.4675,-12.9939


In [146]:
prefixes = ['Posto Medico','Centro Formação Saude','Posto Médico','Posto Medico Militar - ',
            'Clinica', 'Posto saude', 'H Militar', 'Hospital Distrital', 'Hospital Militar',
           '^CS', '^Cs', '^HD', '^HR', '^HP', '^PS', '^HG', '^HE', '^HC', '^HM']
df_PLM['name_short'] = df_PLM['UNIT'].str.replace('|'.join(prefixes), '', regex=True) \
.str.strip() \
.str.replace('^de|^da', '', regex=True) \
.str.strip() \
.str.title()

In [149]:
df_PLM.sample(5)

Unnamed: 0,PLM ID,PROVINCE,District Layer (Admin2),DISTRICT,US CODE,UNIT,TYPE OF US,Longitude,Latitude,name_short
40,HF41,Cabo Delgado,Cidade De Pemba,Cidade De Pemba,1020113,CS Paquite,CS,40.4867,-12.9625,Paquite
872,HF873,Nampula,Murrupula,Murrupula,1031606,CS Murrupula,CS,38.676396,-15.456229,Murrupula
1284,HF1286,Sofala,Muanza,Muanza,1325832,CS Tipo III Galinha,CS,35.039608,-19.229029,Tipo Iii Galinha
1379,HF1381,Tete,Macanga,Macanga,1050710,CS Mutchoncho,CS,33.9259,-14.9416,Mutchoncho
98,HF99,Cabo Delgado,Muidumbe,Muidumbe,1021206,CS Muambula,CS,39.836519,-11.83496,Muambula


In [150]:
plm_type_dict = {'CS':'Centro de Saúde', 'HD':'Hospital Distrital', 
                 'HR':'Hospital Rurais', 'HP':'Hospital Provincial', 
                 'PS':'Posto de Saúde', 'HG':'Hospital Geral', 
                 'HE':'Hospital Especializado', 'HC':'Hospital Central', 
                 'HM':'Hospital Especializado'}
df_PLM['type_cleaned'] = [plm_type_dict[facility_t] for facility_t in df_PLM['TYPE OF US']]

In [151]:
print("results:")
df_PLM['type_cleaned'].value_counts()

results:


Centro de Saúde           1511
Posto de Saúde             129
Hospital Distrital          24
Hospital Rurais             23
Hospital Especializado      11
Hospital Provincial          7
Hospital Geral               5
Hospital Central             2
Name: type_cleaned, dtype: int64

## Change Column Names

In [152]:
df_PLM.rename(columns={'PLM ID':'id', 'PROVINCE ':'admin1', 'DISTRICT ':'admin2',
                       'US CODE    ': 'US code', 'UNIT':'name_full', 'TYPE OF US':'type_original',
                      'Longitude':'longitude', 'Latitude':'latitude'},
             inplace=True)
df_PLM['source'] = 'PLM'

In [153]:
df_PLM = df_PLM[cols]

In [154]:
df_PLM.sample(5)

Unnamed: 0,id,admin1,admin2,name_full,name_short,type_original,type_cleaned,latitude,longitude,source
74,HF75,Cabo Delgado,Montepuez,CS Mapupulo,Mapupulo,CS,Centro de Saúde,-13.2233,38.8808,PLM
602,HF603,Maputo Provincia,Cidade Da Matola,CS Boquisso,Boquisso,CS,Centro de Saúde,-25.73904,32.539166,PLM
519,HF520,Manica,Sussundenga,CS Sussundenga,Sussundenga,CS,Centro de Saúde,-19.409628,33.293803,PLM
524,HF525,Manica,Tambara,CS Nhacafula,Nhacafula,CS,Centro de Saúde,-16.870467,34.076305,PLM
621,HF622,Maputo Provincia,Cidade Da Matola,PS Cadeia Feminina,Cadeia Feminina,PS,Posto de Saúde,-25.871666,32.538915,PLM


In [155]:
#df_PLM.to_csv(saveDir + '\PLM_cleaned_0729.csv')

In [156]:
#df = pd.concat([df_WHO, df_ISS, df_PLM], axis=0)
#df.to_csv(saveDir + '\MOZ_raw_0729.csv')