Clean up and export mWater filters download data

In [1]:
import pandas as pd

## Config

In [2]:
# Path of the raw mWater CSV download that this notebook cleans.
# An earlier run pointed at "mwater-ashanti,eastern,volta-pipedwater.csv" in the
# same Desktop folder; swap the file name here to process a different download.
in_file = "/Users/datascience/Desktop/mwater-brongahafo-pipedwater.csv"

## Load Data

In [3]:
# Read the raw CSV named by in_file, then display its (rows, columns) shape.
df = pd.read_csv(filepath_or_buffer=in_file)
df.shape

(928, 75)

In [4]:
# List every raw column with its non-null count and dtype before cleaning.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 928 entries, 0 to 927
Data columns (total 75 columns):
 #   Column                                                              Non-Null Count  Dtype  
---  ------                                                              --------------  -----  
 0   Name                                                                926 non-null    object 
 1   Description                                                         792 non-null    object 
 2   Type                                                                928 non-null    object 
 3   GPS Location (latitude)                                             928 non-null    float64
 4   GPS Location (longitude)                                            928 non-null    float64
 5   Administrative region                                               928 non-null    object 
 6   Location Accuracy                                                   741 non-null    float64
 7   Location Altitude

## Clean Data

### Split admin location data

In [6]:
# Break "District, Region, Country" into three separate columns.
# The source values put a space after each comma, so splitting on "," alone
# leaves REGION and COUNTRY with a leading space (" Brong Ahafo", " Ghana"),
# which breaks later equality filters and joins. Every part is therefore
# stripped after the split.
# n=2 caps the split at three parts; any further commas stay inside COUNTRY.
df[["DISTRICT", "REGION", "COUNTRY"]] = (
    df["Administrative region"]
    .str.split(",", n=2, expand=True)
    .apply(lambda part: part.str.strip())
)

In [7]:
# Preview the first three rows, including the new DISTRICT / REGION / COUNTRY columns.
df.head(n=3)

Unnamed: 0,Name,Description,Type,GPS Location (latitude),GPS Location (longitude),Administrative region,Location Accuracy,Location Altitude,Altitude Accuracy,Unique ID,...,Added by user,Date added,Last modified by user,Date last modified,Site Private,Previous mWater IDs,Pending Approvals,DISTRICT,REGION,COUNTRY
0,Kenyasi No 2 small town water system SP 12,Behind Asutifi Rural Bank,Piped into public tap / standpipe / basin,6.981086,-2.388023,"Asutifi North, Brong Ahafo, Ghana",3.74,,,436276103,...,oopoku_wvgh_enum,2024-03-28T10:32:26.048Z,,,False,,True,Asutifi North,Brong Ahafo,Ghana
1,Kenyasi No 2 small town water system SP 11,Kwaadaso opposite Adum public toilet,Piped into public tap / standpipe / basin,6.979728,-2.389147,"Asutifi North, Brong Ahafo, Ghana",4.8,,,436276110,...,oopoku_wvgh_enum,2024-03-28T10:50:19.556Z,,,False,,True,Asutifi North,Brong Ahafo,Ghana
2,Kenyasi No 2 Adum Florita Newman water supply,Adum opposite Church of Christ,Piped into public tap / standpipe / basin,6.979303,-2.38968,"Asutifi North, Brong Ahafo, Ghana",2.0,,,436276134,...,oopoku_wvgh_enum,2024-03-28T11:05:30.312Z,,,False,,True,Asutifi North,Brong Ahafo,Ghana


### Add Source

In [8]:
# Tag every row with the constant source label.
df = df.assign(SOURCE="Public-mWater")

### Clean up columns as needed

In [9]:
# Replace missing text in the free-text columns with an empty string.
for text_col in ("Description", "Name"):
    df[text_col] = df[text_col].fillna("")

In [10]:
# Preview the first three rows after adding SOURCE and filling the text columns.
df.head(n=3)

Unnamed: 0,Name,Description,Type,GPS Location (latitude),GPS Location (longitude),Administrative region,Location Accuracy,Location Altitude,Altitude Accuracy,Unique ID,...,Date added,Last modified by user,Date last modified,Site Private,Previous mWater IDs,Pending Approvals,DISTRICT,REGION,COUNTRY,SOURCE
0,Kenyasi No 2 small town water system SP 12,Behind Asutifi Rural Bank,Piped into public tap / standpipe / basin,6.981086,-2.388023,"Asutifi North, Brong Ahafo, Ghana",3.74,,,436276103,...,2024-03-28T10:32:26.048Z,,,False,,True,Asutifi North,Brong Ahafo,Ghana,Public-mWater
1,Kenyasi No 2 small town water system SP 11,Kwaadaso opposite Adum public toilet,Piped into public tap / standpipe / basin,6.979728,-2.389147,"Asutifi North, Brong Ahafo, Ghana",4.8,,,436276110,...,2024-03-28T10:50:19.556Z,,,False,,True,Asutifi North,Brong Ahafo,Ghana,Public-mWater
2,Kenyasi No 2 Adum Florita Newman water supply,Adum opposite Church of Christ,Piped into public tap / standpipe / basin,6.979303,-2.38968,"Asutifi North, Brong Ahafo, Ghana",2.0,,,436276134,...,2024-03-28T11:05:30.312Z,,,False,,True,Asutifi North,Brong Ahafo,Ghana,Public-mWater


### Drop unneeded columns

In [11]:
# Columns carried through to the export, in output order.
keep_cols = [
    # identity
    "Name",
    "Description",
    "Type",
    # administrative location (derived from 'Administrative region')
    "DISTRICT",
    "REGION",
    "COUNTRY",
    # coordinates and GPS quality
    "GPS Location (latitude)",
    "GPS Location (longitude)",
    "Location Accuracy",
    "Location Altitude",
    "Altitude Accuracy",
    # management and siting
    "Managed By",
    "Location type",
    # organizations involved
    "Implementing organization",
    "Funder or donor",
    "Installer or driller",
    # dates
    "Date of installation",
    "Date added",
    "Date last modified",
    # technical details
    "Drilling method",
    "Pump/lifting device",
    "Supply for groundwater system",
    "Supply for piped system",
    "Treatment works",
    "Depth (m)",
    # media
    "Photos",
    # provenance label
    "SOURCE",
]

In [12]:
# Reduce the frame to the columns in keep_cols (in that order) as an independent copy,
# then display the resulting (rows, columns) shape.
df = df.loc[:, keep_cols].copy()
df.shape

(928, 27)

In [13]:
# Preview the first three rows of the reduced column set.
df.head(n=3)

Unnamed: 0,Name,Description,Type,DISTRICT,REGION,COUNTRY,GPS Location (latitude),GPS Location (longitude),Location Accuracy,Location Altitude,...,Date added,Date last modified,Drilling method,Pump/lifting device,Supply for groundwater system,Supply for piped system,Treatment works,Depth (m),Photos,SOURCE
0,Kenyasi No 2 small town water system SP 12,Behind Asutifi Rural Bank,Piped into public tap / standpipe / basin,Asutifi North,Brong Ahafo,Ghana,6.981086,-2.388023,3.74,,...,2024-03-28T10:32:26.048Z,,,,,,,,https://api.mwater.co/v3/images/e5f30a694fec4d...,Public-mWater
1,Kenyasi No 2 small town water system SP 11,Kwaadaso opposite Adum public toilet,Piped into public tap / standpipe / basin,Asutifi North,Brong Ahafo,Ghana,6.979728,-2.389147,4.8,,...,2024-03-28T10:50:19.556Z,,,,,,,,https://api.mwater.co/v3/images/db6390eb9ec94c...,Public-mWater
2,Kenyasi No 2 Adum Florita Newman water supply,Adum opposite Church of Christ,Piped into public tap / standpipe / basin,Asutifi North,Brong Ahafo,Ghana,6.979303,-2.38968,2.0,,...,2024-03-28T11:05:30.312Z,,,,,,,,https://api.mwater.co/v3/images/515803bb2f3642...,Public-mWater


### Rename Columns

In [14]:
# Old column header -> new column header, applied when the columns are renamed.
rename_dict = dict(
    [
        ("Name", "Water System"),
        ("GPS Location (latitude)", "LATITUDE"),
        ("GPS Location (longitude)", "LONGITUDE"),
    ]
)

In [15]:
# Apply the header mapping to the column axis, then list the resulting columns and dtypes.
df = df.rename(mapper=rename_dict, axis="columns")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 928 entries, 0 to 927
Data columns (total 27 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Water System                   928 non-null    object 
 1   Description                    928 non-null    object 
 2   Type                           928 non-null    object 
 3   DISTRICT                       928 non-null    object 
 4   REGION                         928 non-null    object 
 5   COUNTRY                        928 non-null    object 
 6   LATITUDE                       928 non-null    float64
 7   LONGITUDE                      928 non-null    float64
 8   Location Accuracy              741 non-null    float64
 9   Location Altitude              361 non-null    float64
 10  Altitude Accuracy              78 non-null     float64
 11  Managed By                     928 non-null    object 
 12  Location type                  258 non-null    obj

## Export

In [16]:
# Last look at the first three rows before the frame is written out.
df.head(n=3)

Unnamed: 0,Water System,Description,Type,DISTRICT,REGION,COUNTRY,LATITUDE,LONGITUDE,Location Accuracy,Location Altitude,...,Date added,Date last modified,Drilling method,Pump/lifting device,Supply for groundwater system,Supply for piped system,Treatment works,Depth (m),Photos,SOURCE
0,Kenyasi No 2 small town water system SP 12,Behind Asutifi Rural Bank,Piped into public tap / standpipe / basin,Asutifi North,Brong Ahafo,Ghana,6.981086,-2.388023,3.74,,...,2024-03-28T10:32:26.048Z,,,,,,,,https://api.mwater.co/v3/images/e5f30a694fec4d...,Public-mWater
1,Kenyasi No 2 small town water system SP 11,Kwaadaso opposite Adum public toilet,Piped into public tap / standpipe / basin,Asutifi North,Brong Ahafo,Ghana,6.979728,-2.389147,4.8,,...,2024-03-28T10:50:19.556Z,,,,,,,,https://api.mwater.co/v3/images/db6390eb9ec94c...,Public-mWater
2,Kenyasi No 2 Adum Florita Newman water supply,Adum opposite Church of Christ,Piped into public tap / standpipe / basin,Asutifi North,Brong Ahafo,Ghana,6.979303,-2.38968,2.0,,...,2024-03-28T11:05:30.312Z,,,,,,,,https://api.mwater.co/v3/images/515803bb2f3642...,Public-mWater


In [17]:
# Write the cleaned frame to an Excel workbook; index=False leaves the row index out of the sheet.
export_path = "/Users/datascience/Desktop/AFPW-mWater-PipedWaterSystems-BrongAhafo.xlsx"
df.to_excel(export_path, index=False)