Clean up and export data from a filtered mWater download

In [3]:
from pathlib import Path

import pandas as pd

## Config

In [4]:
# Relative path of the raw mWater CSV export that the load cell reads with pd.read_csv.
in_file = "./data/mWater - GHA - Piped Systems.csv"

## Load Data

In [5]:
# Read the raw export into a DataFrame.
# NOTE(review): the saved output of this cell echoes the read_csv line, which is how
# Python prints a warning -- presumably pandas' mixed-type DtypeWarning. With
# low_memory=False pandas infers each column's dtype from the whole file instead of
# chunk by chunk, which is the remedy that warning recommends.
df = pd.read_csv(in_file, low_memory=False)
df.shape

  df = pd.read_csv(in_file)


(11285, 75)

In [6]:
# List every column with its non-null count and dtype to see which fields are sparsely filled.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11285 entries, 0 to 11284
Data columns (total 75 columns):
 #   Column                                                              Non-Null Count  Dtype  
---  ------                                                              --------------  -----  
 0   Name                                                                11222 non-null  object 
 1   Description                                                         4697 non-null   object 
 2   Type                                                                11285 non-null  object 
 3   GPS Location (latitude)                                             11285 non-null  float64
 4   GPS Location (longitude)                                            11285 non-null  float64
 5   Administrative region                                               11285 non-null  object 
 6   Location Accuracy                                                   9633 non-null   float64
 7   Location Alti

## Clean Data

### Split admin location data

In [8]:
# "Administrative region" holds comma-separated text such as "Jomoro, Western, Ghana".
# Split it into at most three parts (n=2) and strip each part, because splitting on a
# bare "," leaves the space after each comma attached to the following value
# (" Western", " Ghana"), which would break later grouping or joins on these columns.
df[["DISTRICT", "REGION", "COUNTRY"]] = (
    df["Administrative region"]
    .str.split(",", n=2, expand=True)
    .apply(lambda part: part.str.strip())
)

In [9]:
# Preview the first three rows to check the DISTRICT / REGION / COUNTRY columns.
df.head(3)

Unnamed: 0,Name,Description,Type,GPS Location (latitude),GPS Location (longitude),Administrative region,Location Accuracy,Location Altitude,Altitude Accuracy,Unique ID,...,Added by user,Date added,Last modified by user,Date last modified,Site Private,Previous mWater IDs,Pending Approvals,DISTRICT,REGION,COUNTRY
0,Washing Bay,,Piped into yard/plot,4.988517,-2.593267,"Jomoro, Western, Ghana",,,,1094101,...,ftzomayi,2017-10-11T22:24:11.744Z,admin,2023-10-25T21:12:01.386Z,False,,False,Jomoro,Western,Ghana
1,Kennedy Koudgzi,,Piped into dwelling,5.42837,-1.632521,"Wassa East, Western, Ghana",6.000906,,3.0,646847434,...,j.nkrumah,2024-03-21T07:31:29.541Z,snagyekum1,2024-07-19T13:42:46.447Z,False,,True,Wassa East,Western,Ghana
2,Mary Alidu,,Piped into dwelling,5.428823,-1.632139,"Wassa East, Western, Ghana",6.000906,,3.0,666130433,...,j.nkrumah,2024-03-28T12:32:02.072Z,snagyekum1,2024-07-19T13:43:31.989Z,False,,True,Wassa East,Western,Ghana


### Add Source

In [10]:
# Tag every row with a constant SOURCE label.
df = df.assign(SOURCE="Public-mWater")

### Clean up columns as needed

In [11]:
# Replace missing values in the free-text columns with empty strings.
for text_col in ("Description", "Name"):
    df[text_col] = df[text_col].fillna("")

In [12]:
# Preview the first three rows after adding SOURCE and filling the Name/Description gaps.
df.head(3)

Unnamed: 0,Name,Description,Type,GPS Location (latitude),GPS Location (longitude),Administrative region,Location Accuracy,Location Altitude,Altitude Accuracy,Unique ID,...,Date added,Last modified by user,Date last modified,Site Private,Previous mWater IDs,Pending Approvals,DISTRICT,REGION,COUNTRY,SOURCE
0,Washing Bay,,Piped into yard/plot,4.988517,-2.593267,"Jomoro, Western, Ghana",,,,1094101,...,2017-10-11T22:24:11.744Z,admin,2023-10-25T21:12:01.386Z,False,,False,Jomoro,Western,Ghana,Public-mWater
1,Kennedy Koudgzi,,Piped into dwelling,5.42837,-1.632521,"Wassa East, Western, Ghana",6.000906,,3.0,646847434,...,2024-03-21T07:31:29.541Z,snagyekum1,2024-07-19T13:42:46.447Z,False,,True,Wassa East,Western,Ghana,Public-mWater
2,Mary Alidu,,Piped into dwelling,5.428823,-1.632139,"Wassa East, Western, Ghana",6.000906,,3.0,666130433,...,2024-03-28T12:32:02.072Z,snagyekum1,2024-07-19T13:43:31.989Z,False,,True,Wassa East,Western,Ghana,Public-mWater


### Drop unneeded cols

In [13]:
# Columns retained for the export, in output order.
keep_cols = [
    # identity
    "Name",
    "Description",
    "Type",
    # administrative location
    "DISTRICT",
    "REGION",
    "COUNTRY",
    # coordinates and their accuracy
    "GPS Location (latitude)",
    "GPS Location (longitude)",
    "Location Accuracy",
    "Location Altitude",
    "Altitude Accuracy",
    # management and setting
    "Managed By",
    "Location type",
    # organisations involved
    "Implementing organization",
    "Funder or donor",
    "Installer or driller",
    # dates
    "Date of installation",
    "Date added",
    "Date last modified",
    # technical details
    "Drilling method",
    "Pump/lifting device",
    "Supply for groundwater system",
    "Supply for piped system",
    "Treatment works",
    "Depth (m)",
    # media
    "Photos",
    # provenance
    "SOURCE",
]

In [14]:
# Keep only the columns named in keep_cols (in that order) as an independent copy,
# then show the resulting shape.
df = df.loc[:, keep_cols].copy()
df.shape

(11285, 27)

In [15]:
# Preview the first three rows of the frame after dropping the unneeded columns.
df.head(3)

Unnamed: 0,Name,Description,Type,DISTRICT,REGION,COUNTRY,GPS Location (latitude),GPS Location (longitude),Location Accuracy,Location Altitude,...,Date added,Date last modified,Drilling method,Pump/lifting device,Supply for groundwater system,Supply for piped system,Treatment works,Depth (m),Photos,SOURCE
0,Washing Bay,,Piped into yard/plot,Jomoro,Western,Ghana,4.988517,-2.593267,,,...,2017-10-11T22:24:11.744Z,2023-10-25T21:12:01.386Z,,,,,,,,Public-mWater
1,Kennedy Koudgzi,,Piped into dwelling,Wassa East,Western,Ghana,5.42837,-1.632521,6.000906,,...,2024-03-21T07:31:29.541Z,2024-07-19T13:42:46.447Z,,,,Borehole or tubewell,,,https://api.mwater.co/v3/images/c8ab0d6aea9445...,Public-mWater
2,Mary Alidu,,Piped into dwelling,Wassa East,Western,Ghana,5.428823,-1.632139,6.000906,,...,2024-03-28T12:32:02.072Z,2024-07-19T13:43:31.989Z,,,,Borehole or tubewell,,,https://api.mwater.co/v3/images/63e676fc780941...,Public-mWater


### Rename Columns

In [16]:
# Mapping of original mWater column names to the names used in the export.
rename_dict = dict(
    [
        ("Name", "Water System"),
        ("GPS Location (latitude)", "LATITUDE"),
        ("GPS Location (longitude)", "LONGITUDE"),
    ]
)

In [17]:
# Apply the column renames, then list the resulting columns with their non-null counts.
df = df.rename(rename_dict, axis="columns")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11285 entries, 0 to 11284
Data columns (total 27 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Water System                   11285 non-null  object 
 1   Description                    11285 non-null  object 
 2   Type                           11285 non-null  object 
 3   DISTRICT                       11285 non-null  object 
 4   REGION                         11285 non-null  object 
 5   COUNTRY                        11285 non-null  object 
 6   LATITUDE                       11285 non-null  float64
 7   LONGITUDE                      11285 non-null  float64
 8   Location Accuracy              9633 non-null   float64
 9   Location Altitude              2798 non-null   float64
 10  Altitude Accuracy              1248 non-null   float64
 11  Managed By                     11285 non-null  object 
 12  Location type                  5655 non-null  

## Export

In [18]:
# Final look at the first three rows before writing the export file.
df.head(3)

Unnamed: 0,Water System,Description,Type,DISTRICT,REGION,COUNTRY,LATITUDE,LONGITUDE,Location Accuracy,Location Altitude,...,Date added,Date last modified,Drilling method,Pump/lifting device,Supply for groundwater system,Supply for piped system,Treatment works,Depth (m),Photos,SOURCE
0,Washing Bay,,Piped into yard/plot,Jomoro,Western,Ghana,4.988517,-2.593267,,,...,2017-10-11T22:24:11.744Z,2023-10-25T21:12:01.386Z,,,,,,,,Public-mWater
1,Kennedy Koudgzi,,Piped into dwelling,Wassa East,Western,Ghana,5.42837,-1.632521,6.000906,,...,2024-03-21T07:31:29.541Z,2024-07-19T13:42:46.447Z,,,,Borehole or tubewell,,,https://api.mwater.co/v3/images/c8ab0d6aea9445...,Public-mWater
2,Mary Alidu,,Piped into dwelling,Wassa East,Western,Ghana,5.428823,-1.632139,6.000906,,...,2024-03-28T12:32:02.072Z,2024-07-19T13:43:31.989Z,,,,Borehole or tubewell,,,https://api.mwater.co/v3/images/63e676fc780941...,Public-mWater


In [19]:
# Write the cleaned table to an Excel workbook without the index column.
# The export directory is created first so that a fresh checkout (where ./export may
# not exist yet) does not fail at the very last cell of the notebook.
out_file = Path("./export/AFPW-PipedWaterSystems-mWater.xlsx")
out_file.parent.mkdir(parents=True, exist_ok=True)
df.to_excel(out_file, index=False)