In [1]:
import pandas as pd
import numpy as np
import string
import json
import uuid

In [2]:
## Static Config
# Administrative levels: each entry pairs a source column with the level name
# it is published under in the generated config.
locations = [
    {"column": source, "name": level}
    for source, level in (("Woreda", "level1"), ("Kebele", "level2"))
]

# Coordinate column names; they are compared against lower-cased column names
# when the config is generated.
latlong = ["latitude", "longitude"]

## Marker Detail
def _marker_rule(name, column, value=None, **extra):
    """Build one marker rule; every rule is a plain string 'select' without an 'and' clause."""
    rule = {
        "name": name,
        "column": column,
        "action": "select",
        "type": "string",
        "value": value,
        "and": None,
        "and_value": None,
    }
    rule.update(extra)
    return rule


marker = {
    # Column whose value is used as the marker name.
    "name": "Site Name",
    # Marker colour per 'Functionality Status' value (None = no value given).
    "color": [
        _marker_rule("Functionality Status", "Functionality Status", status, color=shade)
        for status, shade in (
            ("Functional", "green"),
            ("Non functional", "red"),
            ("No data", "grey"),
            (None, "grey"),
        )
    ],
    # Extra fields listed in the marker detail.
    "detail": [
        _marker_rule("Source Type", "Water Source Type"),
        _marker_rule("Functionality Status", "Functionality Status"),
    ],
}

# Main table: one row per site whose 'Functionality Status' is one of the
# selected values, showing the listed indicator columns.
main = {
    "name": "Non-Functional Facility",
    "column": "Site Name",
    "select": {
        "column": "Functionality Status",
        "value": ["Non functional", "No data"],
    },
    # (display name, source column) pairs; 'Souce of Energy' is spelled the
    # way the column appears in the dataset.
    "indicators": [
        {"name": label, "column": source}
        for label, source in (
            ("Number of Users", "Number of Users"),
            ("Water Source Type", "Water Source Type"),
            ("Energy Source", "Souce of Energy"),
        )
    ],
}

In [3]:
# Load the raw water-point export.
df = pd.read_csv('eth_wp_data.csv')

# Every row needs an id: when the file has no 'Identifier' column, generate a
# random UUID per row and place it as the first column.
if 'Identifier' not in df.columns:
    df.insert(0, 'Identifier', [str(uuid.uuid4()) for _ in df.index])

In [4]:
def getEstimatedUser(x: object) -> str:
    """Bucket a raw user-count estimate into a display category.

    Parameters
    ----------
    x
        Any value accepted by ``int()`` (a number or an integer-like string).

    Returns
    -------
    str
        'more than 100' for counts above 100, '50-100' for 50..100,
        '25-50' for 25..49, 'less than 25' for anything lower, and
        'no data' when ``x`` cannot be converted to an integer
        (None, NaN, infinity, blank or non-numeric text).
    """
    try:
        users = int(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no data"; anything else (for example
        # KeyboardInterrupt) must propagate instead of being swallowed.
        return 'no data'
    if users > 100:
        return 'more than 100'
    if users > 49:
        return '50-100'
    if users > 24:
        return '25-50'
    return 'less than 25'
# Derive the categorical 'Number of Users' column from the raw estimates and
# show which categories occur.
df['Number of Users'] = df['Estimated Number of Users'].apply(getEstimatedUser)
df['Number of Users'].unique()

array(['more than 100', '50-100', 'less than 25', '25-50', 'no data'],
      dtype=object)

In [5]:
# Remove duplicated values
# Keep only the last row for every Identifier and renumber the rows 0..n-1.
# reset_index(drop=True) discards the old index directly; materialising it as
# a column and dropping 'index' afterwards would delete a genuine data column
# named 'index' (and leave a stray 'level_0' column behind).
df = df[~df.duplicated(subset=['Identifier'], keep="last")].reset_index(drop=True)

In [6]:
## Delete Column with no name
# Keep only the columns whose label does not start with 'Unnamed'.
unnamed_mask = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~unnamed_mask]

In [7]:
### Fill Empty Integer Values
# Numeric columns other than the coordinates: missing values become 0 and the
# column is cast to int32. Latitude/Longitude are left out so they keep their
# fractional part.
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
df_num = (
    df.select_dtypes(include=numerics)
      .drop(columns=['Latitude', 'Longitude'])
      .fillna(0.0)
      .astype(np.int32)
)
df[list(df_num)] = df_num

# NOTE(review): this overwrites every value in 'Yield' with 0 - confirm that
# discarding the original column content is intentional.
df['Yield'] = 0

In [8]:
### Replace Empty String Values
# Missing values in object-dtype columns become empty strings.
df_str = df.select_dtypes(include=['object'])
df_str = df_str.fillna("")
df[list(df_str)] = df_str

In [9]:
## Generate Settings
# Spreadsheet-style labels: 'A'..'Z' followed by 'AA'..'ZZ' (702 in total).
chars = list(string.ascii_uppercase)
chars_col = chars + [first + second for first in chars for second in chars]


def keyname(x, y):
    """Map each label in `x` to the item of `y` at the same position.

    The label 'data_point_id' is special-cased: it always maps to itself and
    the corresponding item of `y` is not read.
    """
    mapping = {}
    for position, label in enumerate(x):
        if label == 'data_point_id':
            mapping[label] = 'data_point_id'
        else:
            mapping[label] = y[position]
    return mapping

# Snapshot of the current column names and how many there are.
column_names = list(df.columns)
columns_length = len(column_names)

In [10]:
### Generate configs
# Map every spreadsheet-style key ('A', 'B', ...) to its original column name.
index = chars_col[:columns_length]
configs = keyname(index, column_names)

# Reverse lookup (lower-cased column name -> key). It is built before the
# nested 'marker' / 'locations' / 'latlong' / 'main' entries are added, so every
# value in `configs` is still a plain column name. When two names differ only
# by case the later column wins.
column_lookup = {name.lower(): key for key, name in configs.items()}


def _find_column(lookup, name, default=None):
    """Return the key of the column called `name`, compared case-insensitively.

    `default` is returned when `name` is None or no column matches.
    """
    if name is None:
        return default
    return lookup.get(name.lower(), default)


def _resolve_marker_rules(rules, lookup, extra_fields=()):
    """Copy marker rules, replacing 'column' and 'and' column names by keys.

    Unknown columns resolve to None. `extra_fields` names additional rule
    entries (e.g. 'color') that are copied through unchanged, after the
    standard fields.
    """
    resolved = []
    for rule in rules:
        entry = {
            "name": rule['name'],
            "column": _find_column(lookup, rule['column']),
            "action": rule['action'],
            "type": rule['type'],
            "value": rule['value'],
            "and": _find_column(lookup, rule['and']),
            "and_value": rule['and_value'],
        }
        for field in extra_fields:
            entry[field] = rule[field]
        resolved.append(entry)
    return resolved


### Marker name and coordinates
marker_config = {}
marker_key = _find_column(column_lookup, marker['name'])
if marker_key is not None:
    marker_config = {"name": marker_key}

latlong_config = {
    name: key for name, key in column_lookup.items() if name in latlong
}

### Locations
# A location whose column is missing is kept with an empty-string key.
loc_config = {
    loc['name'].lower(): _find_column(column_lookup, loc['column'], "")
    for loc in locations
}

#### Marker Color
marker_color = _resolve_marker_rules(
    marker['color'], column_lookup, extra_fields=('color',)
)
marker_config['color'] = marker_color

#### Marker Detail
marker_detail = _resolve_marker_rules(marker['detail'], column_lookup)
marker_config['detail'] = marker_detail

## Main Table
# Column names are matched case-insensitively here as well, consistent with
# the marker, location and coordinate lookups above.
main_config = {'name': main['name']}
main_key = _find_column(column_lookup, main['column'])
if main_key is not None:
    main_config.update({'key': main_key})

main_selector = {}
selector_key = _find_column(column_lookup, main['select']['column'])
if selector_key is not None:
    main_selector.update({'key': selector_key, 'value': main['select']['value']})

# Indicators are listed in dataset column order (outer loop), not in the
# order they are declared in `main['indicators']`.
indicators = [
    {'name': indicator['name'], 'key': key}
    for key, name in configs.items()
    for indicator in main['indicators']
    if indicator['column'].lower() == name.lower()
]
main_config.update({'indicators': indicators, 'select': main_selector})

# Attach the nested sections next to the key -> column-name entries.
configs['marker'] = marker_config
configs['locations'] = loc_config
configs['latlong'] = latlong_config
configs['main'] = main_config

In [11]:
### Replace Dataset Columns
# Rename the dataframe columns to their short keys ('A', 'B', ...) so the
# exported rows line up with the key -> column-name entries in `configs`.
# NOTE: after this cell list(df) yields the keys rather than the original
# names, so re-running the "Generate Settings" / "Generate configs" cells
# without reloading the data would map every key to itself.
df.columns = index

In [12]:
# Display the generated configuration for a visual check.
configs

{'A': 'Identifier',
 'B': 'Woreda',
 'C': 'Kebele',
 'D': 'Village',
 'E': 'Site Name',
 'F': 'Latitude',
 'G': 'Longitude',
 'H': 'Water Source Type_1',
 'I': 'Water Source Type',
 'J': 'Functionality Status',
 'K': 'Souce of Energy',
 'L': 'Estimated Number of Users',
 'M': 'Hand Pump Type',
 'N': 'Organisation that installed the Water Pump',
 'O': 'Year Commisioned',
 'P': 'Reason for Inventory',
 'Q': 'Functional Taps In The Scheme',
 'R': 'Number of Non FunctionalTaps In The Scheme',
 'S': 'Depth',
 'T': 'Yield',
 'U': 'Yield.1',
 'V': 'Number of Users',
 'marker': {'name': 'E',
  'color': [{'name': 'Functionality Status',
    'column': 'J',
    'action': 'select',
    'type': 'string',
    'value': 'Functional',
    'and': None,
    'and_value': None,
    'color': 'green'},
   {'name': 'Functionality Status',
    'column': 'J',
    'action': 'select',
    'type': 'string',
    'value': 'Non functional',
    'and': None,
    'and_value': None,
    'color': 'red'},
   {'name': 'Fun

In [13]:
# One dict per row, keyed by column key ('A', 'B', ...).
# orient='records' builds the rows directly; transposing first would upcast
# the whole frame to object dtype and silently collapse rows that share an
# index label.
data = df.to_dict(orient='records')
data[:2]

[{'A': '0a701d7b-538d-44ae-8e9c-9f79bdf3d5f9',
  'B': 'Shashemene',
  'C': 'Wetera Shegule',
  'D': 'Ardano',
  'E': 'Ardano',
  'F': 7.212806,
  'G': 38.7258,
  'H': 'Hand dug Well fitted with pump or windlass',
  'I': 'Hand dug Well',
  'J': 'Functional',
  'K': 'Manual operation',
  'L': '180',
  'M': 'Afridev',
  'N': '',
  'O': 2000,
  'P': '',
  'Q': 0,
  'R': 0,
  'S': 0,
  'T': 0,
  'U': 0,
  'V': 'more than 100'},
 {'A': 'd2a84811-43ac-4ad0-b35f-93400ce51ec4',
  'B': 'Shashemene\n',
  'C': 'Hagugeta Keni',
  'D': 'Qali',
  'E': 'Qali',
  'F': 7.250937,
  'G': 38.62908,
  'H': 'Hand dug Well fitted with pump or windlass',
  'I': 'Hand dug Well',
  'J': 'Functional',
  'K': 'Manual operation',
  'L': '312',
  'M': 'Afridev',
  'N': 'CDA',
  'O': 2009,
  'P': '',
  'Q': 0,
  'R': 0,
  'S': 0,
  'T': 0,
  'U': 0,
  'V': 'more than 100'}]

In [14]:
### to json
# Output locations, relative to the notebook's working directory.
data_json_path = '../resources/js/data/eth_wp_data.json'
config_json_path = '../resources/js/data/eth_wp_config.json'

# Rows as a JSON array of records, pretty-printed.
df.to_json(data_json_path, orient="records", indent=2)

# The generated configuration as compact JSON.
with open(config_json_path, 'w') as outfile:
    outfile.write(json.dumps(configs))