In [1]:
import pandas as pd
import numpy as np
import string
import json
import uuid

In [2]:
## Static Config
## Static Config
locations = [
    {
        "column": "Woreda",
        "name": "level1"
    },
    {
        "column": "Kebele",
        "name": "level2"
    }
]

latlong = ["latitude", "longitude"]

## Marker Detail
marker = {
    "name" : "Site Name",
    "color": [
        {
            "name": "Functionality Status",
            "column": "Functionality Status",
            "action": "select",
            "type": "string",
            "value": "Functional",
            "and": None,
            "and_value": None,
            "color": "green"
        },
        {
            "name": "Functionality Status",
            "column": "Functionality Status",
            "action": "select",
            "type": "string",
            "value": "Non functional",
            "and": None,
            "and_value": None,
            "color": "red"
        },
        {
            "name": "Functionality Status",
            "column": "Is the water point currently functional?",
            "action": "select",
            "type": "string",
            "value": "No data",
            "and": None,
            "and_value": None,
            "color": "grey"
        },
        {
            "name": "Functionality Status",
            "column": "Is the water point currently functional?",
            "action": "select",
            "type": "string",
            "value": None,
            "and": None,
            "and_value": None,
            "color": "grey"
        },
    ],
    "detail": [
        {
            "name": "Source Type",
            "column": "Water Source Type",
            "action": "select",
            "type": "string",
            "value": None,
            "and": None,
            "and_value": None,
        },
        {
            "name": "Functionality Status",
            "column": "Functionality Status",
            "action": "select",
            "type": "string",
            "value": None,
            "and": None,
            "and_value": None,
        },
    ]
}

main = {
    'name':'Non-Functional Facility',
    'column': 'Site Name',
    'select': {
        'column': "Functionality Status",
        'value': 'Non functional'
    },
    'indicators': [{
        'name': 'Number of Users',
        'column': 'Number of Users'
    },{
        'name': 'Water Source Type',
        'column': 'Water Source Type'
    },{
        'name': 'Energy Source',
        'column': 'Souce of Energy'
    }]
}

In [3]:
df = pd.read_csv('eth_wp_data.csv')
df['Identifier'] = [str(uuid.uuid4()) for _ in range(len(df.index))]

In [4]:
df

Unnamed: 0,Woreda,Kebele,Village,Site Name,Latitude,Longitude,Water Source Type_1,Water Source Type,Functionality Status,Souce of Energy,...,Hand Pump Type,Organisation that installed the Water Pump,Year Commisioned,Reason for Inventory,Functional Taps In The Scheme,Number of Non FunctionalTaps In The Scheme,Depth,Yield,Yield.1,Identifier
0,Shashemene,Wetera Shegule,Ardano,Ardano,7.212806,38.72580,Hand dug Well fitted with pump or windlass,Hand dug Well,Functional,Manual operation,...,Afridev,,2000.0,,,,,0.28,0.28,93037f42-b45f-4bf1-b576-ab4087318c30
1,Shashemene\n,Hagugeta Keni,Qali,Qali,7.250937,38.62908,Hand dug Well fitted with pump or windlass,Hand dug Well,Functional,Manual operation,...,Afridev,CDA,2009.0,,,,,0.12,0.12,428e5177-bc03-4632-b357-387901f906f1
2,Arsi Negele,Kerero,A/shifaa,A/shifaa,7.487828,38.73458,Protected Spring,Protected Spring,Non Functional,Gravity,...,,,2000.0,Inspection,,,,0.50,0.50,ab9237b6-476a-4d88-b6b9-e73def680e32
3,Shashemene,Hursa Sinbo,aredano,Ardano,7.122700,38.72439,Hand dug Well fitted with pump or windlass,Hand dug Well,Non Functional,Manual operation,...,Afridev,CDA,2009.0,,,,,0.12,0.12,ccdc048b-e812-4c4b-a132-f148bbe26cc2
4,Shashemene,Hursa Sinbo,shifa,shifa,7.126625,38.73907,Hand dug Well fitted with pump or windlass,Hand dug Well,Non Functional,Manual operation,...,Afridev,CDA,2009.0,,,,,0.12,0.12,842fdfbe-e9bf-4962-a859-fbd71fe82fe6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277,Arsi Negele,Gude Dura,,,7.196388,38.78040,,Hand dug well,Functional,Manual operation,...,,,,National WASH inventory,,,,,,33960414-3128-44a6-8193-c78f184ea630
278,Arsi Negele,Gude Dura,,,7.196338,38.77942,,Hand dug well,Functional,Manual operation,...,,,,National WASH inventory,,,,,,dbfc5fef-7441-4a70-9908-c088ef1e029b
279,Arsi Negele,Gude Dura,,,7.192750,38.78396,,Hand dug well,Functional,Manual operation,...,,,,National WASH inventory,,,,,,291f0cc2-5331-4966-b78d-508e85078731
280,Arsi Negele,Gude Dura,,,7.196477,38.78578,,Hand dug well,Functional,Manual operation,...,,,,National WASH inventory,,,,,,6aec24d6-1596-4f63-8b5c-1aa7adbb57ae


In [5]:
def getEstimatedUser(x:int):
    try:
        if int(x) > 100:
            return 'more than 100'
        if int(x) > 49:
            return '50-100'
        if int(x) > 24:
            return '25-50'
        return 'less than 25'
    except:
        pass
    return 'no data'
df['Number of Users'] = df['Estimated Number of Users'].apply(lambda x: getEstimatedUser(x))
df['Number of Users'].unique()

array(['more than 100', '50-100', 'less than 25', '25-50', 'no data'],
      dtype=object)

In [6]:
# Remove duplicated values
df = df[~df.duplicated(subset=['Identifier'], keep="last")].reset_index()
df = df.drop(columns=['index'])

In [7]:
## Delete Column with no name
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

In [8]:
### Fill Empty Integer Values
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
df_num = df.select_dtypes(include=numerics).drop(columns=['Latitude', 'Longitude'])
df_num = df_num.fillna(0.0).astype(np.int32)
df[list(df_num)] = df_num
df['Yield'] = 0

In [9]:
### Replace Empty String Values
df_str = df.select_dtypes(include=['object']).fillna("")
df[list(df_str)] = df_str

In [10]:
## Generate Settings 
chars =list(string.ascii_uppercase)
chars_col = chars + [x+y for x in chars for y in chars]
keyname = lambda x,y: {a:y[b] if (a != 'data_point_id') else 'data_point_id' for b, a in enumerate(x)}

columns_length = len(list(df))
column_names = list(df)

In [11]:
### Generate configs
index = chars_col[:columns_length]
configs = keyname(index, column_names)

marker_config = {}
latlong_config = {}
for key in configs:
    if(configs[key].lower() in latlong):
        latlong_config.update({configs[key].lower(): key})
    if(configs[key].lower() == marker['name'].lower()):
        marker_config = { "name" : key}
        
### Locations
loc_config = {}
for loc in locations:
    column_index = ""
    for key in configs:
        if loc['column'].lower() == configs[key].lower():
            column_index = key
    loc_config.update({loc['name'].lower(): column_index})

#### Marker Color
marker_color = [];
for mk in marker['color']:
    column = None
    andColumn = None
    for key in configs:
        if configs[key].lower() == mk['column'].lower():
            column = key
        if mk['and'] != None and configs[key].lower() == mk['and'].lower():
            andColumn = key
    marker_color.append({
        "name": mk['name'],
        "column": column,
        "action": mk['action'],
        "type": mk['type'],
        "value": mk['value'],
        "and": andColumn,
        "and_value": mk['and_value'],
        "color": mk['color']
    })
marker_config['color'] = marker_color


#### Marker Detail
marker_detail = [];
for mk in marker['detail']:
    column = None
    andColumn = None
    for key in configs:
        if configs[key].lower() == mk['column'].lower():
            column = key
        if mk['and'] != None and configs[key].lower() == mk['and'].lower():
            andColumn = key
    marker_detail.append({
        "name": mk['name'],
        "column": column,
        "action": mk['action'],
        "type": mk['type'],
        "value": mk['value'],
        "and": andColumn,
        "and_value": mk['and_value']
    })
marker_config['detail'] = marker_detail
    
## Main Table
main_config = {'name':main['name']}
main_selector = {}
indicators = []
for key in configs:
    if configs[key] == main['column']:
        main_config.update({'key':key})
    if configs[key] == main['select']['column']:
        main_selector.update({'key':key,'value':main['select']['value']})
    for i in main['indicators']:
        if configs[key] == i['column']:
            indicators.append({'name':i['name'],'key':key})
main_config.update({'indicators':indicators,'select':main_selector})

configs['marker'] = marker_config
configs['locations'] = loc_config
configs['latlong'] = latlong_config
configs['main'] = main_config

In [12]:
### Replace Dataset Columns
df.columns = index

In [13]:
configs

{'A': 'Woreda',
 'B': 'Kebele',
 'C': 'Village',
 'D': 'Site Name',
 'E': 'Latitude',
 'F': 'Longitude',
 'G': 'Water Source Type_1',
 'H': 'Water Source Type',
 'I': 'Functionality Status',
 'J': 'Souce of Energy',
 'K': 'Estimated Number of Users',
 'L': 'Hand Pump Type',
 'M': 'Organisation that installed the Water Pump',
 'N': 'Year Commisioned',
 'O': 'Reason for Inventory',
 'P': 'Functional Taps In The Scheme',
 'Q': 'Number of Non FunctionalTaps In The Scheme',
 'R': 'Depth',
 'S': 'Yield',
 'T': 'Yield.1',
 'U': 'Identifier',
 'V': 'Number of Users',
 'marker': {'name': 'D',
  'color': [{'name': 'Functionality Status',
    'column': 'I',
    'action': 'select',
    'type': 'string',
    'value': 'Functional',
    'and': None,
    'and_value': None,
    'color': 'green'},
   {'name': 'Functionality Status',
    'column': 'I',
    'action': 'select',
    'type': 'string',
    'value': 'Non functional',
    'and': None,
    'and_value': None,
    'color': 'red'},
   {'name': 'Fun

In [14]:
data = list(df.T.to_dict().values())
data[:2]

[{'A': 'Shashemene',
  'B': 'Wetera Shegule',
  'C': 'Ardano',
  'D': 'Ardano',
  'E': 7.212806,
  'F': 38.7258,
  'G': 'Hand dug Well fitted with pump or windlass',
  'H': 'Hand dug Well',
  'I': 'Functional',
  'J': 'Manual operation',
  'K': '180',
  'L': 'Afridev',
  'M': '',
  'N': 2000,
  'O': '',
  'P': 0,
  'Q': 0,
  'R': 0,
  'S': 0,
  'T': 0,
  'U': UUID('93037f42-b45f-4bf1-b576-ab4087318c30'),
  'V': 'more than 100'},
 {'A': 'Shashemene\n',
  'B': 'Hagugeta Keni',
  'C': 'Qali',
  'D': 'Qali',
  'E': 7.2509369999999995,
  'F': 38.62908,
  'G': 'Hand dug Well fitted with pump or windlass',
  'H': 'Hand dug Well',
  'I': 'Functional',
  'J': 'Manual operation',
  'K': '312',
  'L': 'Afridev',
  'M': 'CDA',
  'N': 2009,
  'O': '',
  'P': 0,
  'Q': 0,
  'R': 0,
  'S': 0,
  'T': 0,
  'U': UUID('428e5177-bc03-4632-b357-387901f906f1'),
  'V': 'more than 100'}]

In [15]:
### to json
df.to_json('../resources/js/data/nep_wp_data.json', orient="records", indent=2)

with open('../resources/js/data/nep_wp_config.json', 'w') as outfile:
    json.dump(configs, outfile)

OverflowError: Unsupported UTF-8 sequence length when encoding string