# Create sectorial data

This notebook aggregates the data of one sector at the water-authority level and saves it as a pickle file.

**Import Libraries**

In [None]:
import warnings
# Silence every warning from here on: the 'ignore' action is installed
# without any category, message or module restriction.
warnings.filterwarnings('ignore')

In [None]:
import json
import pandas as pd
import urllib
from tqdm.notebook import tqdm
import networkx as nx
from kando import kando_client

**Global Variables**

In [None]:
# NOTE(review): RESAMPLE, WINDOW and THRESHOLD are not referenced anywhere in
# the visible code - confirm whether they are still needed.
RESAMPLE = '15min'
WINDOW = 12
THRESHOLD = 0.05
# Point ID handed to get_graph() as the starting node of the network.
NODE = 1377
# Sector name the sites are filtered on; it is also embedded in the name of
# the pickle file written at the end.
SECTOR = 'Main Collector*'

**Functions**

In [None]:
def connect():
    """
    Open a Kando API client with the credentials stored in '../key.json'.

    The JSON file must provide the entries 'key' and 'secret'.

    :return: object returned by kando_client.client for the hard-coded URL
    """

    with open('../key.json') as key_file:
        credentials = json.load(key_file)

    return kando_client.client(
        "https://kando-staging.herokuapp.com",
        credentials['key'],
        credentials['secret'],
    )

In [None]:
def _parser(node, graph):
    """
    Walk the nested node description depth-first and fill the graph.

    A node with children contributes one parent -> child edge per child
    (weighted by the child's 'parent_distance') and is then descended into;
    a node without children is added as a named node instead.

    :param node: dict with 'point_id', 'children' and, for childless nodes, 'point' -> 'name'
    :param graph: graph object exposing add_node / add_edges_from (networkx.DiGraph())
    :return: None, the graph is modified in place
    """

    offspring = node['children']
    if len(offspring) > 0:
        for kid in offspring:
            link = (node['point_id'], kid['point_id'])
            graph.add_edges_from([link], weight=kid['parent_distance'])
            # Descend right away so nodes are inserted in depth-first order.
            _parser(kid, graph)
        return

    # Childless node: the only place where the 'name' attribute is stored.
    graph.add_node(node['point_id'], name=node['point']['name'])

In [None]:
def get_graph(point_id):
    """
    Fetch the network description of a point and turn it into a directed graph.

    :param point_id: point ID passed to client.network_graph
    :return: nx.DiGraph() filled by _parser and then reversed, so every edge
             points from a child to its parent
    """

    network = client.network_graph(point_id)
    graph = nx.DiGraph()
    _parser(network, graph)
    return graph.reverse()

In [None]:
def create_sectorial_dataframe(sites_list, start_date=2020):
    """
    Download the samplings of every site and stack them into one DataFrame.

    Sites whose response holds no samplings are reported and skipped. The
    per-site frames are collected and concatenated once at the end, so the
    result no longer depends on the first site having data.

    :param sites_list: list of points ID
    :param start_date: value forwarded as ``start`` to client.get_all
                       (NOTE(review): its unit/format is not visible here - confirm)
    :return: pandas DataFrame with a fresh 0..n-1 index; an empty DataFrame
             when no site returned any samplings
    """

    site_frames = []
    # Wrapping the list itself (not enumerate) lets tqdm show the total.
    for site in tqdm(sites_list):
        print(f'getting info from {site}')
        site_dic = client.get_all(point_id=site, start=start_date)
        if not site_dic['samplings']:
            print(f'No data about site {site}')
            continue
        print('creating a dataframe')
        site_frames.append(get_data_for_sectorial_motif_detection(site_dic))
        print('adding datafram to the sectorial dataframe')

    if not site_frames:
        return pd.DataFrame()
    # A single concat avoids re-copying the accumulated frame for every site;
    # ignore_index=True replaces the former reset_index(drop=True).
    return pd.concat(site_frames, ignore_index=True)

In [None]:
def get_data_for_sectorial_motif_detection(site_dic):
    """
    Build the per-site table used for sectorial motif detection.

    :param site_dic: site payload; the entries 'samplings', 'point_id' and
                     'point' (with 'pipe_info' and 'group') are read
    :return: pandas DataFrame indexed by 'DateTime' holding the sensor
             readings, the 'date' column with its weekday / month / hour
             features, and the site metadata repeated on every row
    """

    sensor_columns = ['DateTime', 'PH', 'EC', 'ORP', 'TEMPERATURE']
    frame = pd.DataFrame.from_dict(site_dic['samplings'], orient='index')[sensor_columns]

    # Keep the raw timestamps as a regular column before 'DateTime' is
    # converted and moved into the index.
    frame['date'] = frame['DateTime']
    frame['DateTime'] = pd.to_datetime(frame['DateTime'], unit='s')
    frame = frame.set_index('DateTime')

    # Helper is defined outside this view; its return value is not used, so
    # it presumably works on the frame in place - TODO confirm.
    impute_nulls_with_time_interpolation(frame, frame.columns, '5min')

    frame['date'] = pd.to_datetime(frame['date'], unit='s')
    stamps = frame['date']
    frame['weekday'] = stamps.apply(lambda ts: ts.weekday())
    frame['month'] = stamps.apply(lambda ts: ts.month)
    frame['hour'] = stamps.apply(lambda ts: ts.hour)

    # Site-level metadata, one scalar per new column.
    meta_columns = [
        'point_id', 'channel_shape', 'diameter', 'water_authority', 'sector'
    ]
    meta_values = [
        site_dic['point_id'],
        site_dic['point']['pipe_info']['channel_shape'],
        site_dic['point']['pipe_info']['diameter'],
        site_dic['point']['group']['water_authority']['id'],
        site_dic['point']['group']['sector']['id'],
    ]
    frame[meta_columns] = meta_values

    return frame

**Connect to API**

In [None]:
# Module-level API client; get_graph() and create_sectorial_dataframe() read
# this name as a global.
client = connect()

**Create a DataFrame for the chosen sector in the chosen water authority**

In [None]:
# Directed graph of the network fetched for NODE (get_graph returns it reversed).
G = get_graph(NODE)

In [None]:
# Map node ID -> name for every node carrying a 'name' attribute
# (_parser only sets it on nodes that have no children).
gichon_nodes = nx.get_node_attributes(G, 'name')
gichon_nodes

In [None]:
# Keep only the nodes whose sector name, as reported by client.get_data,
# equals SECTOR (one API call per node).
mifal = {
    node_id: node_name
    for node_id, node_name in gichon_nodes.items()
    if client.get_data(node_id)['point']['group']['sector']['name'] == SECTOR
}

In [None]:
# Point IDs of the selected sites, in the dict's iteration order.
sector = list(mifal.keys())
sector

In [None]:
# Download the data of all selected sites and stack it into one DataFrame.
df = create_sectorial_dataframe(sector)
df

In [None]:
# NOTE(review): SECTOR contains '*', which becomes part of the file name; that
# character is not allowed in Windows file names - confirm the target platform.
df.to_pickle(f"./gichon_{SECTOR}.pkl")