In [48]:
# this script is created by A. Biricz, last modified 03.01.2021.

## Arguments:
# --source_folder: folder where the processed polygon data is located
# --target_folder: folder where the results are saved (same as source by default)

# Example for running:
# python3 .py --source_folder '/media/Data_storage/Mobilcell/DayPolygonData/' --target_folder '/media/Data_storage/Mobilcell/DayPolygonData/'

import numpy as np
import pandas as pd
from itertools import product
import os
from tqdm import tqdm

In [2]:
# folder holding the processed (encoded) daily polygon files
source = '/media/Data_storage/Mobilcell/DayPolygonData/'
# folder where results are written; same as source by default (see --target_folder in the header).
# It must be defined, because the final cell saves to target+'fixed_tower_locations.csv'.
target = source
#target = '/media/Data_storage/Mobilcell/DayEventData/'

In [3]:
# names of the daily polygon files in `source` that are already encoded
# (file name contains 'encoded'), in sorted order
files_pol_enc = np.array( sorted( fname for fname in os.listdir( source )
                                  if 'encoded' in fname ) )

In [4]:
# ids of all polygons (towers) for the whole year; the ids are in column 0 of the saved array
tower_id_all_global = np.load( source + 'unique-tower-id_all.npy' )[:, 0]
# lookup: original tower id -> its position (0, 1, 2, ...) in tower_id_all_global
tower_to_int_all = { tower_id: position for tower_id, position in
                     zip( tower_id_all_global, np.arange( tower_id_all_global.shape[0] ) ) }

In [9]:
# number of unique tower ids loaded for the whole year
tower_id_all_global.shape

(43384,)

In [10]:
# Global grid of rasters; it appears to be the same for every day.
# start/end give the extent of the grid along each axis, raster_step the spacing between rasters.
raster_step = 127
start_x = 48262
end_x = 362968
start_y = 426468
end_y = 934214
# number of steps between start and end along each axis
num_x = int( ( end_x - start_x ) / raster_step )
num_y = int( ( end_y - start_y ) / raster_step )

# raster positions along each axis; end + one step is the exclusive stop, so `end` itself is
# included when it lies on the grid. This raster encoding is universal.
raster_x = np.arange(start_x, end_x+raster_step, raster_step, dtype=np.int32)
raster_y = np.arange(start_y, end_y+raster_step, raster_step, dtype=np.int32)

# coordinate vector: row i holds the (x, y) of raster index i, with y varying fastest, i.e. the
# same rows and order as itertools.product( raster_x, raster_y ). meshgrid builds it directly in
# numpy instead of going through millions of Python tuples.
raster_coords = np.stack( np.meshgrid( raster_x, raster_y, indexing='ij' ), axis=-1 ).reshape( -1, 2 )

In [35]:
# example day: load the encoded polygon rows of the third file in the list
pol_enc = np.load( source + files_pol_enc[2] )

# order the rows by tower id (column 0) so that the rows of each tower are contiguous
sort_idx = np.argsort( pol_enc[:,0] )
pol_enc_tower = pol_enc[ sort_idx ]

# row positions where the tower id changes, i.e. where the rows of the next tower begin
pol_enc_diff_idx = np.flatnonzero( np.diff( pol_enc_tower[:,0] ) ) + 1
# add the first (zero) and last (size of array) boundaries, which diff cannot detect
pol_enc_idx = np.concatenate( ( [0], pol_enc_diff_idx, [ pol_enc.shape[0] ] ) )

# one [start, stop) row range per tower
all_idx_tower = np.column_stack( ( pol_enc_idx[:-1], pol_enc_idx[1:] ) )

In [36]:
# first rows after sorting by column 0 (tower id); column 1 is used as an index into raster_coords
pol_enc_tower[:10]

array([[  71892, 6379446],
       [  71892, 6395447],
       [  71892, 6395446],
       [  71892, 6395445],
       [  71892, 6395444],
       [  71892, 6395443],
       [  71892, 6395442],
       [  71892, 6395448],
       [  71892, 6395441],
       [  71892, 6395439]])

In [37]:
# [start, stop) row ranges of the first three towers in pol_enc_tower
all_idx_tower[:3]

array([[   0, 3956],
       [3956, 6706],
       [6706, 8644]])

In [38]:
# per tower: its id (read from the first row of its range) ...
tower_id_all = np.array( [ pol_enc_tower[ start, 0 ] for start, stop in all_idx_tower ] )
# ... and the mean (x, y) of all rasters listed for it (column 1 indexes raster_coords)
tower_coords_all = np.array( [ raster_coords[ pol_enc_tower[ start:stop, 1 ] ].mean( axis=0 )
                               for start, stop in all_idx_tower ] )

In [39]:
# sanity check: one id and one (x, y) pair per tower found on this day
tower_id_all.shape, tower_coords_all.shape

((42785,), (42785, 2))

In [45]:
# preview table: original tower id, its integer index from tower_to_int_all,
# and the mean (x, y) truncated to integers
np.concatenate( (tower_id_all.reshape(-1,1), np.array([ tower_to_int_all[ k ] for k in tower_id_all ]).reshape(-1,1), 
                     tower_coords_all.astype(int) ), axis=1 )

array([[    71892,         0,    250856,    557281],
       [    72307,         1,    243355,    558407],
       [    72922,         2,    247247,    549713],
       ...,
       [553841622,     43381,    288496,    826877],
       [553841698,     43382,    286254,    826714],
       [553841790,     43383,    287470,    824406]])

In [46]:
# inspect the per-tower summary table stored in the source folder
# NOTE(review): the mean_x/mean_y displayed for original_id 71892 (48797, 429016) do not match the
# per-day mean computed for the same tower in this notebook (250856, 557281) -- verify how the
# file was produced before relying on it
pd.read_csv( source+'fixed_tower_locations.csv' )

Unnamed: 0,original_id,tower_id,mean_x,mean_y,std_x,std_y,perc_10_x,perc_10_y,perc_50_x,perc_50_y,perc_90_x,perc_90_y,perc_99_x,perc_99_y
0,71892,0,48797,429016,2,3,48796,429014,48796,429018,48800,429018,48803,429018
1,72307,1,48905,431421,0,0,48905,431421,48905,431421,48905,431421,48905,431421
2,72922,2,48906,432519,1,15,48905,432484,48908,432529,48908,432529,48908,432529
3,73325,3,49123,432576,0,22,49122,432563,49124,432563,49124,432607,49124,432642
4,140903,4,49247,432691,4,0,49247,432691,49247,432691,49247,432691,49262,432691
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43371,553841508,43379,358778,932530,3,43,358777,932496,358778,932519,358786,932642,358786,932649
43372,553841557,43380,358805,932809,5,92,358803,932653,358803,932889,358815,932889,358819,932889
43373,553841622,43381,358908,933641,1,346,358908,932889,358908,933779,358910,933915,358915,933915
43374,553841698,43382,358971,933976,4,73,358969,933915,358969,933915,358977,934064,358985,934064


In [60]:


# function that does the calculations for one day (called once per daily file)
def calc_tower_coords_day( source, files_pol_enc ):
    """Compute the mean raster coordinate of every tower found in one daily file.

    Relies on two notebook-level names: `raster_coords` (row i holds the (x, y) of
    raster index i) and `tower_to_int_all` (original tower id -> integer index).

    Parameters
    ----------
    source : str
        Folder of the encoded files. It is concatenated directly with the file
        name, so it has to end with a path separator.
    files_pol_enc : str
        Name of a single encoded daily file (.npy) inside `source`. Column 0 of
        the stored array is the tower id, column 1 the raster index.

    Returns
    -------
    numpy.ndarray, shape (n_towers, 3), integer
        Columns: tower index from `tower_to_int_all`, mean x, mean y (the means
        are truncated to integers). Rows are ordered by original tower id.
        A file without rows gives an empty (0, 3) array.

    Raises
    ------
    KeyError
        If the file contains a tower id that is missing from `tower_to_int_all`.
    """
    pol_enc = np.load( source + files_pol_enc )

    # a day without rows has no towers: return an empty table instead of failing
    # on the first-row lookup below
    if pol_enc.shape[0] == 0:
        return np.empty( (0, 3), dtype=int )

    # order the rows by tower id so that the rows of each tower are contiguous
    sort_idx = np.argsort( pol_enc[:,0] )
    pol_enc_tower = pol_enc[ sort_idx ]
    tower_col = pol_enc_tower[:,0]
    raster_col = pol_enc_tower[:,1]

    # positions where the tower id changes, i.e. where the next tower begins
    pol_enc_diff_idx = np.where( np.diff( tower_col ) )[0]+1
    # add the first (zero) and last (size of array) boundaries, otherwise left out
    pol_enc_idx = np.concatenate( ( [0], pol_enc_diff_idx, [ pol_enc.shape[0] ] ) )

    # id of each tower = value in the first row of its range
    tower_id_all = tower_col[ pol_enc_idx[:-1] ]
    # mean (x, y) over all rasters listed for each tower
    tower_coords_all = np.array( [ np.mean( raster_coords[ raster_col[ start:stop ] ], axis=0 )
                                   for start, stop in zip( pol_enc_idx[:-1], pol_enc_idx[1:] ) ] )

    return np.concatenate( (np.array([ tower_to_int_all[ k ] for k in tower_id_all ]).reshape(-1,1),
                     tower_coords_all.astype(int) ), axis=1 )

# calling the function for every selected daily file
# max_days limits the run to the first files of the list (quick check);
# set it to None to process the whole year
max_days = 5
tower_coords_year = []
for day_file in tqdm( files_pol_enc[:max_days] ):
    tower_coords_year.append( calc_tower_coords_day( source, day_file ) )

100%|██████████| 5/5 [00:12<00:00,  2.53s/it]


In [61]:
# collecting, sorting results
# stack the per-day tables into one array (columns: tower index, x, y)
all_tower_coords_year = np.concatenate( tower_coords_year )
# reorder whole rows by tower index (column 0), so the coordinates stay attached to their tower
sorting = np.argsort( all_tower_coords_year[:,0] )
all_tower_coords_year = all_tower_coords_year[sorting]

In [66]:
# first 100 rows after sorting: the rows of each tower index are now adjacent, one per processed day
all_tower_coords_year[:100]

array([[     0, 250856, 557281],
       [     0, 250856, 557281],
       [     0, 250856, 557281],
       [     0, 250856, 557281],
       [     0, 250856, 557281],
       [     1, 243355, 558407],
       [     1, 243355, 558407],
       [     1, 243355, 558407],
       [     1, 243355, 558407],
       [     1, 243355, 558407],
       [     2, 247247, 549713],
       [     2, 247247, 549713],
       [     2, 247247, 549713],
       [     2, 247247, 549713],
       [     2, 247247, 549713],
       [     3, 248652, 551070],
       [     3, 248652, 551070],
       [     3, 248652, 551070],
       [     3, 248652, 551070],
       [     3, 248652, 551070],
       [     4, 273635, 796106],
       [     4, 273635, 796106],
       [     4, 273635, 796106],
       [     4, 273635, 796106],
       [     4, 273635, 796106],
       [     5, 270283, 645289],
       [     5, 270283, 645289],
       [     5, 270283, 645289],
       [     5, 270283, 645289],
       [     5, 270283, 645289],
       [  

In [63]:
# calculating fix coordinates (and do some analysis) for the towers for the whole year
# the rows are sorted by tower index, so a new tower starts wherever column 0 changes;
# 0 and the number of rows are added as the outer boundaries
all_tower_coords_year_diff_idx = np.concatenate( (
    [0],
    np.flatnonzero( np.diff( all_tower_coords_year[:,0] ) ) + 1,
    [ all_tower_coords_year.shape[0] ],
) )

# one [start, stop) row range per tower
all_tower_coords_year_idx = np.column_stack( ( all_tower_coords_year_diff_idx[:-1],
                                               all_tower_coords_year_diff_idx[1:] ) )


In [68]:
# [start, stop) row ranges per tower in all_tower_coords_year
all_tower_coords_year_idx

array([[     0,      5],
       [     5,     10],
       [    10,     15],
       ...,
       [213910, 213915],
       [213915, 213920],
       [213920, 213925]])

In [69]:
# per tower: tower index, then mean, std and the 10/50/90/99th percentiles
# of its (x, y) values over the processed days
tower_data_year = []
for start, stop in all_tower_coords_year_idx:
    tower_xy = all_tower_coords_year[ start:stop, 1: ]
    percentiles = [ np.percentile( tower_xy, q, axis=0 ) for q in ( 10, 50, 90, 99 ) ]
    stats_row = np.concatenate( [ [ all_tower_coords_year[ start, 0 ] ],
                                  tower_xy.mean( axis=0 ), tower_xy.std( axis=0 ) ] + percentiles )
    # store everything as integers (fractional parts are dropped)
    tower_data_year.append( stats_row.astype( np.int64 ) )
tower_data_year = np.array( tower_data_year )

In [70]:
# one row per tower: tower index followed by the (x, y) statistics computed above
tower_data_year

array([[     0, 250856, 557281, ..., 557281, 250856, 557281],
       [     1, 243355, 558407, ..., 558407, 243355, 558407],
       [     2, 247247, 549713, ..., 549713, 247247, 549713],
       ...,
       [ 43381, 288496, 826877, ..., 826877, 288496, 826877],
       [ 43382, 286254, 826714, ..., 826714, 286254, 826714],
       [ 43383, 287470, 824406, ..., 824406, 287470, 824406]])

In [None]:
# collecting, sorting results
all_tower_coords_year = np.concatenate( tower_coords_year )
# Reorder whole rows by tower index (column 0). np.sort( ..., axis=0 ) must not be used here:
# it sorts every column independently and so detaches the coordinates from their tower.
sorting = np.argsort( all_tower_coords_year[:,0] )
all_tower_coords_year = all_tower_coords_year[sorting]

# calculating fix coordinates (and do some analysis) for the towers for the whole year
# a new tower starts wherever column 0 changes; 0 and the row count are the outer boundaries
all_tower_coords_year_diff_idx = np.where( np.diff( all_tower_coords_year[:,0] ) )[0]+1
all_tower_coords_year_diff_idx = np.insert(all_tower_coords_year_diff_idx, 0, 0, axis=0)
all_tower_coords_year_diff_idx = np.append( all_tower_coords_year_diff_idx, all_tower_coords_year.shape[0] )

# one [start, stop) row range per tower
all_tower_coords_year_idx = np.vstack( ( all_tower_coords_year_diff_idx[:-1], all_tower_coords_year_diff_idx[1:] ) ).T

# per tower: tower index, then mean, std and the 10/50/90/99th percentiles of its (x, y) values
tower_data_year = []
for j in all_tower_coords_year_idx:
    tower_xy = all_tower_coords_year[ j[0]:j[1], 1: ]
    mean_val = np.mean( tower_xy, axis=0 )
    std_val = np.std( tower_xy, axis=0 )
    perc_val_10 = np.percentile( tower_xy, 10, axis=0 )
    perc_val_50 = np.percentile( tower_xy, 50, axis=0 )
    perc_val_90 = np.percentile( tower_xy, 90, axis=0 )
    perc_val_99 = np.percentile( tower_xy, 99, axis=0 )
    # everything is stored as integers (fractional parts are dropped)
    tower_data_year.append( np.concatenate( ( [ all_tower_coords_year[ j[0], 0 ] ],
                                              mean_val, std_val,
                                              perc_val_10, perc_val_50,
                                              perc_val_90, perc_val_99,
                                            )
                                          ).astype(np.int64) )
tower_data_year = np.array( tower_data_year )

# prepare data to be saved and used later on to build a fixed graph
# reverse lookup: integer tower index -> original tower id
int_to_towers = { v: k for k, v in tower_to_int_all.items() }
original_ids = np.array( [ int_to_towers[i] for i in tower_data_year[:,0] ] ).reshape(-1,1)

# NOTE: `target` (output folder, ending with a path separator) has to be defined before this runs
pd.DataFrame( np.concatenate( ( original_ids, tower_data_year ), axis=1 ),
              columns=[ 'original_id', 'tower_id', 'mean_x',
                        'mean_y', 'std_x', 'std_y', 'perc_10_x', 'perc_10_y',
                        'perc_50_x', 'perc_50_y', 'perc_90_x', 'perc_90_y',
                        'perc_99_x', 'perc_99_y' ] ).to_csv( target+'fixed_tower_locations.csv',
                                                             index=False )

