In [1]:
import numpy as np
import pandas as pd
from itertools import product
import os
from tqdm import tqdm

In [2]:
import matplotlib.pyplot as plt

### Locate data

In [3]:
# Locate files
# Both directories may be overridden with the environment variables
# MOBILCELL_SOURCE / MOBILCELL_TARGET, so switching machines does not require
# editing this cell; without them the original hard-coded locations are used.
# alternative source location:
#source = '/media/Data_storage/Mobilcell/Data/'
# os.path.join( ..., '' ) guarantees the trailing separator that the plain
# string concatenations in later cells rely on.
source = os.path.join( os.environ.get( 'MOBILCELL_SOURCE' ) or '/home/abiricz/Projects/Mobilcell/Data/', '' )
target = os.path.join( os.environ.get( 'MOBILCELL_TARGET' ) or '/media/Data_storage/Mobilcell/DayPolygonData/', '' )

In [4]:
# Collect the names of all polygon files in the source directory, in sorted
# order, as a numpy string array.
files_poligons = np.array(
    sorted( name for name in os.listdir(source) if 'POLIGONS' in name )
)
# show how many files were found and what the first few names look like
files_poligons.shape, files_poligons[:3]

((365,), array(['MT_MTA_NETWORK_POLIGONS_20181201.csv.gz',
        'MT_MTA_NETWORK_POLIGONS_20181202.csv.gz',
        'MT_MTA_NETWORK_POLIGONS_20181203.csv.gz'], dtype='<U39'))

### Calculate universal grid for rasters

In [5]:
# This seems to be the global grid of rasters, same for every day!
# NOTE: the expressions in the trailing comments below are earlier derivation
# attempts; they do not evaluate to the hard-coded values actually used.
raster_size = 127 # grid step, the same along x and y

start_x = 48262 # 48210 + 127 - ( 48210 % 127 )
end_x = 362968 # 366660 + 127 - ( 366660 % 127 )

start_y = 426468 # 426341 + ( 426341 % 127 )
end_y = 934214 # 934219 + ( 934219 % 127 )

# number of grid steps between the first and the last raster along each axis
# (exact integer division instead of a float round trip)
num_x = ( end_x - start_x ) // raster_size
num_y = ( end_y - start_y ) // raster_size

print( start_x, end_x, start_y, end_y, num_x, num_y )

# this raster encoding is universal then
# (both end points are included, so there are num_x+1 and num_y+1 values)
raster_x = np.arange(start_x, end_x+raster_size, raster_size, dtype=np.int32)
raster_y = np.arange(start_y, end_y+raster_size, raster_size, dtype=np.int32)

# get coordinate vector: one (x, y) row per raster with x varying slowest, so
# row  x_num * (num_y+1) + y_num  holds ( raster_x[x_num], raster_y[y_num] ).
# Built with repeat/tile rather than itertools.product, which would first
# materialise roughly ten million Python tuples.
raster_coords = np.column_stack( ( np.repeat( raster_x, raster_y.size ),
                                   np.tile( raster_y, raster_x.size ) ) )

48262 362968 426468 934214 2478 3998


### Load data

In [6]:
# Read one sample day (position 100 in the sorted file list) for exploration
poligons_path = source + files_poligons[100]
poligons_df = pd.read_csv( poligons_path, sep=';' )

# drop poligons outside of the country: keep only rows whose eovx and eovy
# both lie strictly inside the bounding box
poligons_df = poligons_df[
    ( poligons_df.eovx.values > 48210 ) & ( poligons_df.eovx.values < 366660 ) &
    ( poligons_df.eovy.values > 426341 ) & ( poligons_df.eovy.values < 934219 )
]

In [7]:
# Preview the first rows of the filtered sample day
poligons_df.head()

Unnamed: 0,network_identifier,eovx,eovy,dx,dy,network_element_type
0,10071347,216918,496318,127,127,D
1,10071347,216918,496445,127,127,D
2,10071347,217045,496191,127,127,D
3,10071347,217045,496318,127,127,D
4,10071347,217045,496572,127,127,D


### Match raster index with the universal grid

In [8]:
# calculate raster encodings
# eovx_num / eovy_num: number of 127-wide grid steps from the grid origin
#   (start_x, start_y); astype(int) truncates the quotient toward zero.
# eov_idx: flat index with x varying slowest; each x column holds num_y+1
#   rasters, which matches the row order of raster_coords.
# assign() returns a new frame, so nothing is written into the boolean-filtered
# slice (no SettingWithCopyWarning) and re-running the cell is harmless.
poligons_df = poligons_df.assign(
    eovx_num=lambda df: ( (df.eovx - start_x) / 127 ).astype(int),
    eovy_num=lambda df: ( (df.eovy - start_y) / 127 ).astype(int),
    eov_idx=lambda df: df.eovx_num * (num_y+1) + df.eovy_num,
)

In [9]:
# Same preview, now including the eovx_num, eovy_num and eov_idx columns
poligons_df.head()

Unnamed: 0,network_identifier,eovx,eovy,dx,dy,network_element_type,eovx_num,eovy_num,eov_idx
0,10071347,216918,496318,127,127,D,1328,550,5311222
1,10071347,216918,496445,127,127,D,1328,551,5311223
2,10071347,217045,496191,127,127,D,1329,549,5315220
3,10071347,217045,496318,127,127,D,1329,550,5315221
4,10071347,217045,496572,127,127,D,1329,552,5315223


In [10]:
# Round-trip check: 5311223 is the eov_idx of the previewed row with
# eovx=216918, eovy=496445, so this lookup should return those coordinates
raster_coords[ 5311223 ]

array([216918, 496445], dtype=int32)

### Inspect the (network id, raster index) pairs and encode every day

In [11]:
# The (network_identifier, eov_idx) pairs - the two columns the export loop saves
poligons_df[ ["network_identifier", "eov_idx"] ]

Unnamed: 0,network_identifier,eov_idx
0,10071347,5311222
1,10071347,5311223
2,10071347,5315220
3,10071347,5315221
4,10071347,5315223
...,...,...
26053295,99591498,4635914
26053296,99591498,4635915
26053297,99591498,4635916
26053298,99591498,4639913


In [12]:
# Preview of an output path: the last '_'-separated part of the file name
# starts with the 8-character date stamp.
# NOTE(review): the export loop saves under the prefix 'polygon_encoded_',
# not 'polygon_compressed_' - confirm which name is intended.
target + 'polygon_compressed_'+files_poligons[0].split('_')[-1][:8]

'/media/Data_storage/Mobilcell/DayPolygonData/polygon_compressed_20181201'

In [13]:
# Encode every daily polygon file and store the (network_identifier, eov_idx)
# pairs as one numpy array per day.

# make sure the output directory exists before the long-running loop starts,
# instead of failing at the first np.save after a large file has been read
os.makedirs( target, exist_ok=True )

for i in tqdm( range(files_poligons.shape[0]) ):
    # load file
    poligons_path = os.path.join( source, files_poligons[i] )
    poligons_df = pd.read_csv( poligons_path, delimiter=';' )

    # drop poligons outside of the country
    # (both coordinates strictly inside the bounding box)
    poligons_df = poligons_df[
        ( poligons_df.eovx.values > 48210 ) & ( poligons_df.eovx.values < 366660 ) &
        ( poligons_df.eovy.values > 426341 ) & ( poligons_df.eovy.values < 934219 )
    ]

    # calculate raster encodings
    # assign() builds a new frame, so no column is written into the filtered
    # slice (avoids pandas' SettingWithCopyWarning); eov_idx is the flat grid
    # index with x varying slowest and num_y+1 rasters per x column.
    poligons_df = poligons_df.assign(
        eovx_num=lambda df: ( (df.eovx - start_x) / 127 ).astype(int),
        eovy_num=lambda df: ( (df.eovy - start_y) / 127 ).astype(int),
        eov_idx=lambda df: df.eovx_num * (num_y+1) + df.eovy_num,
    )

    # save encoded data; the name carries the 8-character date stamp of the
    # input file and np.save appends the '.npy' extension
    savename = os.path.join( target, 'polygon_encoded_'+files_poligons[i].split('_')[-1][:8] )
    save_df = poligons_df[ ["network_identifier", "eov_idx"] ]
    np.save( savename, save_df.values )

    # saving csv is very slow!!
    #save_df.to_csv( savename, index=False, )

100%|██████████| 365/365 [1:19:29<00:00, 13.07s/it]
