In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import pyproj
from pathlib import Path
from os import listdir
from os.path import isfile, join
import matplotlib.pyplot as plt

In [3]:
raw_data_path = Path.joinpath(Path.cwd(),'raw')
metadata_path = Path.joinpath(Path.cwd(),'metadata').joinpath('New_Celltable_final_converted.xls')
sel_links_path = Path('/Users/adameshel/Documents/Python_scripts/process_cml_rawdata/selected_links.txt')

In [4]:
def cellcom_ids(site_id):
    ''' remove IP numbers from cellcom site_a_id/site_b_id,
    and also convert all letters to lower case'''
    if type(site_id) == float:
        return np.nan
    elif len(site_id.strip('.0123456789;')) == 0:
        return np.nan
    elif '; ' in site_id:
        return site_id.split('; ')[1]
    else:
        return site_id[0:4]
        
def process_cellcom(xlfile, col_names):
    ''' Process metadata for cellcom '''
#     xl = pd.ExcelFile(xlfile)

#     df = xl.parse('Sheet1', skiprows=1) # skip the first row of the excel file
    if '.xls' in xlfile:
        df = pd.read_excel(xlfile)
    else:
        df = pd.read_csv(xlfile)
    cols = ['LINK_NO', 'STATUS', 'TX_FREQ_HIGH_MHZ', 'TX_FREQ_LOW_MHZ', 'POL', 
            'LENGTH_KM', 'SITE1_NAME', 'ID_SITE1', 'EAST1', 'NORTH1',
            'HEIGHT_ABOVE_SEA1_M', 'SITE2_NAME', 'ID_SITE2', 'EAST2',
            'NORTH2', 'HEIGHT_ABOVE_SEA2_M']
    df = df[cols]
    df['link_id'] = df['ID_SITE1'] + '-' + df['ID_SITE2']
    df['link_id'] = df['link_id'].str.lower()
    df.insert(16,'SLOTS', '')
    df.insert(0,'SP', 'cellcom')
    df.columns = col_names
    
    # convert EAST/NORTH to LAT/LON decimal
    bng = pyproj.Proj(init='epsg:2039')
    wgs84 = pyproj.Proj(init='epsg:4326')
    # lon, lat = pyproj.transform(from,to,easting,northing)
    df['LON1'],df['LAT1'] = pyproj.transform(bng, wgs84, df['LON1'].values, df['LAT1'].values)
    df['LON2'],df['LAT2'] = pyproj.transform(bng, wgs84, df['LON2'].values, df['LAT2'].values)
    
    # process cellcom ids to fix problems
    df['SITE1_ID'] = df['SITE1_ID'].apply(cellcom_ids)
    df['SITE2_ID'] = df['SITE2_ID'].apply(cellcom_ids)
    
    # remove '-X' from cml_id
    df['Link_num'] = df['Link_num'].str.partition('-')[0]
    return df


In [5]:
# process all the metadata 
col_names = ['SP', 'Link_num', 'Status', 'Frequency1', 
             'Frequency2', 'Polarization', 'Length_KM', 
             'SITE1_Name', 'SITE1_ID', 'LON1', 'LAT1', 
             'Height_above_sea1', 'SITE2_Name', 'SITE2_ID', 
             'LON2', 'LAT2', 'Height_above_sea2','SLOTS', 'link id']
MD = process_cellcom(str(metadata_path), col_names)

# convert object to numeric values with additional processing
#MD.loc[:,'Link_num'] = pd.to_string(MD.loc[:,'Link_num'], errors='coerce')

MD.loc[:,'Frequency1'] = pd.to_numeric(MD.loc[:,'Frequency1'], errors='coerce')*1e9/1000 # convert MHz to GHz
MD.loc[:,'Frequency2'] = pd.to_numeric(MD.loc[:,'Frequency2'], errors='coerce')*1e9/1000  

MD.loc[:,'LAT1'] = pd.to_numeric(MD.loc[:,'LAT1'], errors='coerce')
MD.loc[:,'LON1'] = pd.to_numeric(MD.loc[:,'LON1'], errors='coerce')
MD.loc[:,'LAT2'] = pd.to_numeric(MD.loc[:,'LAT2'], errors='coerce')
MD.loc[:,'LON2'] = pd.to_numeric(MD.loc[:,'LON2'], errors='coerce')
MD.loc[:,'Length_KM'] = pd.to_numeric(MD.loc[:,'Length_KM'], errors='coerce')

MD.loc[:,'Height_above_sea1'] = pd.to_numeric(MD.loc[:,'Height_above_sea1'], errors='coerce')
MD.loc[:,'Height_above_sea2'] = pd.to_numeric(MD.loc[:,'Height_above_sea2'], errors='coerce')

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  df['LON1'],df['LAT1'] = pyproj.transform(bng, wgs84, df['LON1'].values, df['LAT1'].values)
  df['LON2'],df['LAT2'] = pyproj.transform(bng, wgs84, df['LON2'].values, df['LAT2'].values)


In [6]:
# select raw-data files to open
only_files = sorted([f for f in listdir(raw_data_path) if '.txt' in f])
# only_files = ['cellcom/SOEM-M_HC_RADIO_SINK_20150121_001500.txt',
#              'cellcom/SOEM-M_HC_RADIO_SOURCE_20150121_001500.txt'] 

# select specific links (by link number from cellcom meta-data file)
# sel_links = ['5099','9409','9406']
sel_links = list(np.genfromtxt(str(sel_links_path),dtype='str'))
sites = []
for i,link in enumerate(sel_links):
    l = str(link).partition('-')
    sites.append(l[0].lower())
    sites.append(l[-1].lower())

In [7]:
MD.head()

Unnamed: 0,SP,Link_num,Status,Frequency1,Frequency2,Polarization,Length_KM,SITE1_Name,SITE1_ID,LON1,LAT1,Height_above_sea1,SITE2_Name,SITE2_ID,LON2,LAT2,Height_above_sea2,SLOTS,link id
0,cellcom,10000,Active,19232500000.0,18222500000.0,V,5.31,CURE MEDICAL EMEK HEFER,1U22,34.898434,32.404579,11.0,MOSHAV BITAN AHARON NORTH,E122,34.863604,32.366931,12.6,,1u22-e122
1,cellcom,10001,Active,22525000000.0,21325000000.0,V,1.48,PAPER PLANET HADERA,r019,34.980534,31.919844,161.2,SCHUNAT SHIMSHONY MODIEIN NEW,a019,34.992734,31.911523,212.4,,r019-a019
2,cellcom,10003,Shut Down,23370000000.0,22170000000.0,V,3.29,INDUSTRIES ALON TAVOR,81B0,35.370571,32.632104,306.5,SHMUEL MEIR AND SONS TRADE AFULA,810S,35.3356,32.634112,167.2,,81b0-810s
3,cellcom,10005,Shut Down,39228000000.0,37968000000.0,H,0.81,A T SDEROT TZOMET AVIVIM,4G06,34.608263,31.521649,95.3,NESTLE SDEROT,4S06,34.610377,31.528685,87.7,,4g06-4s06
4,cellcom,10009,Active,23250000000.0,22050000000.0,V,2.04,PARVITAL GRANIT Ltd. EMEK HEFER,0R34,34.895486,32.402371,9.0,EKALIPTUS COPSE KFAR VITKIN,0A34,34.891009,32.384406,5.9,,0r34-0a34


In [8]:
sites.append('143a')
sites.append('a451')
sites.append('a349')
sites.append('a273')


In [16]:
# to_process = MD.loc[MD['link id'].isin(sel_links)] # take only metadata for links in sel_links
# 'link id' is a combination of the names of "site1-site2"

# sel_links = [i.split('-', 1)[0] for i in sel_links] # take only number before the '-' in each link number
    
# find raw-data for the links
RD_rx = [] # gather all RADIO_SINK
RD_tx = [] # gather all RADIO_SOURCE

for rdfile in only_files:
    rdfile = str(raw_data_path.joinpath(rdfile))
    RD = pd.read_csv(rdfile,  index_col = False)
    RD.insert(6,'Site', '')
    RD['Site'] = RD['NeAlias'].str.partition('_')[0]
    RD['Site'] = RD['Site'].str.lower()
    RD['NeAlias'] = RD['NeAlias'].str.rpartition('_')[2]
    RD['NeAlias'] = RD['NeAlias'].str.rpartition('.')[0]
#     RD['MeasuringSite_hop'] = RD['Site'] + '_' + RD['NeAlias']
#     print(RD['MeasuringSite_hop'])
    # find specific links in the file
#     RD = RD.loc[RD['NeAlias'].isin(sel_links)]
#     print(type(RD['Site'].values[0]))
    RD = RD.loc[RD['Site'].isin(sites)]
    
    
    # separate to RX and TX
    if str.find(rdfile, 'RADIO_SINK') != -1:
        RD = RD[['Time','Interval','Site','NeAlias','PowerRLTMmin','PowerRLTMmax']]
        RD_rx.append(RD)
        
    elif str.find(rdfile, 'RADIO_SOURCE') != -1:
        RD = RD[['Time','Interval','Site','NeAlias','PowerTLTMmin','PowerTLTMmax']]
        RD_tx.append(RD)

RD_rx = pd.concat(RD_rx) # the min/max RSL
RD_tx = pd.concat(RD_tx) # the min/max TSL

# replace NeAlias with link_number
RD_rx = RD_rx.rename(columns={'NeAlias': 'Hop_number', 'Site': 'Measuring_site'})
RD_tx = RD_tx.rename(columns={'NeAlias': 'Hop_number', 'Site': 'Measuring_site'})

# take only 15 minute data
RD_rx = RD_rx[RD_rx['Interval'] == 15]
RD_tx = RD_tx[RD_tx['Interval'] == 15]

hops = []
# hops.append(RD_rx['Hop_number'].unique())
hops.append(RD_tx['Hop_number'].unique())
hops = list(hops[0])

RD_rx['Link_number'] = '-'
RD_tx['Link_number'] = '-'
for h,hop in enumerate(hops):
    rsl = RD_rx[RD_rx['Hop_number'] == hop]
    tsl = RD_tx[RD_tx['Hop_number'] == hop]
    ###########################
    #### THIS SHOULD BE != ####
    ###########################
    RD_rx['Link_number'] = np.where(
        (RD_rx['Hop_number'] == hop) & 
        (rsl['Measuring_site'].values[0] != tsl['Measuring_site'].values[0]),
        RD_tx['Measuring_site'] + '-' + RD_rx['Measuring_site'], 
        RD_rx['Link_number']
     )
    RD_tx['Link_number'] = np.where(
        (RD_tx['Hop_number'] == hop) & 
        (tsl['Measuring_site'].values[0] != rsl['Measuring_site'].values[0]),
        RD_tx['Measuring_site'] + '-' + RD_rx['Measuring_site'], 
        RD_tx['Link_number']
     )


# # If you want you can select RSL and TSL for a specific link
# RSL = RD_rx[RD_rx['Hop_number'] == '6446']
# TSL = RD_tx[RD_tx['Hop_number'] == '6446']

# # Take only time and min/max RSL or TSL
# RSL = RSL[['Time','PowerRLTMmin','PowerRLTMmax']]
# TSL = TSL[['Time','PowerTLTMmin','PowerTLTMmax']]
# print('Done')

  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.rea

  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.read_csv(rdfile,  index_col = False)
  RD = pd.rea

In [17]:
RD_rx.where(RD_rx['Link_number'] != '-')

Unnamed: 0,Time,Interval,Measuring_site,Hop_number,PowerRLTMmin,PowerRLTMmax,Link_number
4,2015-11-30 00:15:00,15,a349,5062,-45.8,-45.2,a349-a349
9,2015-11-30 00:15:00,15,143a,6446,-50.8,-50.2,143a-143a
22,2015-11-30 00:15:00,15,a451,5055,-47.7,-46.1,a451-a451
62,2015-11-30 00:30:00,15,a349,5062,-45.8,-45.2,a349-a349
66,2015-11-30 00:30:00,15,143a,6446,-50.8,-50.2,143a-143a
...,...,...,...,...,...,...,...
5323,2015-12-30 23:30:00,15,a451,5055,-48.3,-46.4,a451-a451
5362,2015-12-30 23:45:00,15,a349,5062,-45.8,-45.2,a349-a349
5380,2015-12-30 23:45:00,15,a451,5055,-48.0,-46.4,a451-a451
5417,2015-12-31 00:00:00,15,a349,5062,-45.8,-45.2,a349-a349


In [18]:
RD_rx[RD_rx['Hop_number']=='5062']

Unnamed: 0,Time,Interval,Measuring_site,Hop_number,PowerRLTMmin,PowerRLTMmax,Link_number
4,2015-11-30 00:15:00,15,a349,5062,-45.8,-45.2,a349-a349
62,2015-11-30 00:30:00,15,a349,5062,-45.8,-45.2,a349-a349
120,2015-11-30 00:45:00,15,a349,5062,-45.8,-45.2,a349-a349
177,2015-11-30 01:00:00,15,a349,5062,-45.8,-45.2,a349-a349
234,2015-11-30 01:15:00,15,a349,5062,-45.8,-45.2,a349-a349
...,...,...,...,...,...,...,...
5190,2015-12-30 23:00:00,15,a349,5062,-45.8,-45.2,a349-a349
5250,2015-12-30 23:15:00,15,a349,5062,-45.8,-45.2,a349-a349
5303,2015-12-30 23:30:00,15,a349,5062,-45.8,-45.2,a349-a349
5362,2015-12-30 23:45:00,15,a349,5062,-45.8,-45.2,a349-a349
