In [1]:
import re

import numpy as np
import pandas as pd
import os
import featuretools as ft
from shl.prepare import normalize_epoch_time, normalize_lat_long
import plotly.express as px
from sklearn.feature_extraction.text import TfidfVectorizer
import requests

from shl.features import WifiFeature

%load_ext autoreload
%autoreload 2

In [2]:
def _read_normalized(parquet_path):
    """Read one parquet table and pass it through ``normalize_epoch_time``.

    The ``'epoch_time'`` column name is forwarded to the project helper
    unchanged. Later cells join on ``epoch_time_id``, which is presumably
    added by that helper -- TODO confirm against ``shl.prepare``.
    """
    raw_table = pd.read_parquet(parquet_path)
    return normalize_epoch_time(raw_table, 'epoch_time')


# Cell-tower and location readings for each data split.
train_cell = _read_normalized('../data/train/Cells.parquet')
train_location = _read_normalized('../data/train/Location.parquet')
test_cell = _read_normalized('../data/test/Cells.parquet')
test_location = _read_normalized('../data/test/Location.parquet')
validate_cell = _read_normalized('../data/validate/Cells.parquet')
validate_location = _read_normalized('../data/validate/Location.parquet')

# Label tables; note the test split is read from Label_idx.parquet.
train_label = _read_normalized('../data/train/Label.parquet')
test_label = _read_normalized('../data/test/Label_idx.parquet')
validate_label = _read_normalized('../data/validate/Label.parquet')

In [6]:
def fetch_location(cells: pd.DataFrame,
                   url: str = 'https://backend.radiocells.org',
                   timeout: float = 10.0):
    """Look up an estimated position for a set of observed cell towers.

    Every row of ``cells`` that has all of ``ci``, ``TAC``, ``MCC`` and
    ``MNC`` populated is sent as one entry of a ``cellTowers`` list in a
    single JSON POST request to ``url``.

    Args:
        cells: Frame with (at least) the columns ``ci``, ``TAC``, ``MCC``
            and ``MNC``. Rows with a missing value in any of these columns
            are skipped, because ``int(NaN)`` would raise ``ValueError``.
        url: Endpoint that receives the request. Defaults to the
            radiocells.org backend used by the original implementation.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error, so an unresponsive server cannot hang the
            kernel.

    Returns:
        ``{"Latitude": ..., "Longitude": ..., "accuracy": ...}`` taken from
        the response body when the server answers with HTTP 200; ``None``
        when no usable tower row exists or the status code is not 200.

    Raises:
        KeyError: If ``cells`` lacks one of the required columns, or a 200
            response body lacks the ``location`` / ``accuracy`` fields.
        requests.RequestException: On connection failures or timeouts.
    """
    tower_columns = ['ci', 'TAC', 'MCC', 'MNC']

    # Keep only rows where every identifier is present; nothing to ask the
    # service if none remain.
    usable = cells.dropna(subset=tower_columns)
    if usable.empty:
        return None

    # NOTE(review): the sample data shows ci/TAC values around 2147484000 on
    # non-registered cells, which looks like an "unavailable" sentinel
    # (presumably Integer.MAX_VALUE). They are still sent as-is, exactly as
    # before -- confirm whether they should be filtered out.
    request_body = {
        "cellTowers": [{
            "cellId": int(ci),
            "locationAreaCode": int(tac),
            "mobileCountryCode": int(mcc),
            "mobileNetworkCode": int(mnc)
        } for ci, tac, mcc, mnc in usable[tower_columns].itertuples(index=False, name=None)]
    }

    resp = requests.post(url, json=request_body, timeout=timeout)
    if resp.status_code != 200:
        return None

    # Expected body shape:
    # {"location": {"lat": 48.85702, "lng": 2.29520}, "accuracy": 30}
    location = resp.json()
    return {
        "Latitude": location['location']['lat'],
        "Longitude": location['location']['lng'],
        "accuracy": location['accuracy'],
    }

# Try the lookup on the cell readings that share one raw timestamp.
fetch_location(
    train_cell.query('epoch_time == 1490448212563')
)

{'Latitude': 51.472396, 'Longitude': -0.4120165, 'accuracy': 1000}

In [16]:
# Attach the location row sharing the same epoch_time_id to every cell reading
# (left join keeps readings without a location), order by time, then keep only
# LTE readings whose matched location reports a positive accuracy value.
(
    train_cell
    .merge(train_location, on='epoch_time_id', how="left")
    .sort_values(by='epoch_time_id')
    .query('accuracy > 0 and cell_type == "LTE"')
)

Unnamed: 0,epoch_time_x,num_entries,cell_type,isRegistered,ci,MCC,MNC,PCI,TAC,asuLevel,...,lac,PSC,epoch_time_id,epoch_time_y,Ignore1,Ignore2,accuracy,Latitude,Longitude,Altitude
88,1490448206591,1,LTE,1,1.280593e+08,234,10,425.0,4.096000e+03,32,...,,,1490448207000,1.490448e+12,1.733751e+13,2.426589e+08,24.0,51.469703,-0.412526,70.498640
90,1490448212563,2,LTE,0,2.147484e+09,234,10,424.0,2.147484e+09,39,...,,,1490448213000,1.490448e+12,1.734306e+13,2.426589e+08,8.0,51.470240,-0.411670,70.672157
89,1490448212563,2,LTE,1,1.280593e+08,234,10,425.0,4.096000e+03,41,...,,,1490448213000,1.490448e+12,1.734306e+13,2.426589e+08,8.0,51.470240,-0.411670,70.672157
92,1490448218571,2,LTE,0,2.147484e+09,234,10,424.0,2.147484e+09,40,...,,,1490448219000,1.490448e+12,1.734908e+13,2.426589e+08,6.0,51.470672,-0.410402,71.402222
91,1490448218571,2,LTE,1,1.280593e+08,234,10,425.0,4.096000e+03,42,...,,,1490448219000,1.490448e+12,1.734908e+13,2.426589e+08,6.0,51.470672,-0.410402,71.402222
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3040228,1499268276823,9,LTE,1,1.280004e+08,234,10,20.0,1.440000e+02,17,...,,,1499268277000,1.499268e+12,2.556436e+13,1.496746e+11,12.0,50.846796,-0.133278,117.764547
3040231,1499268277847,9,LTE,0,2.147484e+09,234,10,20.0,2.147484e+09,10,...,,,1499268278000,1.499268e+12,2.556536e+13,1.496746e+11,12.0,50.846796,-0.133278,117.764547
3040230,1499268277847,9,LTE,1,1.280004e+08,234,10,20.0,1.440000e+02,18,...,,,1499268278000,1.499268e+12,2.556536e+13,1.496746e+11,12.0,50.846796,-0.133278,117.764547
3040233,1499268278855,9,LTE,0,2.147484e+09,234,10,20.0,2.147484e+09,10,...,,,1499268279000,1.499268e+12,2.556636e+13,1.496746e+11,12.0,50.846796,-0.133278,117.764547


In [18]:
# Show the raw cell readings recorded at a single epoch_time value.
train_cell.query(
    'epoch_time == 1490448212563'
)

Unnamed: 0,epoch_time,num_entries,cell_type,isRegistered,ci,MCC,MNC,PCI,TAC,asuLevel,dBm,level,cid,lac,PSC,epoch_time_id
89,1490448212563,2,LTE,1,128059300.0,234,10,425.0,4096.0,41,-99,3,,,,1490448213000
90,1490448212563,2,LTE,0,2147484000.0,234,10,424.0,2147484000.0,39,-101,3,,,,1490448213000


In [3]:
# Pair every validation label with the cell readings that share its
# epoch_time_id. The readings must come from validate_cell: joining the
# validation labels against train_cell (as this cell did before) mixes two
# different data splits.
validate_cell_with_label = (
    validate_label
    .merge(validate_cell, how='left', on='epoch_time_id')
    .sort_values(['epoch_time_id', 'cell_type', 'dBm'])
    [['epoch_time_id', 'label', 'cell_type', 'dBm', 'asuLevel', 'ci']]
)

In [10]:
# Persist the labelled validation cell readings next to the notebook.
validate_cell_with_label.to_csv(
    path_or_buf='./validate_cell_with_label.csv'
)