In [1]:
%load_ext autoreload
%autoreload 2

%load_ext dotenv
%dotenv

In [2]:
import re

import numpy as np
import pandas as pd
import os
import featuretools as ft
from shl.prepare import normalize_epoch_time, normalize_lat_long, calculate_window, calculate_shift, fillna_agg_by_label
import plotly.express as px
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import DistanceMetric
from sklearn.metrics.pairwise import haversine_distances
from sklearn.metrics import pairwise_distances_argmin_min
from shapely.geometry import Polygon, LineString, Point, MultiLineString
from tqdm import tqdm
import geopandas as gpd
import pickle

from shl.features import WifiFeature
import matplotlib
import folium
import branca.colormap as cm

In [3]:
def _read_epoch_normalized(parquet_path):
    """Read one parquet table and pass it through `normalize_epoch_time` on 'epoch_time'."""
    return normalize_epoch_time(pd.read_parquet(parquet_path), 'epoch_time')


# Location tables, one per split.
train_location = _read_epoch_normalized('../data/train/Location.parquet')
test_location = _read_epoch_normalized('../data/test/Location.parquet')
validate_location = _read_epoch_normalized('../data/validate/Location.parquet')

# Label tables, one per split (the test split is read from `Label_idx.parquet`).
train_label = _read_epoch_normalized('../data/train/Label.parquet')
test_label = _read_epoch_normalized('../data/test/Label_idx.parquet')
validate_label = _read_epoch_normalized('../data/validate/Label.parquet')

In [4]:
# Bus-stop coordinates prepared outside this notebook.
# NOTE(review): pickle.load can execute arbitrary code from the file — only load
# pickles that were produced by a trusted step of this project.
with open('../additional_data/bus_stops.pickle', 'rb') as f:
    bus_stops = pickle.load(f)

# Reference points for the "distance to the nearest point" features.
# Keys become part of the feature column names (`distance_<key>`).
# Each literal entry is a [latitude, longitude] pair; this order has to match the
# ['Latitude', 'Longitude'] column order queried in calculate_minimal_distance.
# Entries with an empty list are skipped by create_point_distance_features.
feature_points = {
    'London': [
        [51.509865, -0.118092]
    ],
    'Brighton': [
        [50.827778, -0.152778]
    ],
    'parks': [
        # Hollingbury Golf Course
        [50.85423803467499, -0.12791258170001926],
        # St Ann’s Well Gardens
        [50.829876823789675, -0.15525600010959892],
        # Preston Park
        [50.839694335541274, -0.1462660790420134],
        # Waterwall conservation area
        [50.8659, -0.1750],
        # Withdean park
        [50.8546, -0.1497],
        # Stanmer park
        [50.8678, -0.0968],
        # Upper Lodge Wood
        [50.8751, -0.1177],
        # Pudding bag
        [50.8710, -0.1161],
        # Great Wood
        [50.8653, -0.1036],
        # Grubbings
        [50.8729, -0.0971],
        # Milbark wood
        [50.8783, -0.0982],
        # High park wood
        [50.8849, -0.1078],
        # Green broom
        [50.8833, -0.1107],
        # Moon's plantations
        [50.8774, -0.0840],
        # Friston forest
        [50.7783, 0.1894],
        # Malthouse wood
        [51.0042, -0.2044],
        # Bedgebury forest
        [51.0694, 0.4649],
        # Swinley forest
        [51.3726, -0.7292],
        # Crowthorne wood
        [51.3808, -0.7598],
        # Queen Elizabeth Country Park
        [50.9651, -0.9695],
        # Hurth wood
        [51.1854, -0.4278],
        # Winterfold wood
        [51.1764, -0.4564],
        # Queen's park
        [50.8249, -0.1248],
    ],
    # presumably a list of [latitude, longitude] lists, like the literals above — TODO confirm
    'bus_stops': bus_stops,
    # TODO: no coordinates collected yet; this entry currently produces no feature.
    'shopping malls': [

    ],
    # TODO: no coordinates collected yet; this entry currently produces no feature.
    'stations': [

    ]
}

def calculate_minimal_distance(data: pd.DataFrame, points):
    """Return the distance from every row of `data` to its nearest reference point.

    Args:
        data: frame with 'Latitude' and 'Longitude' columns.
        points: sequence of [latitude, longitude] pairs to search.

    Returns:
        A float array of shape (len(data), 1) holding, per row, the distance to
        the closest entry of `points`. The tree uses scikit-learn's default
        metric (Euclidean) on the raw coordinate values, so the result is in
        degrees, not metres.
    """
    # Imported locally, as in the original cell, so the function stays self-contained.
    from sklearn.neighbors import BallTree

    coordinates = data[['Latitude', 'Longitude']]
    if len(coordinates) == 0:
        # scikit-learn's input validation rejects a query with zero rows;
        # an empty frame simply has no distances.
        return np.empty((0, 1), dtype=float)

    tree = BallTree(np.array(points), leaf_size=15)
    # query() also returns the index of the nearest point, which is not needed here.
    distances, _ = tree.query(coordinates, k=1)
    return distances

def create_point_distance_features(data: pd.DataFrame):
    """Build one nearest-point distance column per entry of `feature_points`.

    Args:
        data: frame with 'epoch_time_id', 'Latitude' and 'Longitude' columns.

    Returns:
        A new frame holding 'epoch_time_id' plus:
        * `distance_<name>` for entries that are a collection of coordinate
          pairs (distance to the nearest pair of the collection);
        * `distance_<name>_<point_name>` for entries that are a list of
          single-item dicts `{point_name: coordinate_pair}` (distance to that
          one point).
        Empty entries produce no column. `data` itself is not modified.
    """
    # Explicit copy: the new columns must land on an independent frame, not on a
    # slice of `data` (which triggers pandas' SettingWithCopyWarning).
    features = data[['epoch_time_id']].copy()
    for name, points in feature_points.items():
        if len(points) == 0:
            continue
        first = points[0]
        if isinstance(first, dict):
            for point in points:
                # Each dict carries exactly one name -> location item; only the first is used.
                point_name, point_loc = next(iter(point.items()))
                features[f'distance_{name}_{point_name}'] = calculate_minimal_distance(data, [point_loc])
        elif isinstance(first, (list, tuple, np.ndarray)):
            # isinstance (rather than an exact type match) also accepts tuples and
            # array rows, so such collections are no longer silently skipped.
            features[f'distance_{name}'] = calculate_minimal_distance(data, points)

    return features

# Keyword arguments forwarded to `calculate_window`.
settings = dict(
    fill_limit=30,
    window_sizes=[60, 300, 600],
    window_center=True,
    # None leaves the column choice to calculate_window; an explicit list would
    # look like ['distance_London', 'distance_Brighton', 'distance_parks', 'bus_stops'].
    columns=None,
    functions=['mean', 'std', 'median'],
)
# Keyword arguments forwarded to `calculate_shift`.
shift_settings = dict(
    periods=[60, 300, 600],
    columns_patterns=['window_'],
    fill_limit=30,
)


def _point_distance_features_for(location, label):
    """Chain create_point_distance_features -> calculate_window -> calculate_shift -> fillna_agg_by_label."""
    table = create_point_distance_features(location)
    table = calculate_window(table, **settings)
    table = calculate_shift(table, **shift_settings)
    return fillna_agg_by_label(table, label)


train_distances_to_points_with_windows = _point_distance_features_for(train_location, train_label)
display(train_distances_to_points_with_windows)

# Persist the features of every split; only the train table is kept in memory.
train_distances_to_points_with_windows.to_parquet('../data/train/features_distances.parquet')
_point_distance_features_for(test_location, test_label).to_parquet('../data/test/features_distances.parquet')
_point_distance_features_for(validate_location, validate_label).to_parquet('../data/validate/features_distances.parquet')

Unnamed: 0,epoch_time_id,distance_London,distance_Brighton,distance_parks,distance_bus_stops,distance_London_window_60_mean,distance_London_window_60_std,distance_London_window_60_median,distance_Brighton_window_60_mean,distance_Brighton_window_60_std,...,distance_London_window_600_median_shift_60_future,distance_Brighton_window_600_mean_shift_60_future,distance_Brighton_window_600_std_shift_60_future,distance_Brighton_window_600_median_shift_60_future,distance_parks_window_600_mean_shift_60_future,distance_parks_window_600_std_shift_60_future,distance_parks_window_600_median_shift_60_future,distance_bus_stops_window_600_mean_shift_60_future,distance_bus_stops_window_600_std_shift_60_future,distance_bus_stops_window_600_median_shift_60_future
45,1490431628000,0.665536,0.025955,0.010956,0.001025,0.666540,0.000391,0.666682,0.025165,0.000528,...,,,,,,,,,,
46,1490431629000,0.665536,0.025955,0.010956,0.001025,0.666540,0.000391,0.666682,0.025165,0.000528,...,,,,,,,,,,
47,1490431630000,0.665536,0.025955,0.010956,0.001025,0.666540,0.000391,0.666682,0.025165,0.000528,...,,,,,,,,,,
48,1490431631000,0.665536,0.025955,0.010956,0.001025,0.666540,0.000391,0.666682,0.025165,0.000528,...,,,,,,,,,,
49,1490431632000,0.665536,0.025955,0.010956,0.001025,0.666540,0.000391,0.666682,0.025165,0.000528,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1230778,1499268278000,0.663243,0.027238,0.009174,0.001605,0.663223,0.000012,0.663219,0.027241,0.000072,...,,,,,,,,,,
1230779,1499268279000,0.663243,0.027238,0.009175,0.001605,0.663223,0.000012,0.663219,0.027241,0.000072,...,,,,,,,,,,
1230780,1499268280000,0.663243,0.027238,0.009175,0.001605,0.663223,0.000012,0.663219,0.027241,0.000072,...,,,,,,,,,,
1230781,1499268281000,0.663243,0.027238,0.009175,0.001605,0.663223,0.000012,0.663219,0.027241,0.000072,...,,,,,,,,,,


In [5]:
# Join labels and point-distance features on the shared timestamp id
# (default inner join: only ids present in both frames survive).
train_distances_to_points_with_windows_with_label = pd.merge(
    train_label,
    train_distances_to_points_with_windows,
    on='epoch_time_id',
)
train_distances_to_points_with_windows_with_label

Unnamed: 0,epoch_time,label,epoch_time_id,distance_London,distance_Brighton,distance_parks,distance_bus_stops,distance_London_window_60_mean,distance_London_window_60_std,distance_London_window_60_median,...,distance_London_window_600_median_shift_60_future,distance_Brighton_window_600_mean_shift_60_future,distance_Brighton_window_600_std_shift_60_future,distance_Brighton_window_600_median_shift_60_future,distance_parks_window_600_mean_shift_60_future,distance_parks_window_600_std_shift_60_future,distance_parks_window_600_median_shift_60_future,distance_bus_stops_window_600_mean_shift_60_future,distance_bus_stops_window_600_std_shift_60_future,distance_bus_stops_window_600_median_shift_60_future
0,1490431628000,4,1490431628000,0.665536,0.025955,0.010956,0.001025,0.666540,0.000391,0.666682,...,,,,,,,,,,
1,1490431629000,4,1490431629000,0.665536,0.025955,0.010956,0.001025,0.666540,0.000391,0.666682,...,,,,,,,,,,
2,1490431630000,4,1490431630000,0.665536,0.025955,0.010956,0.001025,0.666540,0.000391,0.666682,...,,,,,,,,,,
3,1490431631000,4,1490431631000,0.665536,0.025955,0.010956,0.001025,0.666540,0.000391,0.666682,...,,,,,,,,,,
4,1490431632000,4,1490431632000,0.665536,0.025955,0.010956,0.001025,0.666540,0.000391,0.666682,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
707329,1499267848000,5,1499267848000,0.664882,0.026065,0.010585,0.001078,0.664930,0.001234,0.664913,...,0.663269,0.024100,0.004625,0.02697,0.009277,0.001752,0.009343,0.001439,0.000647,0.001758
707330,1499267849000,5,1499267849000,0.664818,0.026094,0.010540,0.001078,0.664872,0.001226,0.664850,...,0.663269,0.024120,0.004611,0.02697,0.009277,0.001752,0.009343,0.001441,0.000646,0.001758
707331,1499267850000,5,1499267850000,0.664751,0.026121,0.010495,0.001086,0.664813,0.001215,0.664785,...,0.663269,0.024141,0.004596,0.02697,0.009278,0.001752,0.009343,0.001443,0.000645,0.001758
707332,1499267851000,5,1499267851000,0.664687,0.026146,0.010453,0.001099,0.664754,0.001201,0.664719,...,0.663269,0.024161,0.004581,0.02697,0.009278,0.001752,0.009343,0.001445,0.000644,0.001758


In [6]:
# Load the railway geometries for a visual sanity check; the feature code below
# reads the same file again when it builds `feature_lines`.
railways = gpd.read_file('../additional_data/railways.json')
display(railways)

Unnamed: 0,geometry
0,"LINESTRING (-0.14132 50.82902, -0.14151 50.830..."
1,"LINESTRING (-0.93849 50.85162, -0.93633 50.851..."
2,"LINESTRING (-0.14087 50.82901, -0.14089 50.829..."
3,"LINESTRING (0.27687 50.80288, 0.27665 50.80432..."
4,"LINESTRING (-0.14108 50.82902, -0.14104 50.829..."
5,"LINESTRING (-0.16576 51.23988, -0.16623 51.239..."
6,"LINESTRING (-0.17003 51.23156, -0.16928 51.233..."
7,"LINESTRING (-0.16563 51.23996, -0.15608 51.255..."
8,"LINESTRING (-0.08851 51.38370, -0.08783 51.384..."
9,"LINESTRING (-0.07169 51.40712, -0.07087 51.406..."


In [7]:
# Load the subway geometries for a visual sanity check; the feature code below
# reads the same file again when it builds `feature_lines`.
subway = gpd.read_file('../additional_data/subway.json')
display(subway)

Unnamed: 0,geometry
0,"LINESTRING (0.07963 51.54034, 0.07729 51.54146..."
1,"LINESTRING (0.01027 51.51106, 0.00955 51.51240..."
2,"LINESTRING (-0.18799 51.57114, -0.19263 51.572..."
3,"LINESTRING (-0.23859 51.58982, -0.24056 51.590..."
4,"LINESTRING (-0.30260 51.61922, -0.30124 51.617..."
5,"LINESTRING (-0.37130 51.57519, -0.36957 51.574..."
6,"LINESTRING (-0.37130 51.57515, -0.36744 51.574..."
7,"LINESTRING (-0.22702 51.49365, -0.22505 51.493..."
8,"LINESTRING (-0.19207 51.48249, -0.19288 51.481..."
9,"LINESTRING (0.09334 51.60333, 0.09079 51.59488..."


In [8]:
from shapely.ops import unary_union, cascaded_union, linemerge

# Line geometries used for the "distance to the nearest line" features.
# Keys become part of the feature column names (`distance_<key>`).
# Values are of two kinds, both accepted by calculate_min_distance_to_lines:
#   * a single shapely geometry (bus routes, merged up front with unary_union);
#   * a GeoDataFrame with a 'geometry' column (subway, railways).
feature_lines = {
    # Unmerged alternative, kept for reference:
    # 'bus_routes': gpd.read_file('../additional_data/bus_routes.json'),
    'bus_routes': unary_union(gpd.read_file('../additional_data/bus_routes.json').loc[:, 'geometry'].to_list()),
    'subway': gpd.read_file('../additional_data/subway.json'),
    'railways': gpd.read_file('../additional_data/railways.json'),
}

def calculate_min_distance_to_lines(point: Point, lines: gpd.GeoDataFrame):
    """Return the shapely distance from `point` to the nearest part of `lines`.

    `lines` may be a GeoDataFrame, whose 'geometry' column is first combined
    into one MultiLineString, or any other object accepted by
    `point.distance`, which is then used as given.
    """
    target = lines
    if isinstance(target, gpd.GeoDataFrame):
        # One multi-geometry lets a single distance() call cover every line.
        target = MultiLineString(target['geometry'].tolist())
    return point.distance(target)

def create_distance_to_lines_features(data: pd.DataFrame):
    """Build one nearest-line distance column per entry of `feature_lines`.

    Args:
        data: frame with 'epoch_time_id', 'Latitude' and 'Longitude' columns.

    Returns:
        A new frame holding 'epoch_time_id' plus one `distance_<name>` column
        per entry of `feature_lines`. Points are built as (Longitude, Latitude)
        and distances come from shapely's planar `distance`, so they are
        expressed in the coordinate units of the inputs, not in metres.
    """
    features = {
        'epoch_time_id': data['epoch_time_id'],
    }
    # Plain arrays avoid building a Series per row, which is what iterrows() does.
    longitudes = data['Longitude'].to_numpy()
    latitudes = data['Latitude'].to_numpy()
    for name, lines in feature_lines.items():
        # Merge a GeoDataFrame into one MultiLineString once per feature.
        # calculate_min_distance_to_lines would otherwise redo this for every row.
        if isinstance(lines, gpd.GeoDataFrame):
            lines = MultiLineString(lines.loc[:, 'geometry'].to_list())
        features[f'distance_{name}'] = [
            calculate_min_distance_to_lines(Point(longitude, latitude), lines)
            for longitude, latitude in tqdm(zip(longitudes, latitudes), total=len(data), desc=name)
        ]

    return pd.DataFrame(features)

In [9]:
# Keyword arguments forwarded to `calculate_window`.
settings = dict(
    fill_limit=30,
    window_sizes=[60, 300, 600],
    window_center=True,
    # None leaves the column choice to calculate_window; an explicit list would
    # look like ['distance_bus_routes', 'distance_subway', 'distance_railways'].
    columns=None,
    functions=['mean', 'std', 'median'],
)
# Keyword arguments forwarded to `calculate_shift`.
shift_settings = dict(
    periods=[60, 300, 600],
    columns_patterns=['window_'],
    fill_limit=30,
)


def _line_distance_features_for(location, label):
    """Chain create_distance_to_lines_features -> calculate_window -> calculate_shift -> fillna_agg_by_label."""
    table = create_distance_to_lines_features(location)
    table = calculate_window(table, **settings)
    table = calculate_shift(table, **shift_settings)
    return fillna_agg_by_label(table, label)


train_distances_to_lines_with_windows = _line_distance_features_for(train_location, train_label)
display(train_distances_to_lines_with_windows)

# Persist the features of every split; only the train table is kept in memory.
train_distances_to_lines_with_windows.to_parquet('../data/train/features_distances_to_lines_with_windows.parquet')
_line_distance_features_for(test_location, test_label).to_parquet('../data/test/features_distances_to_lines_with_windows.parquet')
_line_distance_features_for(validate_location, validate_label).to_parquet('../data/validate/features_distances_to_lines_with_windows.parquet')


100%|██████████| 911109/911109 [33:00<00:00, 459.95it/s]  
100%|██████████| 911109/911109 [07:50<00:00, 1938.37it/s]
100%|██████████| 911109/911109 [05:52<00:00, 2587.22it/s]


Unnamed: 0,epoch_time_id,distance_bus_routes,distance_subway,distance_railways,distance_bus_routes_window_60_mean,distance_bus_routes_window_60_std,distance_bus_routes_window_60_median,distance_subway_window_60_mean,distance_subway_window_60_std,distance_subway_window_60_median,...,distance_railways_window_300_median_shift_60_future,distance_bus_routes_window_600_mean_shift_60_future,distance_bus_routes_window_600_std_shift_60_future,distance_bus_routes_window_600_median_shift_60_future,distance_subway_window_600_mean_shift_60_future,distance_subway_window_600_std_shift_60_future,distance_subway_window_600_median_shift_60_future,distance_railways_window_600_mean_shift_60_future,distance_railways_window_600_std_shift_60_future,distance_railways_window_600_median_shift_60_future
45,1490431628000,0.000746,0.581761,0.006968,0.00016,0.000238,0.000039,0.582727,0.000372,0.582794,...,,,,,,,,,,
46,1490431629000,0.000746,0.581761,0.006968,0.00016,0.000238,0.000039,0.582727,0.000372,0.582794,...,,,,,,,,,,
47,1490431630000,0.000746,0.581761,0.006968,0.00016,0.000238,0.000039,0.582727,0.000372,0.582794,...,,,,,,,,,,
48,1490431631000,0.000746,0.581761,0.006968,0.00016,0.000238,0.000039,0.582727,0.000372,0.582794,...,,,,,,,,,,
49,1490431632000,0.000746,0.581761,0.006968,0.00016,0.000238,0.000039,0.582727,0.000372,0.582794,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1230778,1499268278000,0.000548,0.579433,0.009272,0.00056,0.000098,0.000542,0.579411,0.000019,0.579412,...,,,,,,,,,,
1230779,1499268279000,0.000548,0.579433,0.009272,0.00056,0.000098,0.000542,0.579411,0.000019,0.579412,...,,,,,,,,,,
1230780,1499268280000,0.000548,0.579433,0.009272,0.00056,0.000098,0.000542,0.579411,0.000019,0.579412,...,,,,,,,,,,
1230781,1499268281000,0.000548,0.579433,0.009272,0.00056,0.000098,0.000542,0.579411,0.000019,0.579412,...,,,,,,,,,,


100%|██████████| 562565/562565 [13:34<00:00, 690.86it/s]
100%|██████████| 562565/562565 [04:05<00:00, 2288.97it/s]
100%|██████████| 562565/562565 [04:20<00:00, 2161.93it/s]
100%|██████████| 101524/101524 [03:37<00:00, 467.62it/s]
100%|██████████| 101524/101524 [00:59<00:00, 1702.04it/s]
100%|██████████| 101524/101524 [00:53<00:00, 1915.43it/s]


In [10]:
# Preview labels joined with the line-distance features on the shared timestamp id
# (default inner join: only ids present in both frames survive).
pd.merge(
    train_label,
    train_distances_to_lines_with_windows,
    on="epoch_time_id",
)

Unnamed: 0,epoch_time,label,epoch_time_id,distance_bus_routes,distance_subway,distance_railways,distance_bus_routes_window_60_mean,distance_bus_routes_window_60_std,distance_bus_routes_window_60_median,distance_subway_window_60_mean,...,distance_railways_window_300_median_shift_60_future,distance_bus_routes_window_600_mean_shift_60_future,distance_bus_routes_window_600_std_shift_60_future,distance_bus_routes_window_600_median_shift_60_future,distance_subway_window_600_mean_shift_60_future,distance_subway_window_600_std_shift_60_future,distance_subway_window_600_median_shift_60_future,distance_railways_window_600_mean_shift_60_future,distance_railways_window_600_std_shift_60_future,distance_railways_window_600_median_shift_60_future
0,1490431628000,4,1490431628000,0.000746,0.581761,0.006968,0.000160,0.000238,0.000039,0.582727,...,,,,,,,,,,
1,1490431629000,4,1490431629000,0.000746,0.581761,0.006968,0.000160,0.000238,0.000039,0.582727,...,,,,,,,,,,
2,1490431630000,4,1490431630000,0.000746,0.581761,0.006968,0.000160,0.000238,0.000039,0.582727,...,,,,,,,,,,
3,1490431631000,4,1490431631000,0.000746,0.581761,0.006968,0.000160,0.000238,0.000039,0.582727,...,,,,,,,,,,
4,1490431632000,4,1490431632000,0.000746,0.581761,0.006968,0.000160,0.000238,0.000039,0.582727,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
707329,1499267848000,5,1499267848000,0.001004,0.581049,0.007721,0.000777,0.000375,0.000957,0.581106,...,0.009338,0.000776,0.000559,0.000811,0.581816,0.003460,0.579412,0.007197,0.002964,0.009338
707330,1499267849000,5,1499267849000,0.001007,0.580982,0.007789,0.000790,0.000365,0.000957,0.581046,...,0.009339,0.000777,0.000559,0.000811,0.581796,0.003437,0.579411,0.007209,0.002960,0.009339
707331,1499267850000,5,1499267850000,0.001013,0.580913,0.007859,0.000805,0.000352,0.000957,0.580986,...,0.009339,0.000779,0.000558,0.000812,0.581775,0.003414,0.579411,0.007220,0.002954,0.009339
707332,1499267851000,5,1499267851000,0.001021,0.580845,0.007928,0.000820,0.000336,0.000957,0.580926,...,0.009339,0.000780,0.000557,0.000812,0.581755,0.003392,0.579410,0.007232,0.002949,0.009339


In [20]:
def read_cell_location(cell_location_file, feature_distance_file):
    """Load cell-based locations whose timestamps have no distance features yet.

    Args:
        cell_location_file: CSV with an 'epoch_time_id' column.
        feature_distance_file: parquet file with an 'epoch_time_id' column;
            only that column is read.

    Returns:
        The rows of the CSV whose 'epoch_time_id' does not occur in the
        parquet file, with their original index.
    """
    cell_location = pd.read_csv(cell_location_file)
    covered_ids = pd.read_parquet(feature_distance_file, columns=['epoch_time_id'])['epoch_time_id']
    already_covered = cell_location['epoch_time_id'].isin(covered_ids)
    return cell_location[~already_covered]

# Cell-derived locations for the timestamps that the GPS-based distance features do not cover.
train_cell_location = read_cell_location(
    cell_location_file='../data/train/features_cell_to_location.csv',
    feature_distance_file='../data/train/features_distances.parquet',
)
test_cell_location = read_cell_location(
    cell_location_file='../data/test/features_cell_to_location.csv',
    feature_distance_file='../data/test/features_distances.parquet',
)
validate_cell_location = read_cell_location(
    cell_location_file='../data/validate/features_cell_to_location.csv',
    feature_distance_file='../data/validate/features_distances.parquet',
)


display(train_cell_location)

Unnamed: 0,epoch_time_id,Latitude,Longitude,accuracy
0,1490430923000,50.833338,-0.135122,1568
1,1490632117000,50.836352,-0.140269,1606
2,1490782709000,50.819276,-0.132715,1063
3,1492591507000,50.833338,-0.135122,1568
4,1492591567000,50.833338,-0.135122,1568
...,...,...,...,...
7015,1499249253000,50.833338,-0.135122,1568
7016,1499249316000,50.833338,-0.135122,1568
7019,1499268291000,50.832570,-0.126835,1493
7020,1499268352000,50.833338,-0.135122,1568


In [21]:
def _cell_point_distance_features_for(cell_location, label):
    """Chain create_point_distance_features -> calculate_window -> calculate_shift -> fillna_agg_by_label."""
    table = create_point_distance_features(cell_location)
    table = calculate_window(table, **settings)
    table = calculate_shift(table, **shift_settings)
    return fillna_agg_by_label(table, label)


train_cell_distances_to_points_with_windows = _cell_point_distance_features_for(train_cell_location, train_label)
display(train_cell_distances_to_points_with_windows)

# Persist the features of every split; only the train table is kept in memory.
train_cell_distances_to_points_with_windows.to_parquet('../data/train/features_cell_distances.parquet')
_cell_point_distance_features_for(test_cell_location, test_label).to_parquet('../data/test/features_cell_distances.parquet')
_cell_point_distance_features_for(validate_cell_location, validate_label).to_parquet('../data/validate/features_cell_distances.parquet')

Unnamed: 0,epoch_time_id,distance_London,distance_Brighton,distance_parks,distance_bus_stops,distance_London_window_60_mean,distance_London_window_60_std,distance_London_window_60_median,distance_Brighton_window_60_mean,distance_Brighton_window_60_std,...,distance_London_window_600_median_shift_60_future,distance_Brighton_window_600_mean_shift_60_future,distance_Brighton_window_600_std_shift_60_future,distance_Brighton_window_600_median_shift_60_future,distance_parks_window_600_mean_shift_60_future,distance_parks_window_600_std_shift_60_future,distance_parks_window_600_median_shift_60_future,distance_bus_stops_window_600_mean_shift_60_future,distance_bus_stops_window_600_std_shift_60_future,distance_bus_stops_window_600_median_shift_60_future
99667,1492593287000,0.672214,0.030038,0.012786,0.000781,0.675667,0.003015,0.676741,0.019248,0.003554,...,,,,,,,,,,
99668,1492593288000,0.672214,0.030038,0.012786,0.000781,0.675667,0.003015,0.676741,0.019248,0.003554,...,,,,,,,,,,
99669,1492593289000,0.672214,0.030038,0.012786,0.000781,0.675667,0.003015,0.676741,0.019248,0.003554,...,,,,,,,,,,
99670,1492593290000,0.672214,0.030038,0.012786,0.000781,0.675667,0.003015,0.676741,0.019248,0.003554,...,,,,,,,,,,
99671,1492593291000,0.672214,0.030038,0.012786,0.000781,0.675667,0.003015,0.676741,0.019248,0.003554,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
984138,1499249253000,0.676741,0.018510,0.012829,0.001557,0.676746,0.000171,0.676741,0.019147,0.001908,...,,,,,,,,,,
984139,1499249316000,0.676741,0.018510,0.012829,0.001557,0.676746,0.000171,0.676741,0.019147,0.001908,...,,,,,,,,,,
984140,1499268291000,0.677351,0.026381,0.007935,0.001257,0.676746,0.000171,0.676741,0.019147,0.001908,...,,,,,,,,,,
984141,1499268352000,0.676741,0.018510,0.012829,0.001557,0.676746,0.000171,0.676741,0.019147,0.001908,...,,,,,,,,,,


In [22]:
def _cell_line_distance_features_for(cell_location, label):
    """Chain create_distance_to_lines_features -> calculate_window -> calculate_shift -> fillna_agg_by_label."""
    table = create_distance_to_lines_features(cell_location)
    table = calculate_window(table, **settings)
    table = calculate_shift(table, **shift_settings)
    return fillna_agg_by_label(table, label)


train_cell_distances_to_lines_with_windows = _cell_line_distance_features_for(train_cell_location, train_label)
display(train_cell_distances_to_lines_with_windows)

# Persist the features of every split; only the train table is kept in memory.
train_cell_distances_to_lines_with_windows.to_parquet('../data/train/features_cell_distances_to_lines_with_windows.parquet')
_cell_line_distance_features_for(test_cell_location, test_label).to_parquet('../data/test/features_cell_distances_to_lines_with_windows.parquet')
_cell_line_distance_features_for(validate_cell_location, validate_label).to_parquet('../data/validate/features_cell_distances_to_lines_with_windows.parquet')

100%|██████████| 6451/6451 [00:12<00:00, 514.26it/s]
100%|██████████| 6451/6451 [00:03<00:00, 1765.87it/s]
100%|██████████| 6451/6451 [00:02<00:00, 2161.48it/s]


Unnamed: 0,epoch_time_id,distance_bus_routes,distance_subway,distance_railways,distance_bus_routes_window_60_mean,distance_bus_routes_window_60_std,distance_bus_routes_window_60_median,distance_subway_window_60_mean,distance_subway_window_60_std,distance_subway_window_60_median,...,distance_railways_window_300_median_shift_60_future,distance_bus_routes_window_600_mean_shift_60_future,distance_bus_routes_window_600_std_shift_60_future,distance_bus_routes_window_600_median_shift_60_future,distance_subway_window_600_mean_shift_60_future,distance_subway_window_600_std_shift_60_future,distance_subway_window_600_median_shift_60_future,distance_railways_window_600_mean_shift_60_future,distance_railways_window_600_std_shift_60_future,distance_railways_window_600_median_shift_60_future
99667,1492593287000,0.000121,0.589640,0.002764,0.001094,0.000817,0.001084,0.591516,0.003041,0.592562,...,,,,,,,,,,
99668,1492593288000,0.000121,0.589640,0.002764,0.001094,0.000817,0.001084,0.591516,0.003041,0.592562,...,,,,,,,,,,
99669,1492593289000,0.000121,0.589640,0.002764,0.001094,0.000817,0.001084,0.591516,0.003041,0.592562,...,,,,,,,,,,
99670,1492593290000,0.000121,0.589640,0.002764,0.001094,0.000817,0.001084,0.591516,0.003041,0.592562,...,,,,,,,,,,
99671,1492593291000,0.000121,0.589640,0.002764,0.001094,0.000817,0.001084,0.591516,0.003041,0.592562,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
984138,1499249253000,0.001084,0.592562,0.003616,0.001124,0.000223,0.001084,0.592663,0.000398,0.592562,...,,,,,,,,,,
984139,1499249316000,0.001084,0.592562,0.003616,0.001124,0.000223,0.001084,0.592663,0.000398,0.592562,...,,,,,,,,,,
984140,1499268291000,0.000776,0.594374,0.006300,0.001124,0.000223,0.001084,0.592663,0.000398,0.592562,...,,,,,,,,,,
984141,1499268352000,0.001084,0.592562,0.003616,0.001124,0.000223,0.001084,0.592663,0.000398,0.592562,...,,,,,,,,,,


100%|██████████| 2423/2423 [00:05<00:00, 467.68it/s]
100%|██████████| 2423/2423 [00:01<00:00, 1673.78it/s]
100%|██████████| 2423/2423 [00:01<00:00, 2178.80it/s]
100%|██████████| 708/708 [00:01<00:00, 463.01it/s]
100%|██████████| 708/708 [00:00<00:00, 2470.83it/s]
100%|██████████| 708/708 [00:00<00:00, 2139.15it/s]
