https://www.kaggle.com/datasets/naiku007/gps-data

In [7]:
import numpy as np
import pandas as pd
import glob
import os.path
import datetime
import os

def read_plt(plt_file):
    points = pd.read_csv(plt_file, skiprows=6, header=None,
                         parse_dates=[[5, 6]], infer_datetime_format=True)

    # for clarity rename columns
    points.rename(inplace=True, columns={'5_6': 'time', 0: 'lat', 1: 'lon', 3: 'alt'})

    # remove unused columns
    points.drop(inplace=True, columns=[2, 4])

    return points

mode_names = ['walk', 'bike', 'bus', 'car', 'subway','train', 'airplane', 'boat', 'run', 'motorcycle', 'taxi']
mode_ids = {s : i + 1 for i, s in enumerate(mode_names)}

def read_labels(labels_file):
    labels = pd.read_csv(labels_file, skiprows=1, header=None,
                         parse_dates=[[0, 1], [2, 3]],
                         infer_datetime_format=True, delim_whitespace=True)

    # for clarity rename columns
    labels.columns = ['start_time', 'end_time', 'label']

    # replace 'label' column with integer encoding
    labels['label'] = [mode_ids[i] for i in labels['label']]

    return labels

def apply_labels(points, labels):
    indices = labels['start_time'].searchsorted(points['time'], side='right') - 1
    no_label = (indices < 0) | (points['time'].values >= labels['end_time'].iloc[indices].values)
    points['label'] = labels['label'].iloc[indices].values
    points['label'][no_label] = 0

def read_user(user_folder):
    labels = None

    plt_files = glob.glob(os.path.join(user_folder, 'Trajectory', '*.plt'))
    df = pd.concat([read_plt(f) for f in plt_files])

    labels_file = os.path.join(user_folder, 'labels.txt')
    if os.path.exists(labels_file):
        labels = read_labels(labels_file)
        apply_labels(df, labels)
    else:
        df['label'] = np.NAN

    return df

def read_all_users(folder):
    subfolders = os.listdir(folder)
    dfs = []
    for i, sf in enumerate(subfolders):
        print('[%d/%d] processing user %s' % (i + 1, len(subfolders), sf))
        df = read_user(os.path.join(folder,sf))
        df['user'] = int(sf)
        dfs.append(df)
    return pd.concat(dfs)

In [None]:
df = read_all_users('data/raw/')

[1/182] processing user 059
[2/182] processing user 167


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[3/182] processing user 166
[4/182] processing user 053
[5/182] processing user 134
[6/182] processing user 018
[7/182] processing user 131
[8/182] processing user 054
[9/182] processing user 027
[10/182] processing user 029
[11/182] processing user 164
[12/182] processing user 170
[13/182] processing user 140


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[14/182] processing user 039
[15/182] processing user 049
[16/182] processing user 133
[17/182] processing user 048
[18/182] processing user 116
[19/182] processing user 080
[20/182] processing user 032
[21/182] processing user 096


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[22/182] processing user 075
[23/182] processing user 128


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[24/182] processing user 111
[25/182] processing user 079
[26/182] processing user 085


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[27/182] processing user 070
[28/182] processing user 156
[29/182] processing user 122
[30/182] processing user 123
[31/182] processing user 130
[32/182] processing user 136
[33/182] processing user 159
[34/182] processing user 169


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[35/182] processing user 036
[36/182] processing user 040
[37/182] processing user 119
[38/182] processing user 108
[39/182] processing user 088
[40/182] processing user 157


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[41/182] processing user 038
[42/182] processing user 015
[43/182] processing user 150
[44/182] processing user 143
[45/182] processing user 121
[46/182] processing user 072
[47/182] processing user 154
[48/182] processing user 034


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[49/182] processing user 086
[50/182] processing user 042


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[51/182] processing user 028
[52/182] processing user 158
[53/182] processing user 012
[54/182] processing user 145
[55/182] processing user 066
[56/182] processing user 160
[57/182] processing user 063
[58/182] processing user 060
[59/182] processing user 105


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[60/182] processing user 005
[61/182] processing user 006
[62/182] processing user 052


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[63/182] processing user 098
[64/182] processing user 087
[65/182] processing user 055
[66/182] processing user 165
[67/182] processing user 082


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[68/182] processing user 062


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[69/182] processing user 084


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[70/182] processing user 014
[71/182] processing user 177
[72/182] processing user 069
[73/182] processing user 120
[74/182] processing user 024


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[75/182] processing user 031
[76/182] processing user 068


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[77/182] processing user 061
[78/182] processing user 180
[79/182] processing user 037
[80/182] processing user 176
[81/182] processing user 078


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[82/182] processing user 129
[83/182] processing user 033
[84/182] processing user 057
[85/182] processing user 089


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[86/182] processing user 092


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[87/182] processing user 118
[88/182] processing user 000
[89/182] processing user 008
[90/182] processing user 135
[91/182] processing user 010


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[92/182] processing user 115


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[93/182] processing user 071
[94/182] processing user 109
[95/182] processing user 030
[96/182] processing user 025
[97/182] processing user 023
[98/182] processing user 056


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[99/182] processing user 047
[100/182] processing user 142
[101/182] processing user 104


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[102/182] processing user 132
[103/182] processing user 153


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[104/182] processing user 058
[105/182] processing user 083
[106/182] processing user 125


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[107/182] processing user 013
[108/182] processing user 141


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[109/182] processing user 021
[110/182] processing user 163


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[111/182] processing user 103
[112/182] processing user 126


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[113/182] processing user 091


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[114/182] processing user 127
[115/182] processing user 004
[116/182] processing user 161
[117/182] processing user 155
[118/182] processing user 144


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[119/182] processing user 097
[120/182] processing user 148
[121/182] processing user 043
[122/182] processing user 173
[123/182] processing user 035
[124/182] processing user 093
[125/182] processing user 019
[126/182] processing user 114
[127/182] processing user 146
[128/182] processing user 003


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[129/182] processing user 171
[130/182] processing user 046
[131/182] processing user 138
[132/182] processing user 099
[133/182] processing user 051


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[134/182] processing user 020


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[135/182] processing user 179


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[136/182] processing user 181
[137/182] processing user 095
[138/182] processing user 172
[139/182] processing user 124
[140/182] processing user 094
[141/182] processing user 026


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[142/182] processing user 174
[143/182] processing user 001


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[144/182] processing user 022
[145/182] processing user 178
[146/182] processing user 102
[147/182] processing user 017


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[148/182] processing user 139
[149/182] processing user 076
[150/182] processing user 112


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[151/182] processing user 073


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[152/182] processing user 045
[153/182] processing user 113
[154/182] processing user 011
[155/182] processing user 002
[156/182] processing user 149
[157/182] processing user 081
[158/182] processing user 107
[159/182] processing user 175
[160/182] processing user 100
[161/182] processing user 007


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[162/182] processing user 151
[163/182] processing user 064
[164/182] processing user 168


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


[165/182] processing user 077
[166/182] processing user 044
[167/182] processing user 110
[168/182] processing user 065


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['label'][no_label] = 0


In [4]:
df

Unnamed: 0,time,lat,lon,alt,label,user
0,2011-03-11 10:46:34,39.977715,116.330418,0.0,0,59
1,2011-03-11 10:46:39,39.977677,116.330452,0.0,0,59
2,2011-03-11 10:46:44,39.977657,116.330443,0.0,0,59
3,2011-03-11 10:46:47,39.977657,116.330443,0.0,0,59
4,2011-03-11 10:46:49,39.977657,116.330443,0.0,0,59
...,...,...,...,...,...,...
2468,2009-06-18 04:58:03,40.413687,116.466335,597.1,0,41
2469,2009-06-18 04:58:08,40.413783,116.466308,597.1,0,41
2470,2009-06-18 04:58:13,40.413887,116.466265,600.4,0,41
2471,2009-06-18 04:58:19,40.413960,116.466210,600.4,0,41


In [5]:
df.isna().sum()

time     0
lat      0
lon      0
alt      0
label    0
user     0
dtype: int64