# Code preparation

In [1]:
import json
import numpy as np
from datetime import datetime, timedelta
import torch
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt

In [2]:
# Parameterize time (in seconds) onto the unit circle
def time_to_circle(time):
    """Map a time of day, given in seconds, to a point on the unit circle.

    One full turn corresponds to one day (86400 seconds), so times that are
    close across midnight end up close on the circle.

    Parameters
    ----------
    time : number or numpy array
        Seconds elapsed since midnight.

    Returns
    -------
    tuple
        ``(cos, sin)`` of the angle ``2*pi*time/86400``; each entry has the
        same shape as ``time``.
    """
    day_length = 24*60*60
    angle = 2*np.pi*time/day_length
    return np.cos(angle), np.sin(angle)

In [3]:
# Converting Unix time onto the unit circle (time of day) and the week day
def time_stamp(times):
    """Turn timestamp records into time-of-day circle coordinates and weekdays.

    Parameters
    ----------
    times : iterable
        Records whose element at index 1 is a Unix timestamp.

    Returns
    -------
    tuple
        ``(time_circle, week_day)`` where ``time_circle`` is the ``(cos, sin)``
        pair produced by ``time_to_circle`` for the seconds since midnight, and
        ``week_day`` is a list of ``datetime.weekday()`` values (Monday is 0).

    Notes
    -----
    ``datetime.fromtimestamp`` is called without a timezone, so both outputs
    are expressed in the local timezone of the machine running the code.
    """
    moments = [datetime.fromtimestamp(record[1]) for record in times]
    week_day = [moment.weekday() for moment in moments]
    # Seconds elapsed since local midnight for every record.
    seconds_of_day = np.array(
        [moment.hour*60*60 + moment.minute*60 + moment.second for moment in moments]
    )
    return time_to_circle(seconds_of_day), week_day
    

In [4]:
# Creates a list of zeros and ones where a one marks the first time a location is seen.
def explore(path):
    """Flag first visits in a sequence of locations.

    Parameters
    ----------
    path : iterable of hashable
        Visited locations in chronological order.

    Returns
    -------
    list of int
        One entry per element of ``path``: 1 if that location has not occurred
        earlier in ``path``, otherwise 0.
    """
    visited = set()
    flags = []
    for location in path:
        is_new = location not in visited
        flags.append(int(is_new))
        # Adding an already known location is a no-op for a set.
        visited.add(location)
    return flags

In [16]:
# Helper for prepare_data: frequency ranking of labels.
def _frequency_ranks(labels):
    """Map each distinct label to its rank by descending frequency (0 = most common)."""
    return {label: rank for rank, (label, _) in enumerate(Counter(labels).most_common())}


# Preprocess all the data in saved files such that 1 file contains all the information for one person.
def prepare_data(peps, train_frac=0.90, unknown_label=900, data_dir="Data"):
    """Build and save the prepared data matrices for every person in ``peps``.

    For each person the files ``final_series.json`` and ``timestamps.json`` are
    read from ``{data_dir}/{pep}/`` and combined into one matrix whose rows are
    ``[path, time_cos, time_sin, week_day, explore_path, person_ID]``. All rows
    live in a single NumPy array and therefore share one dtype.

    Six ``.npy`` files and one pickle are written to ``{data_dir}/{pep}/``:

    * ``prepared_data_{train,test,all}.npy``: the matrix with the original
      location ids, split chronologically into train / test / everything.
    * ``prepared_data_{train,test,all}_relabeled.npy``: the same matrices with
      location ids replaced by their frequency rank (0 = most visited location,
      larger ids = less visited). Train and test use the ranking of the
      training part; "all" uses the ranking of the full series.
    * ``label_dict.pkl``: DataFrame of the original training location ids,
      indexed by their rank.

    Parameters
    ----------
    peps : iterable
        Person ids; each id is used as a sub-directory name.
    train_frac : float, default 0.90
        Fraction of the series (from the start) that goes to the training set.
    unknown_label : default 900
        Label given to test locations that never occur in the training part.
        Note that the default collides with a real rank when a person has more
        than 900 distinct training locations; pass e.g. ``-1`` to avoid that.
    data_dir : str, default "Data"
        Directory that holds one sub-directory per person.

    Raises
    ------
    ValueError
        If ``train_frac`` is outside ``[0, 1]``.

    Persons whose input files are missing are reported on stdout and skipped.
    """
    if not 0 <= train_frac <= 1:
        raise ValueError(f"train_frac must be within [0, 1], got {train_frac}")

    for pep in peps:
        # Only the loading step is guarded, so the "not found" message can not
        # hide unrelated problems from the processing further down.
        try:
            with open(f"{data_dir}/{pep}/final_series.json") as json_file:
                data_path = json.load(json_file)
            with open(f"{data_dir}/{pep}/timestamps.json") as json_file:
                timestamps = json.load(json_file)
        except FileNotFoundError:
            print(f"File {pep} not found")
            continue

        [time_cos, time_sin], week_day = time_stamp(timestamps)

        # The location id is the last element of every series entry.
        data_path = [path[-1] for path in data_path]
        explore_path = explore(data_path)
        person_list = [pep] * len(explore_path)

        data = np.array([data_path, time_cos, time_sin, week_day, explore_path, person_list])

        # Chronological split: the first part is train, the remainder is test.
        frac = int(data.shape[1] * train_frac)
        data_train = data[:, :frac].copy()
        data_test = data[:, frac:].copy()

        np.save(f"{data_dir}/{pep}/prepared_data_train.npy", data_train)
        np.save(f"{data_dir}/{pep}/prepared_data_test.npy", data_test)
        np.save(f"{data_dir}/{pep}/prepared_data_all.npy", data)

        # Relabel the locations by visit frequency.
        ranks = _frequency_ranks(data_train[0, :])
        ranks_all = _frequency_ranks(data[0, :])
        data_train[0, :] = [ranks[loc] for loc in data_train[0, :]]
        # Locations that first show up in the test part have no training rank.
        data_test[0, :] = [ranks.get(loc, unknown_label) for loc in data_test[0, :]]
        data[0, :] = [ranks_all[loc] for loc in data[0, :]]

        np.save(f"{data_dir}/{pep}/prepared_data_train_relabeled.npy", data_train)
        np.save(f"{data_dir}/{pep}/prepared_data_test_relabeled.npy", data_test)
        np.save(f"{data_dir}/{pep}/prepared_data_all_relabeled.npy", data)

        # Keep the rank -> original id mapping so labels can be translated back.
        df_ranks = pd.DataFrame(list(ranks.keys()), index=list(ranks.values()))
        df_ranks.to_pickle(f"{data_dir}/{pep}/label_dict.pkl")

In [6]:
# Saves some meta data of all users
def prepare_user_inf(peps, data_dir="Data", output_path="data_inf.pkl"):
    """Collect per-user meta data and pickle it as a DataFrame.

    For every person in ``peps`` the series ``{data_dir}/{pep}/final_series.json``
    is read and one row, indexed by the person id, is stored with

    * ``length``: number of entries in the series,
    * ``cluster``: the entry for ``str(pep)`` in
      ``{data_dir}/labels_infomap_corr03.json``.

    Parameters
    ----------
    peps : iterable
        Person ids; each id is used as a sub-directory name.
    data_dir : str, default "Data"
        Directory holding the per-person folders and the cluster label file.
    output_path : str, default "data_inf.pkl"
        Where the resulting DataFrame is pickled.

    Raises
    ------
    KeyError
        If a person with an existing series has no entry in the label file.

    Persons whose files are missing are reported on stdout and skipped.
    """
    df = pd.DataFrame(columns=["length", "cluster"])
    # The label file is the same for every person, so it is parsed only once.
    # It is loaded lazily inside the guarded section so that a missing label
    # file is still reported per person instead of raising.
    data_cluster = None
    for pep in peps:
        try:
            with open(f"{data_dir}/{pep}/final_series.json") as json_file:
                data_path = json.load(json_file)
            if data_cluster is None:
                with open(f"{data_dir}/labels_infomap_corr03.json") as json_file:
                    data_cluster = json.load(json_file)
        except FileNotFoundError:
            print(f"File {pep} not found")
            continue

        df.loc[pep] = [len(data_path), data_cluster[str(pep)]]
    df.to_pickle(output_path)

In [80]:
# Build the per-user meta data table for person ids 0..855; ids whose files are
# missing are reported in the output below and left out of the table.
prepare_user_inf(range(856))

File 78 not found
File 417 not found
File 710 not found
File 727 not found
File 732 not found
File 782 not found
File 791 not found
File 795 not found
File 821 not found
File 841 not found
File 846 not found
File 852 not found
File 853 not found
File 854 not found
File 855 not found


In [17]:
# Create the prepared train/test/all arrays for person ids 0..855; ids whose
# input files are missing are reported in the output below and skipped.
prepare_data(range(856))

File 78 not found
File 417 not found
File 710 not found
File 727 not found
File 732 not found
File 782 not found
File 791 not found
File 795 not found
File 821 not found
File 841 not found
File 846 not found
File 852 not found
File 853 not found
File 854 not found
File 855 not found


In [94]:
def load_loc(peps, max_stops=20, data_dir="Data_m_loc"):
    """Gather the first stop coordinates of every person into one lat/lon dict.

    For each person ``{data_dir}/{pep}/stop_coords.json`` is read and its first
    ``max_stops`` entries are appended to the result.

    Parameters
    ----------
    peps : iterable
        Person ids; each id is used as a sub-directory name.
    max_stops : int, default 20
        Maximum number of leading coordinate entries taken per person.
    data_dir : str, default "Data_m_loc"
        Directory that holds one sub-directory per person.

    Returns
    -------
    dict
        ``{'lat': array, 'lon': array}`` built from the first and second
        component of every coordinate entry (assumes the file stores
        ``[lat, lon]`` pairs; TODO confirm against the data). Both arrays are
        empty when nothing could be loaded.

    Persons whose file is missing are reported on stdout and skipped, and so
    are persons with an empty coordinate list.
    """
    # Start from an empty (2, 0) block so the result is well defined even when
    # no file could be loaded; the blocks are joined once after the loop.
    chunks = [np.empty((2, 0))]
    for pep in peps:
        try:
            with open(f"{data_dir}/{pep}/stop_coords.json") as json_file:
                data_loc = np.array(json.load(json_file))[:max_stops]
        except FileNotFoundError:
            print(f"File {pep} not found")
            continue
        if data_loc.size == 0:
            # An empty list becomes a 1-D array that can not be concatenated.
            continue
        chunks.append(data_loc.transpose())
    locs = np.concatenate(chunks, axis=1)
    return {'lat': locs[0], 'lon': locs[1]}
