## 1. Import libs & listing required data

In [1]:
import os
import pickle
import numpy as np
import pandas as pd

## 2. Set up GLOBAL VARS

In [6]:
SPEED_REGRESSION = os.getcwd() + '/MachineLearningModels/reg_rehab_speed_1.sav'
PREDICTION_MODEL = os.getcwd() + '/MachineLearningModels/logreg_1.sav'
OUTPUT_FOLDER = "/Data/clustering_outputs/"
FOLDER = '/Data/OctoberGPS/'


FILES = os.listdir(FOLDER)
FILES = list(set(FILES) - set(os.listdir(OUTPUT_FOLDER)))
SAMPLE_FILES = list(map(lambda x,y: x+y, [FOLDER]*len(FILES), FILES))


## 3. Set up clustering pipeline

In [4]:
from Preprocessing import preprocess
from Clustering import clusterize


N = len(SAMPLE_FILES)
i = list(range(N))


def do_clustering(SAMPLE_FILE, i, N):
    try:
        df = pd.read_csv(SAMPLE_FILE, parse_dates=['time'])
        df = df.loc[~df.unixtime.duplicated(keep='last'), :].reset_index(drop=True).copy()

        fname = SAMPLE_FILE.rsplit('/', 1)[1]

        args = {
            'filename': fname,
            'df': df.copy(),
            'speed_regr': pickle.load(open(SPEED_REGRESSION, 'rb')),
            'highway_detection_model': pickle.load(open(PREDICTION_MODEL, 'rb')),
            'on_cols_to_clust_pred': ['x_match', 'y_match', 'unixtime'],
            'min_samples': 20,
            'eps': 120
        }

        outputs = clusterize(preprocess(args))
        outputs['df'].to_csv(OUTPUT_FOLDER+fname, index=False)

        del outputs
        print(f"Finish processing for: {fname} | {i} | {int(i*100/N)}%")
    except Exception as exc:
        print(f"{fname}: {exc}")
        

In [5]:
from multiprocessing import Pool

with Pool(5) as pool:
    pool.starmap(do_clustering, zip(SAMPLE_FILES, i, [N]*N))

Finish processing for: XE-5609B_2020-10-29.csv | 72 | 15%
Finish processing for: XE-5665M_2020-10-22.csv | 48 | 10%
Finish processing for: XE-5629T_2020-10-3.csv | 0 | 0%
Finish processing for: XE-5620S_2020-10-26.csv | 24 | 5%
Finish processing for: XE-5748T_2020-10-30.csv | 96 | 20%
Finish processing for: XE-5559L_2020-10-1.csv | 97 | 20%
Finish processing for: XE-5598Z_2020-10-17.csv | 73 | 15%
Finish processing for: XE-5577J_2020-10-7.csv | 49 | 10%
Finish processing for: XE-5628X_2020-10-25.csv | 25 | 5%
Finish processing for: XE-5632H_2020-10-9.csv | 1 | 0%


KeyboardInterrupt: 