In [1]:
import geolib.geohash
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import pandas as pd
from tqdm.auto import tqdm
from cri98tj.partitioners.Geohash_partitioner import Geohash_partitioner
from cri98tj.normalizers.FirstPoint_normalizer import FirstPoint_normalizer
from cri98tj.selectors.RandomInformationGain_selector import RandomInformationGain_selector
from cri98tj.distancers.Euclidean_distancer import Euclidean_distancer
from sklearn.model_selection import train_test_split
from cri98tj.distancers.Euclidean_distancer import euclideanBestFitting

In [2]:
# Read the prepared vehicles dataset and order the points by trajectory id, then by time.
df_original = (
    pd.read_csv('../examples/Vehicles Dataset/data/vehicles_preapred.zip')
    .sort_values(by=["tid", "t"])
)
# Both coordinate columns are divided by 100000 before any further processing.
df_original = df_original.assign(
    c1=df_original.c1 / 100000,
    c2=df_original.c2 / 100000,
)

# Working copy restricted to the trajectory id, the class label and the two coordinates.
df = df_original.loc[:, ["tid", "class", "c1", "c2"]].copy()

df.head()

Unnamed: 0,tid,class,c1,c2
0,30901,B,42.07716,4.738411
1,30901,B,42.077246,4.739088
2,30901,B,42.077259,4.739096
3,30901,B,42.077369,4.739158
4,30901,B,42.077635,4.739343


In [3]:
# Collapse the point-level frame to one row per trajectory id so that whole
# trajectories (not single points) are assigned to the train or test side.
# NOTE(review): max() is used to pick the per-trajectory class; presumably every
# point of a trajectory carries the same label - confirm.
tid_labels = df.groupby(by=["tid"]).max().reset_index()

# 70/30 split of the trajectory ids, stratified on the class label.
# The label halves returned by train_test_split are not needed and are discarded.
tid_train, tid_test, _, _ = train_test_split(
    tid_labels["tid"],
    tid_labels["class"],
    test_size=.3,
    stratify=tid_labels["class"],
    random_state=3,
)

# Settings shared by every experiment cell below.
spatioTemporalCols = ["c1", "c2"]  # coordinate columns handed to the cri98tj components
n_jobs = 24                        # worker count passed to the cri98tj components and the forest
verbose = False

In [4]:
import warnings
# Suppress every warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings("ignore")

from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score

def compute_measures(test, pred, average="micro"):
    """Return ``(accuracy, precision, f1, recall)`` for a set of predictions.

    Parameters
    ----------
    test : array-like
        Ground-truth labels.
    pred : array-like
        Predicted labels, aligned with ``test``.
    average : str, default "micro"
        Averaging mode forwarded to ``precision_score``, ``f1_score`` and
        ``recall_score``. The default keeps the original behaviour. Note that
        for single-label multiclass problems micro-averaged precision, recall
        and F1 are all equal to accuracy, so the four returned values coincide;
        pass ``"macro"`` or ``"weighted"`` to obtain distinct per-class summaries.

    Returns
    -------
    tuple of float
        ``(accuracy, precision, f1, recall)`` in that order.
    """
    return (
        accuracy_score(test, pred),
        precision_score(test, pred, average=average),
        f1_score(test, pred, average=average),
        recall_score(test, pred, average=average),
    )


In [6]:
%%time

from datetime import datetime
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
from cri98tj.distancers.RotatingEuclidean_distancer import RotatingEuclidean_distancer, \
    rotatingEuclideanBestFitting

# Geohash precision handed to the partitioner; also embedded in the result file names.
precision = 6
# Number of repetitions averaged for every configuration.
# NOTE: this name shadows the builtin `iter`; it is kept because later cells read it.
iter = 1

# One entry per value of `i`, filled in loop order (index i-2):
res = []        # averaged [acc, prec, f1, recall]
n_mov_rig = []  # averaged dist_np.shape[1]; this count includes column 0, which holds the label
time = []       # averaged elapsed milliseconds, stored NEGATIVE (see `stop` below)
for i in tqdm(range(2, 20)):
    df = df_original[["tid", "class", "c1", "c2"]].copy()

    # Zero accumulators for this configuration. compute_measures returns four
    # values, so the running sum has exactly four slots.
    res.append((.0, .0, .0, .0))
    n_mov_rig.append(0)
    time.append(.0)

    for _ in range(iter):
        normalizer = FirstPoint_normalizer(spatioTemporalColumns=spatioTemporalCols, fillna=None, verbose=verbose)
        distancer = RotatingEuclidean_distancer(normalizer=normalizer, spatioTemporalColumns=spatioTemporalCols, n_jobs=n_jobs, verbose=verbose)
        partitioner = Geohash_partitioner(precision=precision, spatioTemporalColumns=spatioTemporalCols, verbose=verbose)

        # Timed section: partitioning, movelet selection and distance computation.
        start = datetime.now()
        # Only the training trajectories are partitioned and used for selection.
        part = partitioner.fit_transform(df[df.tid.isin(tid_train)].values)
        # The number of requested movelets grows geometrically with i.
        selector = RandomInformationGain_selector(top_k=int(1.4**i), bestFittingMeasure=rotatingEuclideanBestFitting, movelets_per_class=300, trajectories_for_orderline=50, n_jobs=n_jobs, spatioTemporalColumns=spatioTemporalCols, normalizer=normalizer, verbose=verbose)
        shapelets = selector.fit_transform(part)
        _, dist_np = distancer.fit_transform((df.values, shapelets))
        # `start - now` is a NEGATIVE timedelta. The sign is deliberately left as is:
        # the cell that builds df_res_rig multiplies the column by -1 before saving.
        stop = start - datetime.now()

        n_mov_rig[i-2] += (dist_np.shape[1])

        clf = RandomForestClassifier(max_depth=3, random_state=3, n_jobs=n_jobs, n_estimators=5000)

        # Column 0 of the distance matrix is used as the label, the rest as features.
        dist_np_df = pd.DataFrame(dist_np)
        X = dist_np_df.drop(columns=[0]).values
        y = dist_np_df[0].values

        # NOTE(review): this split is made on the rows of dist_np, independently of
        # tid_train/tid_test. It uses the same test_size, stratification and
        # random_state, so it presumably reproduces the tid split only if the rows
        # come back in the same order as the per-tid frame - confirm.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, stratify=y, random_state=3)

        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)

        # Element-wise running sum of the four measures.
        res[i-2] = tuple(a+b for a, b in zip(compute_measures(y_test, y_pred), res[i-2]))
        time[i-2] += stop.total_seconds()*1000  # milliseconds

    # Turn the sums into averages over the repetitions.
    res[i-2] = list(map(lambda x: x/iter, res[i-2]))
    n_mov_rig[i-2] /= iter
    time[i-2] /= iter

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/79 [00:00<?, ?it/s]

  0%|          | 0/111 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/217 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

Wall time: 2h 55min 58s


In [9]:
# Assemble the per-configuration results of the RIG runs into one table.
# The elapsed times were accumulated as negative numbers (start - end), so the
# sign is flipped here to store positive milliseconds.
df_res_rig = pd.DataFrame(res, columns=["acc", "prec", "f1", "recall"]).assign(
    t=[elapsed * -1 for elapsed in time],
    n=n_mov_rig,
)

# Persist the table; the geohash precision is part of the file name.
df_res_rig.to_csv(f"Test n_movelet rotatingED vehicle RIG {precision}.csv", index=None)

In [8]:
# Show the results table of the RandomInformationGain (RIG) selector runs.
df_res_rig

Unnamed: 0,acc,prec,f1,recall,t,n
0,0.878261,0.878261,0.878261,0.878261,255908.601,2.0
1,0.982609,0.982609,0.982609,0.982609,257324.497,3.0
2,0.965217,0.965217,0.965217,0.965217,244293.204,4.0
3,0.973913,0.973913,0.973913,0.973913,245602.987,6.0
4,0.982609,0.982609,0.982609,0.982609,247480.286,8.0
5,0.973913,0.973913,0.973913,0.973913,247325.89,11.0
6,0.973913,0.973913,0.973913,0.973913,256995.468,15.0
7,0.965217,0.965217,0.965217,0.965217,271543.886,21.0
8,0.965217,0.965217,0.965217,0.965217,299776.57,29.0
9,0.965217,0.965217,0.965217,0.965217,362103.874,41.0


In [10]:
# Reload the saved RIG results from disk (the file name depends on `precision`),
# so the following cells can run without repeating the long experiment above.
df_res_rig = pd.read_csv(f"Test n_movelet rotatingED vehicle RIG {precision}.csv")
df_res_rig

Unnamed: 0,acc,prec,f1,recall,t,n
0,0.878261,0.878261,0.878261,0.878261,255908.601,2.0
1,0.982609,0.982609,0.982609,0.982609,257324.497,3.0
2,0.965217,0.965217,0.965217,0.965217,244293.204,4.0
3,0.973913,0.973913,0.973913,0.973913,245602.987,6.0
4,0.982609,0.982609,0.982609,0.982609,247480.286,8.0
5,0.973913,0.973913,0.973913,0.973913,247325.89,11.0
6,0.973913,0.973913,0.973913,0.973913,256995.468,15.0
7,0.965217,0.965217,0.965217,0.965217,271543.886,21.0
8,0.965217,0.965217,0.965217,0.965217,299776.57,29.0
9,0.965217,0.965217,0.965217,0.965217,362103.874,41.0


In [12]:
%%time
from cri98tj.selectors.Random_selector import Random_selector
from sklearn.metrics import accuracy_score
from datetime import datetime

# One entry per distinct value of df_res_rig.n, filled in loop order (index i-1):
res = []      # averaged [acc, prec, f1, recall]
n_mov_r = []  # averaged dist_np.shape[1]; this count includes column 0, which holds the label
time = []     # averaged elapsed milliseconds, stored NEGATIVE (see `stop` below)
for i, n in enumerate(tqdm(df_res_rig.n.unique()), start=1):
    df = df_original[["tid", "class", "c1", "c2"]].copy()

    # Zero accumulators for this configuration. compute_measures returns four
    # values, so the running sum has exactly four slots.
    res.append((.0, .0, .0, .0))
    n_mov_r.append(0)
    time.append(.0)

    # `n` is a float when df_res_rig was read back from CSV; convert it so the
    # selector receives an integer movelet count.
    movelets_per_class = max(1, int(n) // 2)

    # Repeat until `iter` runs have completed successfully; failed runs are retried.
    c = 0
    while c < iter:
        try:
            normalizer = FirstPoint_normalizer(spatioTemporalColumns=spatioTemporalCols, fillna=None, verbose=verbose)
            distancer = RotatingEuclidean_distancer(normalizer=normalizer, spatioTemporalColumns=spatioTemporalCols, n_jobs=n_jobs, verbose=verbose)
            partitioner = Geohash_partitioner(precision=precision, spatioTemporalColumns=spatioTemporalCols, verbose=verbose)

            # Timed section: partitioning, movelet selection and distance computation.
            start = datetime.now()
            # Only the training trajectories are partitioned and used for selection.
            part = partitioner.fit_transform(df[df.tid.isin(tid_train)].values)
            selector = Random_selector(movelets_per_class=movelets_per_class, n_jobs=n_jobs, spatioTemporalColumns=spatioTemporalCols, normalizer=normalizer, verbose=verbose)
            shapelets = selector.fit_transform(part)

            _, dist_np = distancer.fit_transform((df.values, shapelets))
            # `start - now` is a NEGATIVE timedelta. The sign is deliberately left as is:
            # the cell that builds df_res_r multiplies the column by -1 before saving.
            stop = start - datetime.now()

            clf = RandomForestClassifier(max_depth=3, random_state=3, n_jobs=n_jobs, n_estimators=5000)

            # Column 0 of the distance matrix is used as the label, the rest as features.
            dist_np_df = pd.DataFrame(dist_np)
            X = dist_np_df.drop(columns=[0]).values
            y = dist_np_df[0].values

            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, stratify=y, random_state=3)

            clf.fit(X_train, y_train)

            y_pred = clf.predict(X_test)
            measures = compute_measures(y_test, y_pred)
        except Exception as exc:
            # Catch Exception only, so that interrupting the kernel still stops the
            # loop, and report the cause instead of hiding it.
            print(f"failed: {exc!r}")
            continue

        # Accumulate only after the whole run succeeded, so that a run failing
        # half-way cannot be counted twice when it is retried.
        n_mov_r[i-1] += (dist_np.shape[1])
        res[i-1] = tuple(a+b for a, b in zip(measures, res[i-1]))
        time[i-1] += stop.total_seconds()*1000  # milliseconds
        c += 1

    # Turn the sums into averages over the repetitions.
    res[i-1] = list(map(lambda x: x/iter, res[i-1]))
    n_mov_r[i-1] /= iter
    time[i-1] /= iter

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/80 [00:00<?, ?it/s]

  0%|          | 0/112 [00:00<?, ?it/s]

  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/218 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

Wall time: 1h 23min 29s


In [13]:
# Preview the averaged measures collected in `res` by the random-selector loop.
pd.DataFrame(res, columns=["acc", "prec", "f1", "recall"])

Unnamed: 0,acc,prec,f1,recall
0,0.721739,0.721739,0.721739,0.721739
1,0.721739,0.721739,0.721739,0.721739
2,0.704348,0.704348,0.704348,0.704348
3,0.73913,0.73913,0.73913,0.73913
4,0.808696,0.808696,0.808696,0.808696
5,0.8,0.8,0.8,0.8
6,0.8,0.8,0.8,0.8
7,0.782609,0.782609,0.782609,0.782609
8,0.782609,0.782609,0.782609,0.782609
9,0.843478,0.843478,0.843478,0.843478


In [14]:
# Assemble the per-configuration results of the random-selector runs into one table.
# The elapsed times were accumulated as negative numbers (start - end), so the
# sign is flipped here to store positive milliseconds.
df_res_r = pd.DataFrame(res, columns=["acc", "prec", "f1", "recall"]).assign(
    t=[elapsed * -1 for elapsed in time],
    n=n_mov_r,
)

# Persist the table; the geohash precision is part of the file name.
df_res_r.to_csv(f"Test n_movelet rotatingED vehicle R {precision}.csv", index=None)

In [15]:
# Reload the saved random-selector results from disk (the file name depends on `precision`).
df_res_r = pd.read_csv(f"Test n_movelet rotatingED vehicle R {precision}.csv")

In [16]:
# Show the results table of the random-selector runs.
df_res_r

Unnamed: 0,acc,prec,f1,recall,t,n
0,0.721739,0.721739,0.721739,0.721739,65600.558,3.0
1,0.721739,0.721739,0.721739,0.721739,64190.55,3.0
2,0.704348,0.704348,0.704348,0.704348,67683.713,5.0
3,0.73913,0.73913,0.73913,0.73913,78347.301,7.0
4,0.808696,0.808696,0.808696,0.808696,76559.301,9.0
5,0.8,0.8,0.8,0.8,79323.955,11.0
6,0.8,0.8,0.8,0.8,81138.59,15.0
7,0.782609,0.782609,0.782609,0.782609,101113.634,21.0
8,0.782609,0.782609,0.782609,0.782609,152110.328,29.0
9,0.843478,0.843478,0.843478,0.843478,194692.587,41.0


In [None]:
%%time
from cri98tj.selectors.Random_selector import Random_selector
from sklearn.metrics import accuracy_score
from cri98tj.distancers.InterpolatedRootDistance_distancer import InterpolatedRootDistance_distancer, \
    InterpolatedRootDistanceBestFitting
from datetime import datetime
from cri98tj.partitioners.Voronoi_partitioner import Voronoi_partitioner

# Number of repetitions averaged for every configuration in this cell.
n_repeats = 3

# One entry per distinct value of df_res_rig.n, filled in loop order (index i-1):
res = []      # averaged [acc, prec, f1, recall]
n_mov_r = []  # averaged dist_np.shape[1]; this count includes column 0, which holds the label
time = []     # averaged elapsed milliseconds, stored NEGATIVE (see `stop` below)
# `n` only determines how many configurations are run; the number of movelets
# requested from the selector is derived from the position `i`.
for i, n in enumerate(tqdm(df_res_rig.n.unique()), start=1):
    # NOTE(review): unlike the other experiment cells, the "t" column is kept here
    # while spatioTemporalCols still lists only c1 and c2 - presumably the Voronoi
    # partitioner needs timestamps for stop_seconds; confirm.
    df = df_original[["tid", "class", "c1", "c2", "t"]].copy()

    # Zero accumulators for this configuration. compute_measures returns four
    # values, so the running sum has exactly four slots.
    res.append((.0, .0, .0, .0))
    n_mov_r.append(0)
    time.append(.0)

    for _ in range(n_repeats):
        normalizer = FirstPoint_normalizer(spatioTemporalColumns=spatioTemporalCols, fillna=None, verbose=verbose)
        distancer = InterpolatedRootDistance_distancer(normalizer=normalizer, spatioTemporalColumns=spatioTemporalCols, n_jobs=n_jobs, verbose=verbose)
        partitioner = Voronoi_partitioner(spatioTemporalColumns=spatioTemporalCols, radius=600, stop_distance=10, stop_seconds=600)

        # Timed section: partitioning, movelet selection and distance computation.
        start = datetime.now()
        # Only the training trajectories are partitioned and used for selection.
        part = partitioner.fit_transform(df[df.tid.isin(tid_train)].values)
        # NOTE(review): i starts at 1 here, so int(1.4**i) is 1 for both i=1 and
        # i=2; the geohash RIG cell starts its exponent at 2 - confirm this is intended.
        selector = RandomInformationGain_selector(top_k=int(1.4**i),
                                                  bestFittingMeasure=InterpolatedRootDistanceBestFitting,
                                                  movelets_per_class=300, trajectories_for_orderline=100,
                                                  n_jobs=n_jobs, spatioTemporalColumns=spatioTemporalCols,
                                                  normalizer=normalizer, verbose=verbose)
        shapelets = selector.fit_transform(part)

        _, dist_np = distancer.fit_transform((df.values, shapelets))
        # `start - now` is a NEGATIVE timedelta. The sign is deliberately left as is:
        # the cell that builds df_res_rig_v multiplies the column by -1 before saving.
        stop = start - datetime.now()

        n_mov_r[i-1] += (dist_np.shape[1])

        clf = RandomForestClassifier(max_depth=3, random_state=3, n_jobs=n_jobs, n_estimators=5000)

        # Column 0 of the distance matrix is used as the label, the rest as features.
        dist_np_df = pd.DataFrame(dist_np)
        X = dist_np_df.drop(columns=[0]).values
        y = dist_np_df[0].values

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, stratify=y, random_state=3)

        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)

        # Element-wise running sum of the four measures.
        res[i-1] = tuple(a+b for a, b in zip(compute_measures(y_test, y_pred), res[i-1]))
        time[i-1] += stop.total_seconds()*1000  # milliseconds

    # Turn the sums into averages over the repetitions.
    res[i-1] = list(map(lambda x: x/n_repeats, res[i-1]))
    n_mov_r[i-1] /= n_repeats
    time[i-1] /= n_repeats

In [None]:
# Assemble the per-configuration results of the Voronoi RIG runs into one table.
# The elapsed times were accumulated as negative numbers (start - end), so the
# sign is flipped here to store positive milliseconds.
df_res_rig_v = pd.DataFrame(res, columns=["acc", "prec", "f1", "recall"]).assign(
    t=[elapsed * -1 for elapsed in time],
    n=n_mov_r,
)

# Persist the table under a fixed file name.
df_res_rig_v.to_csv("Test n_movelet vehicle RIG voronoi.csv", index=None)

In [None]:
# Show the results table of the Voronoi RIG runs.
df_res_rig_v