Le but de ce fichier est d'évaluer notre modèle sur les données de test.

In [1]:
import pandas as pd
import numpy as np
import io
import os
import csv
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns
from matplotlib.ticker import  MultipleLocator, FormatStrFormatter
import re
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import models, transforms, datasets
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm

from sklearn.cluster import estimate_bandwidth, MeanShift

In [9]:
# Run on the first CUDA device when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('Using gpu: %s ' % torch.cuda.is_available())

Using gpu: True 


# Data loading and conversion

In [2]:
# Load the test split, discard the first CSV column, and keep only complete rows.
df_test = (
    pd.read_csv('test_avant_tenseur.csv')
    .pipe(lambda frame: frame.drop(columns=frame.columns[0]))
    .dropna(axis=0)
)
df_test.head()

Unnamed: 0,ORIGIN_CALL,ORIGIN_STAND,TAXI_ID,WEEK_OF_YEAR,DAY_OF_WEEK,QUARTER_HOUR,START_1_LON,START_1_LAT,START_2_LON,START_2_LAT,...,END_2_LON,END_2_LAT,END_3_LON,END_3_LAT,END_4_LON,END_4_LAT,END_5_LON,END_5_LAT,LABEL_LON,LABEL_LAT
0,57105.0,14,355,32,3,71,-8.585676,41.148522,-8.585712,41.148639,...,-8.585982,41.148963,-8.586396,41.148954,-8.586072,41.14872,-8.586324,41.147847,-8.584884,41.146623
1,57105.0,56,91,32,3,71,-8.610876,41.14557,-8.610858,41.145579,...,-8.607357,41.149962,-8.606817,41.150979,-8.606358,41.151915,-8.605719,41.152788,-8.601894,41.163597
2,57105.0,14,252,32,3,71,-8.585739,41.148558,-8.58573,41.148828,...,-8.582346,41.158773,-8.58213,41.159295,-8.5815,41.160816,-8.580978,41.162715,-8.574903,41.167719
3,57105.0,52,322,32,3,71,-8.613963,41.141169,-8.614125,41.141124,...,-8.614125,41.141124,-8.615088,41.140926,-8.615277,41.140818,-8.615259,41.140809,-8.614638,41.14098
4,57105.0,17,406,32,3,71,-8.619903,41.148036,-8.619894,41.148036,...,-8.619903,41.148036,-8.619903,41.148036,-8.619903,41.148036,-8.619894,41.148036,-8.619894,41.148036


In [3]:
# Load the training split, discard the first CSV column, and keep only complete rows.
df_try = (
    pd.read_csv('train_avant_tenseur.csv')
    .pipe(lambda frame: frame.drop(columns=frame.columns[0]))
    .dropna(axis=0)
)
df_try.head()

Unnamed: 0,ORIGIN_CALL,ORIGIN_STAND,TAXI_ID,WEEK_OF_YEAR,DAY_OF_WEEK,QUARTER_HOUR,START_1_LON,START_1_LAT,START_2_LON,START_2_LAT,...,END_2_LON,END_2_LAT,END_3_LON,END_3_LAT,END_4_LON,END_4_LAT,END_5_LON,END_5_LAT,LABEL_LON,LABEL_LAT
0,57105,63,380,26,0,0,-8.618643,41.141412,-8.618499,41.141376,...,-8.631738,41.148225,-8.629938,41.150385,-8.62911,41.151213,-8.629128,41.15124,-8.630838,41.154489
1,57105,6,384,26,0,0,-8.639847,41.159826,-8.640351,41.159871,...,-8.660178,41.163192,-8.663112,41.163687,-8.666235,41.1642,-8.669169,41.164704,-8.66574,41.170671
2,57105,63,217,26,0,0,-8.612964,41.140359,-8.613378,41.14035,...,-8.6499,41.154264,-8.599383,41.141736,-8.59653,41.140566,-8.65008,41.154291,-8.61597,41.14053
3,57105,63,343,26,0,0,-8.574678,41.151951,-8.574705,41.151942,...,-8.59869,41.146119,-8.598816,41.146101,-8.600193,41.146155,-8.601057,41.146101,-8.607996,41.142915
4,57105,63,230,26,0,0,-8.645994,41.18049,-8.645949,41.180517,...,-8.676918,41.171841,-8.680032,41.171949,-8.682615,41.173191,-8.685441,41.173776,-8.687268,41.178087


In [4]:
# Label coordinates (longitude, latitude): once as an (n_rows, 2) NumPy array
# and once as a float64 tensor.
train_labels = np.column_stack((df_try['LABEL_LON'], df_try['LABEL_LAT']))
train_tensor_label = torch.tensor(
    df_try[['LABEL_LON', 'LABEL_LAT']].values.astype('float64')
)

# One (n_rows, 1) int64 tensor per categorical metadata column.
(train_tensor_client, train_tensor_stand, train_tensor_taxi,
 train_tensor_week, train_tensor_day, train_tensor_quarter) = (
    torch.tensor(df_try[[column]].values.astype('int64'))
    for column in ('ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
                   'WEEK_OF_YEAR', 'DAY_OF_WEEK', 'QUARTER_HOUR')
)

# Every remaining column (the trajectory coordinates) becomes one float32 tensor.
df_try_2 = df_try.drop(columns=['LABEL_LON', 'LABEL_LAT', 'QUARTER_HOUR', 'DAY_OF_WEEK',
                                'WEEK_OF_YEAR', 'TAXI_ID', 'ORIGIN_STAND', 'ORIGIN_CALL'])
train_tensor_points = torch.tensor(df_try_2.values.astype('float32'))

In [5]:
# Label coordinates (longitude, latitude): once as an (n_rows, 2) NumPy array
# and once as a float64 tensor.
test_labels = np.column_stack((df_test['LABEL_LON'], df_test['LABEL_LAT']))
test_tensor_label = torch.tensor(
    df_test[['LABEL_LON', 'LABEL_LAT']].values.astype('float64')
)

# One (n_rows, 1) int64 tensor per categorical metadata column.
(test_tensor_client, test_tensor_stand, test_tensor_taxi,
 test_tensor_week, test_tensor_day, test_tensor_quarter) = (
    torch.tensor(df_test[[column]].values.astype('int64'))
    for column in ('ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
                   'WEEK_OF_YEAR', 'DAY_OF_WEEK', 'QUARTER_HOUR')
)

# Every remaining column (the trajectory coordinates) becomes one float32 tensor.
df_test_2 = df_test.drop(columns=['LABEL_LON', 'LABEL_LAT', 'QUARTER_HOUR', 'DAY_OF_WEEK',
                                  'WEEK_OF_YEAR', 'TAXI_ID', 'ORIGIN_STAND', 'ORIGIN_CALL'])
test_tensor_points = torch.tensor(df_test_2.values.astype('float32'))

# Neural network class

In [7]:
class Network(nn.Module):
    """Position predictor that outputs a softmax-weighted average of fixed centroids.

    Six categorical metadata fields are each embedded into 10 dimensions and
    concatenated with the trajectory features. A 500-unit hidden layer then
    scores every centroid, and the prediction is the probability-weighted mean
    of the centroid coordinates.

    Args:
        clusters: 2-D tensor with one centroid per row. Its number of rows
            fixes the width of the output layer.
    """

    def __init__(self, clusters):
        super(Network, self).__init__()

        # Kept as a plain attribute: registering it as a buffer would add a key
        # to the state_dict and break loading of already-saved checkpoints.
        self.clusters = clusters
        # 80 inputs = 6 embeddings x 10 dims + 20 trajectory values.
        self.fc1 = nn.Linear(80, 500)
        self.fc2 = nn.Linear(500, len(clusters))

        self.emb_client = nn.Embedding(57106, 10)
        self.emb_stand = nn.Embedding(64, 10)
        self.emb_taxi = nn.Embedding(448, 10)
        # NOTE(review): accepts indices 0..51 only; confirm WEEK_OF_YEAR never
        # reaches 52 or 53 in the data fed to this model.
        self.emb_week = nn.Embedding(52, 10)
        self.emb_day = nn.Embedding(7, 10)
        self.emb_quarter = nn.Embedding(96, 10)

        self.relu = nn.ReLU(inplace=True)
        self.bn = nn.BatchNorm1d(500)
        # Explicit dim: normalise over the centroid axis of the (batch, n_centroids)
        # scores. This matches the implicit choice for 2-D input without the
        # deprecation warning.
        self.soft = nn.Softmax(dim=1)

    def forward(self, traj, client, stand, taxi, week, day, quarter):
        """Predict one position per sample.

        Args:
            traj: float tensor of trajectory features, 20 values per sample.
            client, stand, taxi, week, day, quarter: integer index tensors,
                one index per sample, for the corresponding embedding tables.

        Returns:
            Tensor with one row per sample and as many columns as ``clusters``.
        """
        # Embedding of the metadata
        embedded = [
            self.emb_client(client),
            self.emb_stand(stand),
            self.emb_taxi(taxi),
            self.emb_week(week),
            self.emb_day(day),
            self.emb_quarter(quarter),
        ]

        # Concatenation of the trajectories and of the embedding outputs
        batch = torch.cat([traj] + [emb.view(-1, 10) for emb in embedded], dim=1)

        # Computation of the probabilities of each centroid
        output = self.relu(self.fc1(batch))
        output = self.bn(output)
        output = self.fc2(output)
        output = self.soft(output)

        # Computation of the weighted average of the centroids.
        # Use the centroids given to the constructor (not a notebook-level global),
        # aligned with the activations' device and dtype.
        centroids = self.clusters.to(device=output.device, dtype=output.dtype)
        return torch.mm(output, centroids)

# Loss function

In [6]:
class equi_rec_loss(nn.Module):
    """Mean equirectangular-approximation distance between two sets of positions.

    Both inputs hold one position per row, with longitude in column 0 and
    latitude in column 1, expressed in degrees. The result is scaled by ``R``
    (Earth radius in kilometres).
    """

    def __init__(self):
        super(equi_rec_loss, self).__init__()
        self.R = 6371  # Earth radius, km
        # Exact conversion factor; the former 3.14/180 shrank every distance by ~0.05 %.
        self.deg2rad = math.pi / 180.

    def forward(self, pos1, pos2):
        """Return the mean distance between matching rows of ``pos1`` and ``pos2``.

        Args:
            pos1: tensor of positions, columns (longitude, latitude) in degrees.
            pos2: tensor of positions with the same layout and number of rows.

        Returns:
            Scalar tensor: average over rows of the equirectangular distance.
        """
        long1 = pos1[:, 0] * self.deg2rad
        lat1 = pos1[:, 1] * self.deg2rad
        long2 = pos2[:, 0] * self.deg2rad
        lat2 = pos2[:, 1] * self.deg2rad

        # Longitude differences shrink with the cosine of the mean latitude.
        x = (long2 - long1) * torch.cos((lat1 + lat2) / 2)
        y = (lat2 - lat1)

        loss = torch.mean(torch.sqrt(x**2 + y**2), dim=0) * self.R
        return loss

# Clustering function

In [8]:
def get_clusters(coords, bandwidth=0.01):
    """
    Estimate cluster centres for the given array of coordinates.

    Args:
        coords: 2-D NumPy array, longitude in column 0 and latitude in column 1.
        bandwidth: MeanShift kernel bandwidth, in the same unit as ``coords``.

    Returns:
        torch.Tensor of cluster centres (one per row), moved to ``device``.
    """
    # First, grossly reduce the spatial dataset by rounding the coordinates to the 4th decimal
    # (i.e. 11 meters. See: https://en.wikipedia.org/wiki/Decimal_degrees)
    clusters = pd.DataFrame({
        'approx_longitudes': coords[:,0].round(4),
        'approx_latitudes': coords[:,1].round(4)
    })
    clusters = clusters.drop_duplicates(['approx_longitudes', 'approx_latitudes'])
    # `.values` replaces DataFrame.as_matrix(), which is deprecated and was
    # removed in pandas 1.0. Row and column order are left untouched.
    clusters = clusters.values

    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(clusters)
    return torch.Tensor(ms.cluster_centers_).to(device)

In [11]:
# Centroid tensor computed from the training-set label coordinates.
clusters = get_clusters(train_labels)

  if sys.path[0] == '':


# Data loader

In [10]:
# Number of samples per batch passed to the test DataLoader.
batch_size = 20

In [12]:
# Create dataloader
# Evaluation needs no random ordering: shuffle=False keeps the batch composition,
# and therefore the reported metric, identical from one run to the next.
dataloader_test = DataLoader(TensorDataset(
    test_tensor_points,test_tensor_client,test_tensor_stand,test_tensor_taxi,
    test_tensor_week,test_tensor_day, test_tensor_quarter,test_tensor_label),
     batch_size=batch_size, shuffle=False)

# Model evaluation

In [13]:
# Loss instance used as the evaluation metric.
criterion = equi_rec_loss()

In [14]:
cd model_cluster

/home/jupyter/model_cluster


In [None]:
# Evaluate the checkpoints with suffixes 0, 2, ..., 134 on the test set.
# `l` receives one mean test loss per checkpoint, in checkpoint order.
l=[]

# The architecture is the same for every checkpoint, so build it once and only
# swap the weights inside the loop.
model = Network(clusters)
model = model.to(device)
model.eval()  # Inference behaviour for BatchNorm (uses running statistics)

for i in range(0,136,2):
    # map_location places the stored tensors on `device`, whatever device they were saved from.
    model.load_state_dict(torch.load('Model_avec_clusters_'+str(i)+'.pth', map_location=device))

    running_loss = 0.0
    n_samples = 0
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for traj, client, stand, taxi, week, day, quarter, targets in dataloader_test:
            traj = traj.to(device)
            targets = targets.to(device)
            client = client.to(device)
            stand = stand.to(device)
            taxi = taxi.to(device)
            week = week.to(device)
            day = day.to(device)
            quarter = quarter.to(device)

            # Predicted (longitude, latitude) for every sample of the batch
            outputs = model(traj, client, stand, taxi, week, day, quarter)
            loss = criterion(outputs.double(), targets)

            # The criterion returns a batch mean: weight it by the batch size so a
            # smaller final batch is not over-represented in the overall average.
            batch_len = targets.size(0)
            running_loss += loss.item() * batch_len
            n_samples += batch_len

    l.append(running_loss/n_samples)