In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import pickle

In [66]:
# Load the held-out test split. header=None makes pandas treat the first line of
# each CSV as data rather than as column names; .values yields plain NumPy arrays.
X_test = pd.read_csv("./data/splited/far/X_test.csv", header=None).values
y_test = pd.read_csv("./data/splited/far/y_test.csv", header=None).values

# Number of output classes; passed to the model as `output_dim` further down.
# NOTE(review): presumably has to equal y_test.shape[1] — confirm.
n_towers = 134

In [67]:
class TrajectoryDataset(Dataset):
    """In-memory dataset that pairs each feature row with its target row.

    Both inputs are converted once, at construction time, to ``float32`` tensors.
    """

    def __init__(self, X, y):
        """Store ``X`` (features) and ``y`` (targets) as float32 tensors."""
        tensor_dtype = torch.float32
        self.X = torch.tensor(X, dtype=tensor_dtype)
        self.y = torch.tensor(y, dtype=tensor_dtype)

    def __len__(self):
        """Return the number of samples, i.e. the length of the feature tensor."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        features = self.X[idx]
        target = self.y[idx]
        return features, target
# Wrap the test arrays in the dataset defined above.
test_dataset = TrajectoryDataset(X_test, y_test)

# Serve the test set in batches of 128; no shuffle argument is passed here.
test_loader = DataLoader(test_dataset, batch_size=128)

In [68]:
class TrajectoryModel(nn.Module):
    """Feed-forward classifier that outputs a probability distribution over classes.

    Architecture: ``input_dim -> 80 -> 100 -> 105 -> output_dim`` with a ReLU after
    each hidden linear layer and a softmax on the output.

    The layers are kept in ``self.network`` (an ``nn.Sequential``) in their original
    order, so ``state_dict`` keys (``network.0.weight`` ... ``network.6.bias``) remain
    compatible with checkpoints saved from the previous definition.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output classes.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 80),
            nn.ReLU(),
            nn.Linear(80, 100),
            nn.ReLU(),
            nn.Linear(100, 105),
            nn.ReLU(),
            nn.Linear(105, output_dim),
            # nn.Linear always puts the class scores on the last axis, so normalise
            # over dim=-1. For (batch, features) input this is identical to dim=1,
            # and it additionally supports a single unbatched sample.
            nn.Softmax(dim=-1),
        )

    def forward(self, x):
        """Return class probabilities for ``x``; the last axis sums to 1."""
        return self.network(x)

In [69]:
# GPU configuration: use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

Usando dispositivo: cuda


In [70]:
# KL-divergence loss. nn.KLDivLoss takes log-probabilities as its input and (by
# default) probabilities as its target; reduction='batchmean' divides the summed
# loss by the batch size.
criterion = nn.KLDivLoss(reduction='batchmean')

In [71]:
# Load the model: rebuild the architecture, then restore the trained weights.
model = TrajectoryModel(input_dim=X_test.shape[1], output_dim=n_towers).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on a
# GPU can still be opened on a CPU-only machine (and vice versa).
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
model.load_state_dict(torch.load('best_model.pt', map_location=device))
# Alternative checkpoint:
# model.load_state_dict(torch.load('saved_model/model_trajectory.pt', map_location=device))

<All keys matched successfully>

In [72]:
model.eval()

# Accumulators for the metrics computed after the loop
true_labels = []
predicted_labels = []
top10_predictions = []

# Smallest probability allowed before taking the log. A softmax output can underflow
# to exactly 0; log(0) = -inf, and KLDivLoss then evaluates 0 * -inf = NaN for every
# zero entry of the target, which would poison the whole test loss.
PROB_EPS = 1e-12

test_loss = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # Model prediction: one row of class probabilities per sample
        outputs = model(X_batch)
        loss = criterion(outputs.clamp_min(PROB_EPS).log(), y_batch)
        # Sum of per-batch losses; it is averaged over the number of batches later
        test_loss += loss.item()

        # Most probable class per sample, and the class marked in the target row
        predicted_labels.extend(torch.argmax(outputs, dim=1).cpu().numpy())
        true_labels.extend(torch.argmax(y_batch, dim=1).cpu().numpy())

        # Ten highest-ranked classes per sample (fewer if the model has < 10 outputs)
        topk_indices = torch.topk(outputs, k=min(10, outputs.size(1)), dim=1).indices
        top10_predictions.extend(topk_indices.cpu().numpy().tolist())

In [73]:
def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Compute the distance in kilometres between two geographic points with the
    haversine formula.

    Args:
        lat1, lon1: latitude and longitude of the first point, in decimal degrees.
        lat2, lon2: latitude and longitude of the second point, in decimal degrees.
            Every coordinate is passed through ``float()``, so numeric strings work.

    Returns:
        The great-circle distance in kilometres, using an Earth radius of 6371 km.
    """
    R = 6371  # Earth radius in kilometres
    lat1, lon1, lat2, lon2 = np.radians([float(coord) for coord in (lat1, lon1, lat2, lon2)])

    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can leave `a` a hair outside [0, 1]; without the clamp
    # one of the square roots below would then return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return R * c


def geospatial_distance(ground_truth, predicted_values, zone_coords=None):
    """
    Mean haversine distance (km) between the centroids of the true and predicted zones.

    Args:
        ground_truth: sequence of class indices; index ``i`` is looked up as zone id ``i + 1``.
        predicted_values: sequence of class indices aligned with ``ground_truth``.
        zone_coords: optional mapping ``zone id -> {'latitude': ..., 'longitude': ...}``.
            When omitted, the table is read from ``./data/zones_centroids.csv`` (the
            original behaviour); pass it explicitly to avoid re-reading the file on
            every call.

    Returns:
        The average distance in kilometres over all samples.

    Raises:
        ZeroDivisionError: if ``ground_truth`` is empty.
        KeyError: if a zone id is missing from the coordinate table.
    """
    if zone_coords is None:
        zn_pos = pd.read_csv('./data/zones_centroids.csv')
        zone_coords = zn_pos.set_index('zone')[['latitude', 'longitude']].to_dict(orient='index')

    # Accumulate under a name distinct from the function itself to avoid shadowing it.
    total_km = 0
    for i in range(len(ground_truth)):
        true_zone = zone_coords[ground_truth[i] + 1]
        predicted_zone = zone_coords[predicted_values[i] + 1]

        # Add this sample's geospatial distance
        total_km += haversine_distance(
            true_zone['latitude'], true_zone['longitude'],
            predicted_zone['latitude'], predicted_zone['longitude'],
        )

    return total_km / len(ground_truth)  # Average of the distances

def topk(true_labels, predicted_labels, k):
    """Return the fraction of samples whose true label is among the first ``k`` predictions.

    Args:
        true_labels: sequence with one true label per sample.
        predicted_labels: sequence with one ranked list of predicted labels per sample.
        k: how many leading predictions of each ranked list to consider.

    Returns:
        Number of hits divided by ``len(true_labels)``.
    """
    hits = sum(
        1
        for sample in range(len(true_labels))
        if true_labels[sample] in predicted_labels[sample][:k]
    )
    return hits / len(true_labels)

In [None]:
# Compute additional metrics
from sklearn.metrics import classification_report, accuracy_score

# Average the summed per-batch losses. The result gets its own name so that
# re-running this cell does not divide `test_loss` a second time.
mean_test_loss = test_loss / len(test_loader)
print(f"Test Loss: {mean_test_loss:.4f}")

# Accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy:.4f}")

for i in [3, 5, 10]:
    topk_score = topk(true_labels, top10_predictions, i)
    print(f"Top-{i} Accuracy: {topk_score:.4f}")

# Keep the result under a separate name: assigning it to `geospatial_distance`
# would replace the function with a float and break any re-run of this cell.
mean_geo_distance_km = geospatial_distance(true_labels, predicted_labels)
print(f"Geospatial Distance: {mean_geo_distance_km:.4f} km")

# Classification report. zero_division=0 reports 0.0 for classes that were never
# predicted (the value already shown) without emitting undefined-metric warnings.
print("\nClassification Report:")
print(classification_report(true_labels, predicted_labels, zero_division=0))

Test Loss: 3.1795
Accuracy: 0.1882
Top-3 Accuracy: 0.3911
Top-5 Accuracy: 0.5139
Top-10 Accuracy: 0.6809
Geospatial Distance: 4.8217 km

Classification Report:
              precision    recall  f1-score   support

           0       0.40      0.03      0.06        59
           1       0.14      0.26      0.18        77
           2       0.50      0.01      0.01       141
           3       0.00      0.00      0.00       170
           4       0.19      0.13      0.16       394
           5       0.13      0.09      0.11       276
           6       0.00      0.00      0.00       119
           7       0.00      0.00      0.00       273
           8       0.19      0.55      0.28      3414
           9       0.00      0.00      0.00       214
          10       0.14      0.09      0.11      1544
          11       0.21      0.04      0.07      1654
          12       0.19      0.06      0.09      1635
          13       0.15      0.00      0.01       818
          14       0.00      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


: 

In [97]:
from matplotlib import cm
import json
import folium

In [98]:
def rgb2hex(r,g,b):
    """Format an RGB triple as a ``#rrggbb`` hex colour string.

    Args:
        r, g, b: either three integers on the 0-255 scale (Python or NumPy
            integers), or fractional values on the 0-1 scale. If any channel is
            not an integer, all three are treated as fractions and scaled by 255.

    Returns:
        A 7-character string such as ``"#ff007f"``. Channels are clamped to 0-255
        so out-of-range inputs cannot produce a malformed colour.
    """
    channels = (r, g, b)
    # NumPy integer scalars count as integers too; a plain `type(r) != int` check
    # would wrongly multiply them by 255.
    if not all(isinstance(c, (int, np.integer)) for c in channels):
        channels = tuple(int(255 * c) for c in channels)
    r, g, b = (min(255, max(0, int(c))) for c in channels)
    return "#{:02x}{:02x}{:02x}".format(r, g, b)

def mystyle(x):
    """Build the style dictionary for one GeoJSON feature.

    Features without a ``'color'`` property get a white outline and a fill opacity
    of 0. Otherwise the outline uses the feature's ``'color'`` and the fill opacity
    is ``tanh(10 * transp)``, read from the feature's ``'transp'`` property.

    Args:
        x: a feature dict with a ``'properties'`` mapping.

    Returns:
        A dict with the keys ``'weight'``, ``'fillOpacity'`` and ``'color'``.
    """
    props = x['properties']
    if 'color' not in props.keys():
        return {'weight': 1.5, 'fillOpacity': 0.0, 'color': 'white'}
    opacity = np.tanh(10 * props['transp'])
    return {'weight': 1.5, 'fillOpacity': opacity, 'color': props['color']}

In [99]:
# Inputs for the map demo: the processed trajectories and the zone centroid table.
trajectories = pd.read_csv('./data/processed_trajectories.csv')
zn_pos = pd.read_csv('./data/zones_centroids.csv')

# Lookup table keyed by the 'zone' column: zone -> {'latitude': ..., 'longitude': ...}
zone_coords = zn_pos.set_index('zone')[['latitude', 'longitude']].to_dict(orient='index')

# Helper that looks up the latitude and longitude of a zone
def get_lat_long(zone):
    """Return ``(latitude, longitude)`` for ``zone``, or ``(None, None)`` if it is not in ``zone_coords``."""
    if zone not in zone_coords:
        return None, None
    entry = zone_coords[zone]
    return entry['latitude'], entry['longitude']

# Create the unique tower identifiers (the integers 1 through 134)
towers_id = np.arange(1, 135, 1)  # Make sure this matches the valid zones in your dataset
n_towers = len(towers_id)
codes_onehot = np.eye(n_towers)  # One-hot encoding for the towers; row (id - 1) encodes tower `id`

# Build a table of coordinates for every zone: zone id -> (latitude, longitude)
ll = {zone: get_lat_long(zone) for zone in towers_id}
ll[None] = (None, None)  # Add null coordinates for invalid zones

def input_labels(data):
    """
    Build the model inputs (`inputs`) and labels (`predict`) from the processed data.

    Each row of ``data`` must provide ``zone_start``, ``zone_middle``, ``zone_end``
    and ``f_time``. Start/end coordinates are looked up in the module-level ``ll``
    table. A row is skipped when:

    * the start or end zone has no coordinates,
    * ``zone_middle`` is missing (NaN/None), or
    * ``zone_middle`` is outside ``1 .. len(codes_onehot)``.

    Args:
        data: DataFrame of trajectories with the columns listed above.

    Returns:
        A pair ``(inputs, predict)`` of NumPy arrays. Each ``inputs`` row is
        ``[long_start, lat_start, long_end, lat_end, f_time]``; each ``predict`` row
        is the one-hot row of ``codes_onehot`` for the middle zone.
    """
    inputs = []
    predict = []

    for _, row in data.iterrows():
        zone_start, zone_middle, zone_end, f_time = row['zone_start'], row['zone_middle'], row['zone_end'], row['f_time']

        # Look up the coordinates
        lat_start, long_start = ll.get(zone_start, (None, None))
        lat_end, long_end = ll.get(zone_end, (None, None))

        # Skip trajectories whose start or end zone is invalid
        if None in (lat_start, long_start, lat_end, long_end):
            continue

        # A missing middle zone cannot be converted with int(); treat it as invalid
        # instead of letting the whole run fail on one bad row.
        if pd.isna(zone_middle):
            continue

        zone_middle = int(zone_middle)  # Convert to integer

        # Make sure the zone maps to a valid row of codes_onehot
        if 1 <= zone_middle <= len(codes_onehot):
            # Build the input row
            inputs.append([long_start, lat_start, long_end, lat_end, f_time])

            # Build the one-hot label for the middle zone
            predict.append(codes_onehot[zone_middle - 1])

    return np.array(inputs), np.array(predict)

# Build the input rows and their one-hot labels from the trajectories
inputs_v2, predict = input_labels(trajectories)

# Shuffle inputs and labels together by indexing both with one shuffled index array
indices = np.arange(len(inputs_v2))
np.random.shuffle(indices)
inputs_v2, predict = inputs_v2[indices], predict[indices]

In [101]:
# Read both GeoJSON files. The context managers close the file handles even if
# json.load raises on malformed content.
with open('./data/transport_zones.json', 'r') as fd:
    transp_zones_json = json.load(fd)

with open('./data/voronoi_properties.json', 'r') as fd:
    voronois_json = json.load(fd)

# Keep only the Voronoi features whose 'province' property is 'La Habana'
voronois_habana_features = [
    feature
    for feature in voronois_json['features']
    if feature['properties']['province'] == 'La Habana'
]

In [106]:
# Zone ids of the two endpoints used by the map cells below (they are used as keys
# of `ll` when the markers are placed).
start = 2
end = 33

In [None]:
import copy

from sklearn import preprocessing

# We assume `model` is a TrajectoryModel instance with the trained weights loaded.
model.eval()  # Put the model in evaluation mode

# Normalisation: min-max scaler fitted (once) on the four coordinate columns.
# Column 4 (f_time) is fed to the model without scaling.
scaler = preprocessing.MinMaxScaler()
scaler.fit(inputs_v2[:, 0:4])

# Zones A and B as (longitude, latitude). The coordinates are looked up by zone id
# in `ll` — the same table input_labels() and the map markers use — rather than by
# row position in `zn_pos`, which is only right if that file is sorted by zone.
ZonaA = (ll[start][1], ll[start][0])
ZonaB = (ll[end][1], ll[end][0])

# Input rows: the same scaled endpoints on every row, with f_time sweeping 0.00 .. 0.99
input_val = np.zeros((100, 5))
X = scaler.transform(np.array([ZonaA[0], ZonaA[1], ZonaB[0], ZonaB[1]]).reshape(1, -1))[0]
input_val[:, :4] = X
input_val[:, 4] = np.arange(0, 1, 0.01)

# One forward pass over all rows; pb[i, j] = probability of zone index j at time step i
with torch.no_grad():  # No gradients needed for inference
    pb = model(torch.tensor(input_val, dtype=torch.float32).to(device)).cpu().numpy().astype(np.float64)

# Per-zone colour and transparency
intervals, zonenumber = pb.shape
zonesincolor = np.zeros((zonenumber, 3))  # probability-weighted colour sums per zone
tonorm = np.zeros(zonenumber)             # rank-based score per zone (drives transparency)
norm_fac = np.zeros(zonenumber)           # sum of the colour weights per zone

# `cm.viridis` is the colormap object itself; cm.get_cmap() is deprecated in recent matplotlib.
colormap = cm.viridis
top_n = min(10, zonenumber)
rank_weights = 1.0 / np.arange(1, top_n + 1)  # 1, 1/2, 1/3, ... for ranks 1..top_n

for i in range(intervals):
    zones = pb[i, :]
    col = np.array(colormap(i * 1.0 / intervals))[:3]  # RGB of this time step

    # Indices of the top_n most probable zones. argsort returns distinct indices, so
    # zones with tied probabilities are each credited once (searching for the value
    # with np.where would credit the first match repeatedly).
    top_idx = np.argsort(zones)[::-1][:top_n]
    tonorm[top_idx] += rank_weights

    # Weight of this time step's colour for every zone, relative to the most probable zone
    frac = zones / zones.max()
    norm_fac += frac
    zonesincolor += np.outer(frac, col)

# Turn the colour sums into weighted averages; 1e-5 guards against division by zero
zonesincolor = zonesincolor / (1e-5 + norm_fac)[:, None]

tonorm = tonorm / tonorm.max()

# Fill g2 with colours and transparency. deepcopy keeps transp_zones_json pristine:
# a shallow .copy() would share the feature dicts and modify the loaded data in place.
g2 = copy.deepcopy(transp_zones_json)
for f in g2['features']:
    j = int(f['properties']['NO_DE_ZONA']) - 1
    if not 0 <= j < zonenumber:
        # Zone without a model output: leave it without 'color', which mystyle()
        # draws with a white outline and zero fill opacity.
        continue
    f['properties']['color'] = rgb2hex(zonesincolor[j, 0], zonesincolor[j, 1], zonesincolor[j, 2])
    f['properties']['transp'] = float(tonorm[j])

In [None]:
# Base map. The variable is called `zone_map` rather than `map` so that Python's
# builtin map() is not shadowed for the rest of the kernel session.
zone_map = folium.Map(location=[23.0826, -82.2845], zoom_start=11, tiles='openstreetmap')

# Zone polygons styled by mystyle(), with the zone number shown as a tooltip
zones_geojson = folium.GeoJson(g2,
                               style_function=mystyle,
                               tooltip=folium.features.GeoJsonTooltip(fields=['NO_DE_ZONA']))

# Red markers at the (latitude, longitude) of the start and end zones
folium.Marker(location=ll[start], icon=folium.Icon(color='red', icon='circle', prefix='fa')).add_to(zone_map)
folium.Marker(location=ll[end], icon=folium.Icon(color='red', icon='circle', prefix='fa')).add_to(zone_map)

zones_geojson.add_to(zone_map)

# Last expression of the cell: the notebook renders the map
zone_map