In [1]:
# libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import requests

In [2]:
# Floyd-Warshall algorithm
def floyd_warshall(AdjMatrix):
    """Compute all-pairs shortest-path costs with the Floyd-Warshall algorithm.

    Parameters
    ----------
    AdjMatrix : array-like, shape (n, n)
        Weighted adjacency matrix. Entries equal to 0 are interpreted as
        "no edge". The input is not modified.

    Returns
    -------
    numpy.ndarray of float, shape (n, n)
        ``cost[i, j]`` is the cheapest path cost found from i to j, or
        ``np.inf`` when j cannot be reached from i. The diagonal is
        initialised to 0 before the relaxation starts.

    Raises
    ------
    ValueError
        If ``AdjMatrix`` is not a square 2-D matrix.
    """
    # Always work on a float copy: an integer array cannot store np.inf.
    cost = np.array(AdjMatrix, dtype=float)
    if cost.ndim != 2 or cost.shape[0] != cost.shape[1]:
        raise ValueError("AdjMatrix must be a square 2-D matrix")
    cost[cost == 0] = np.inf
    np.fill_diagonal(cost, 0)
    for k in range(cost.shape[0]):
        # Relax every (i, j) pair through intermediate vertex k in one
        # broadcasted step; the sum is materialised before `cost` is updated.
        np.minimum(cost, cost[:, k, None] + cost[None, k, :], out=cost)
    return cost

def leer_matriz(nombre_archivo):
    """Read a whitespace-separated numeric matrix from a text file.

    The first two lines of the file are skipped and every remaining
    non-blank line is parsed into a list of floats.

    Parameters
    ----------
    nombre_archivo : str or path-like
        Path of the text file to read.

    Returns
    -------
    list of list of float
        One inner list per non-blank data line, in file order.

    Raises
    ------
    ValueError
        If a token on a data line cannot be converted to ``float``.
    """
    matriz = []
    with open(nombre_archivo, 'r') as archivo:
        # Discard the two leading lines before the numeric rows.
        archivo.readline()
        archivo.readline()
        for linea in archivo:
            valores = linea.split()
            # A blank line (e.g. a trailing newline at end of file) would
            # otherwise become an empty row and break row[0] lookups later.
            if not valores:
                continue
            matriz.append([float(valor) for valor in valores])
    return matriz

def encontrar_estacion(est, matriz):
    """Look up a station row by its identifier.

    Scans ``matriz`` in order and, for the first row whose element 0 equals
    ``est``, returns that row's elements 1 and 2 as a tuple. Returns
    ``(None, None)`` when no row matches.
    """
    for fila in matriz:
        if fila[0] == est:
            return fila[1], fila[2]
    return None, None

def send_message(message, channel, timeout=10):
    """Publish a text notification to an ntfy.sh topic.

    Parameters
    ----------
    message : str
        Text to send; it is encoded as UTF-8 for the request body.
    channel : str
        Topic name appended to ``https://ntfy.sh/``.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed to ``requests.post``.
        Without it a stalled connection would block the notebook forever.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.post`` on connection errors or timeout.
    """
    requests.post(f"https://ntfy.sh/{channel}",
        data=message.encode(encoding='utf-8'),
        timeout=timeout)

In [3]:
def gaussian_kernel1(a, b, sigma=0.1620289):
    """Gaussian-style similarity between two scalars (or broadcastable arrays).

    Returns ``exp(-(a - b)**2 * sigma)``. Note that ``sigma`` multiplies the
    squared difference directly, so larger values give a narrower kernel.
    """
    squared_gap = (a - b) ** 2
    exponent = -squared_gap * sigma
    return np.exp(exponent)

def gaussian_kernel2(a, b, sigma=38.9827449):
    """Gaussian-style similarity between two vectors.

    Both inputs are converted to NumPy arrays and the result is
    ``exp(-||a - b||**2 * sigma)``, where ``||.||`` is ``np.linalg.norm``
    with its default settings. ``sigma`` multiplies the squared distance
    directly.
    """
    delta = np.array(a) - np.array(b)
    distance = np.linalg.norm(delta)
    return np.exp(-(distance ** 2) * sigma)

In [4]:
def count_trips_mibici(data_user, threshold = 5, complement = False):
    """Count trips per undirected station pair and turn counts into probabilities.

    Each row of ``data_user`` is mapped to the pair
    ``(min(Origen_Id, Destino_Id), max(Origen_Id, Destino_Id))`` so that
    A -> B and B -> A are counted together.

    Parameters
    ----------
    data_user : pandas.DataFrame
        Must contain the columns ``'Origen_Id'`` and ``'Destino_Id'``.
    threshold : int, optional
        Pairs with ``counts >= threshold`` are kept (or ``counts < threshold``
        when ``complement`` is true).
    complement : bool, optional
        Select the pairs below the threshold instead of those at/above it.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``Est_A``, ``Est_B``, ``counts`` and ``prob`` (``counts``
        divided by the sum of the kept counts), sorted by ``prob`` in
        descending order with a fresh index. ``None`` if no pair survives
        the filter.
    """
    estaciones = data_user[['Origen_Id', 'Destino_Id']]
    # Naming the grouping keys up front gives the final column names directly.
    est_a = estaciones.min(axis=1).rename('Est_A')
    est_b = estaciones.max(axis=1).rename('Est_B')
    viajes_user = data_user.groupby([est_a, est_b]).size().reset_index(name='counts')

    if complement:
        seleccion = viajes_user['counts'] < threshold
    else:
        seleccion = viajes_user['counts'] >= threshold
    # Explicit copy: adding a column to a filtered slice triggers pandas'
    # SettingWithCopyWarning and is a chained-assignment hazard.
    viajes_user = viajes_user[seleccion].copy()
    if viajes_user.empty:
        return None

    viajes_user['prob'] = viajes_user['counts'] / viajes_user['counts'].sum()
    return viajes_user.sort_values(by = 'prob', ascending = False).reset_index(drop=True)

In [5]:
def log_prob_matrix(counter_user, normalized = False, self_loops = False):
    """Build a symmetric matrix of ``-log(prob)`` weights between stations.

    Parameters
    ----------
    counter_user : pandas.DataFrame
        Must contain the columns ``'Est_A'``, ``'Est_B'`` and ``'prob'``.
    normalized : bool, optional
        If true, return ``D^{-1/2} M D^{-1/2}`` where ``D`` is the diagonal
        matrix of row sums of ``M``. Rows whose sum is not positive are
        scaled by 0 instead of raising on a singular ``D``.
    self_loops : bool, optional
        If false (default), rows with ``Est_A == Est_B`` are dropped first.

    Returns
    -------
    matrix : numpy.ndarray, shape (len(vertex), len(vertex))
        ``matrix[i, j] == matrix[j, i] == -log(prob)`` for each listed pair,
        0 elsewhere. When a pair is listed more than once the last row wins.
    vertex : list
        Station identifiers; position in this list is the matrix index.
        The order comes from a ``set`` and is therefore arbitrary.
    """
    if not self_loops:
        counter_user = counter_user[counter_user['Est_A'] != counter_user['Est_B']]
    vertex = list(set(counter_user['Est_A'].unique().tolist() + counter_user['Est_B'].unique().tolist()))
    # Dictionary lookup replaces the O(len(vertex)) list.index() call per edge.
    posicion = {estacion: indice for indice, estacion in enumerate(vertex)}
    matrix = np.zeros((len(vertex), len(vertex)))

    # NOTE(review): a pair with prob == 1 gets weight -log(1) == 0, which is
    # indistinguishable from "no edge" in this representation.
    pesos = -np.log(counter_user['prob'].to_numpy(dtype=float))
    origenes = counter_user['Est_A'].tolist()
    destinos = counter_user['Est_B'].tolist()
    for estA, estB, peso in zip(origenes, destinos, pesos):
        fila, columna = posicion[estA], posicion[estB]
        matrix[fila, columna] = peso
        matrix[columna, fila] = peso

    if normalized:
        grado = matrix.sum(axis = 1)
        # Element-wise D^{-1/2}: avoids inverting a diagonal matrix and the
        # LinAlgError it raises when some row sum is zero.
        inv_raiz = np.zeros_like(grado)
        conectado = grado > 0
        inv_raiz[conectado] = 1.0 / np.sqrt(grado[conectado])
        matrix = matrix * inv_raiz[:, None] * inv_raiz[None, :]
    return matrix, vertex

In [6]:
# Root folder of the input data. Export MIBICI_DATA_DIR to point the notebook
# at another machine's copy instead of editing this cell. Joining with ''
# guarantees a trailing separator, which the f-string paths below rely on.
# The name `dir` shadows the builtin; it is kept because later cells use it.
dir = os.path.join(os.environ.get('MIBICI_DATA_DIR', '/home/user/Desktop/Datos/'), '')

In [7]:
# Load mibici_2019.csv. Later cells read its 'Inicio_del_viaje', 'Origen_Id'
# and 'Destino_Id' columns. os.path.join works whether or not `dir` ends
# with a path separator.
data_2019 = pd.read_csv(os.path.join(dir, 'mibici', 'mibici_2019.csv'))

In [8]:
# Station table for 2019, parsed by leer_matriz into a list of float rows.
# os.path.join avoids the doubled separator the f-string produced when `dir`
# already ends with '/'.
est_2019 = leer_matriz(os.path.join(dir, 'Adj_mibici', 'matrices_estaciones', 'est_2019.txt'))

In [9]:
# data[k] is the -log(prob) matrix of the k-th processed day and v[k] the
# matching vertex list. Days with no station pair reaching the threshold
# are skipped, so k is not necessarily the day of the month.
data = []
v = []

dates = [f"2019-03-{i:02d}" for i in range(1, 32)]

for date in dates:
    print(f'Processing date: {date}', end='\r')
    # regex=False: the date is a literal substring, not a pattern.
    # na=False: rows with a missing start time are excluded instead of
    # producing a NaN mask that pandas refuses to index with.
    same_day = data_2019['Inicio_del_viaje'].str.contains(date, regex=False, na=False)
    current_data = data_2019[same_day]
    current_counter = count_trips_mibici(current_data, threshold = 5)
    if current_counter is not None:
        current_matrix, current_vertex = log_prob_matrix(current_counter)
        data.append(current_matrix)
        v.append(current_vertex)

Processing date: 2019-03-31

In [10]:
def generate_index(n, m):
    """Lazily yield every pair ``(i, j)`` with ``0 <= i < n`` and ``0 <= j < m``.

    Pairs come out in row-major order: ``j`` runs through its whole range
    before ``i`` advances.
    """
    yield from ((row, col) for row in range(n) for col in range(m))

In [11]:
# Lazy iterator over every ordered pair (i, j) of positions in `data`,
# including i == j; pairs are consumed one at a time by the cells below.
index_comparison = generate_index(len(data), len(data))

In [12]:
# Take the next (i, j) pair. This cell is stateful: every run consumes one
# pair from `index_comparison`, so re-running it moves on to the next pair.
t = next(index_comparison)
t

(0, 0)

In [13]:
# Matrices and vertex lists of the two days selected by the index pair `t`.
d1, d2 = data[t[0]], data[t[1]]
v1, v2 = v[t[0]], v[t[1]]

In [None]:
# Placeholders left empty by this cell.
k2 = []
k3 = []

# (row, col) positions of the upper triangle (diagonal included) of each matrix.
indices_d1 = np.transpose(np.triu_indices_from(d1))
indices_d2 = np.transpose(np.triu_indices_from(d2))

# Keep only positions whose entry is finite.
# NOTE(review): matrices built by log_prob_matrix mark missing edges with 0,
# not inf, so this filter presumably keeps everything unless d1/d2 are
# replaced by floyd_warshall output. Confirm the intent.
indices_d1 = indices_d1[np.isfinite(d1[indices_d1[:, 0], indices_d1[:, 1]])]
indices_d2 = indices_d2[np.isfinite(d2[indices_d2[:, 0], indices_d2[:, 1]])]

d1_finite = d1[indices_d1[:, 0], indices_d1[:, 1]]
d2_finite = d2[indices_d2[:, 0], indices_d2[:, 1]]

# Broadcasting evaluates the kernel on the whole
# len(d1_finite) x len(d2_finite) grid in one NumPy call instead of one
# Python call per pair. ravel() keeps the original ordering (d1 entry outer,
# d2 entry inner) and tolist() keeps k1 a flat list. Memory still grows with
# the product of both lengths.
k1 = gaussian_kernel1(d1_finite[:, None], d2_finite[None, :]).ravel().tolist()

In [13]:
# Post "Finished" to the "My_Computer" ntfy.sh topic once the cells above have run.
send_message("Finished", "My_Computer")