# Estadísticas de uso ADATRAP

In [2]:
# dependencias
import os
import csv
import datetime
import pandas as pd

from collections import defaultdict
from pytz import timezone as pytz_timezone

project_path = '/mnt/c/Users/cephei/Desktop/notebooks/fondef'

In [3]:
# sacar un dump de las estadísticas que se han capturado
# PASOS A SEGUIR
# Ingresar al servidor de producción adatrap.cl
# Ingresar a psql con: python manage.py dbshell
# Copiar datos de log a un archivo csv: \copy (select * from logapp_useractions left join auth_user on logapp_useractions.user_id=auth_user.id) TO '/tmp/datos.csv' DELIMITER ',' CSV HEADER;
# Descargar el archivo mediante ftp en el path $project_path

In [4]:
diccionario_acciones = {
    # vistas
    '/profile/transfers/': '', 
    '/profile/odmatrix/': '',
    '/profile/expedition/': '', 
    '/profile/stop/': '', 
    '/profile/trajectory/': '', 
    
    '/speed/matrix/': '', 
    '/speed/variation/': '', 
    '/speed/ranking/': '', 
    
    '/globalstat/detail/': '', 
    '/globalstat/resume/': '', 
    
    '/trip/strategies/': '', 
    '/trip/fromToMaps/': 'vista_origenes_y_destinos', 
    '/trip/resume/': 'vista_indicadores_de_viajes', 
    '/trip/map/': 'vista_geolocalización_de_viajes', 
    '/trip/large-trips/': 'vista_viajes-por_etapas', 
    
    '/shape/base/': '', 
    '/shape/map/': '', 
    '/shape/route/': '', 
    
    # días disponibles, indica que se cargó alguna vista
    '/esapi/profile/availableRoutes/': 'perfiles_días_disponibles', 
    '/esapi/speed/availableDays/': 'velocidades_días_disponibles', 
    '/esapi/trip/availableDays/': 'viajes_días_disponibles', 
    '/esapi/resume/availableDays/': 'estadísticas_días_disponibles', 
    '/esapi/stop/matchedStopData/': 'búsqueda_de_parada', 
    
    # solicitud de datos
    '/esapi/speed/speedByRoute/': 'vista_matriz_velocidades_por_servicio', 
    '/esapi/trip/largeTravelData/': 'datos_', 
    '/esapi/trip/resumeData/': '', 
    '/esapi/profile/loadProfileByStopData/': '', 
    '/esapi/profile/availableDays/': '', 
    '/esapi/resume/data/': '', 
    '/esapi/odbyroute/availableDays/': '', 
    '/esapi/odbyroute/availableRoutes/': '', 
    '/esapi/trip/mapData/': '', 
    '/esapi/speed/availableRoutes/': '', 
    '/esapi/speed/speedVariation/': '', 
    '/esapi/trip/transfersData/': '', 
    '/esapi/trip/fromToMapData/': '', 
    '/esapi/trip/strategiesData/': '', 
    '/esapi/speed/matrixData/': '', 
    '/esapi/profile/loadProfileByExpeditionData/': '', 
    '/esapi/speed/rankingData/': '', 
    
    # usuario
    '/user/login/': 'inicio_de_sesión', 
    '/user/password_change/': 'vista_cambiar_contraseña',
}


# Cálculo de sesiones

In [9]:
# Período a evaluar
location = pytz_timezone('America/Santiago')
start_time = location.localize(datetime.datetime(2021, 11, 1, 0, 0, 0,))
end_time = location.localize(datetime.datetime(2021, 12, 1, 0, 0, 0,))

# tiempo mínimo entre sesiones
time_windows_in_sec = 60 * 15 # 15 minutos

class User():
    
    def __init__(self, username):
        self.username = username
        self.session_number = 0
        self.last_session = None
        self.max_session_duration = datetime.timedelta()
        self.min_session_duration = datetime.timedelta(days=100)
        self.avg_session_duration = datetime.timedelta()
        self.activities = []
        
    def add_activity(self, activity):
        self.activities.append(activity)
        
    def sort_activities(self):
        self.activities.sort(key=lambda x: x[0])
    
    def calculate_metrics(self):
        self.session_number = 0
        self.last_session = self.activities[0][0]
        start_session = self.activities[0][0]
        previous_timestamp = self.activities[0][0]
        sessions = []
        
        for index, activity in enumerate(self.activities):
            timestamp = activity[0]
            
            diff_in_secs = (timestamp - previous_timestamp).total_seconds()
            if diff_in_secs > time_windows_in_sec or index == len(self.activities) - 1:
                duration = previous_timestamp.replace(microsecond=0) - start_session.replace(microsecond=0)
                
                print(self.username, start_session.replace(microsecond=0).astimezone(location), previous_timestamp.replace(microsecond=0).astimezone(location), duration)
                if duration > self.max_session_duration:
                    self.max_session_duration = duration
                if duration < self.min_session_duration:
                    self.min_session_duration = duration
                sessions.append(duration)
                self.last_session = previous_timestamp
                self.session_number += 1
                start_session = timestamp

            previous_timestamp = timestamp
            
        self.avg_session_duration = sum(sessions, datetime.timedelta()) / len(sessions)
        # quitar microsegundos
        self.avg_session_duration = self.avg_session_duration - datetime.timedelta(microseconds=self.avg_session_duration.microseconds)
            
    def get_results(self):
        fmt = '%Y-%m-%d %H:%M:%S %Z'
        last_session = self.last_session.astimezone(location).strftime(fmt)
        return [self.username, self.session_number, last_session, self.max_session_duration, self.min_session_duration, self.avg_session_duration]

users = dict()

# Cargar datos

In [10]:
input_filename = 'datos.csv'
input_path = os.path.join(project_path, input_filename)

with open(input_path, encoding='utf-8') as csvfile:
    spamreader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
    next(spamreader)
    url_set = set()
    for row in spamreader:
        # no es relevante lo que hace el administrador
        url = row['url'].split('?')[0]
        timestamp = datetime.datetime.strptime('{0}{1}'.format(row['timestamp'], '00'), '%Y-%m-%d %H:%M:%S.%f%z')
        username = row['username']
        
        if not (start_time <= timestamp < end_time):
            continue
        
        #if url.startswith('/admin'):
        #    continue
        # actividades no relevantes
        if url in ['/favicon.ico', '/']:
            continue
            
        if username not in users:
            users[username] = User(username)
        
        users[username].add_activity([timestamp])

for username in users:
    users[username].sort_activities()


# Sesiones por usuario

In [11]:
labels = ('usuario', 'n° sesiones', 'última sesión', 'tiempo máximo de sesión', 'tiempo mínimo de sesión', 'sesión promedio')
data = []
for username in users:
    user = users[username]
    user.calculate_metrics()
    data.append(user.get_results())
    
a = pd.DataFrame(data, columns=labels).sort_values(by=['n° sesiones'], ascending=False)
a = a[a['usuario'] != 'transantiago']

juan.sepulveda 2021-11-02 09:29:24-03:00 2021-11-02 09:29:40-03:00 0:00:16
juan.sepulveda 2021-11-02 09:56:39-03:00 2021-11-02 09:56:46-03:00 0:00:07
juan.sepulveda 2021-11-04 17:09:59-03:00 2021-11-04 17:10:30-03:00 0:00:31
juan.sepulveda 2021-11-08 10:29:44-03:00 2021-11-08 10:29:47-03:00 0:00:03
juan.sepulveda 2021-11-10 10:19:09-03:00 2021-11-10 10:19:32-03:00 0:00:23
juan.sepulveda 2021-11-17 13:16:56-03:00 2021-11-17 13:20:28-03:00 0:03:32
juan.sepulveda 2021-11-17 14:39:56-03:00 2021-11-17 14:42:39-03:00 0:02:43
juan.sepulveda 2021-11-17 17:22:36-03:00 2021-11-17 17:22:53-03:00 0:00:17
juan.sepulveda 2021-11-30 11:16:26-03:00 2021-11-30 11:16:29-03:00 0:00:03
juan.sepulveda 2021-11-30 16:23:18-03:00 2021-11-30 16:37:14-03:00 0:13:56
antonio.gschwender 2021-11-15 17:51:37-03:00 2021-11-15 18:24:28-03:00 0:32:51
antonio.gschwender 2021-11-25 12:14:48-03:00 2021-11-25 12:17:37-03:00 0:02:49
miguel.ormeno 2021-11-25 12:58:48-03:00 2021-11-25 13:07:30-03:00 0:08:42
diego.silva 2021-1

In [12]:
a.style

Unnamed: 0,usuario,n° sesiones,última sesión,tiempo máximo de sesión,tiempo mínimo de sesión,sesión promedio
15,tomas.rebolledo,122,2021-11-26 18:42:56 -03,0 days 01:08:40,0 days 00:00:00,0 days 00:08:02
11,muriel.manriquez,55,2021-11-29 18:47:21 -03,0 days 00:47:09,0 days 00:00:00,0 days 00:06:07
12,patrick.oemick,31,2021-11-30 15:28:12 -03,0 days 00:33:12,0 days 00:00:00,0 days 00:05:27
24,martin.quiroz,28,2021-11-30 12:33:46 -03,0 days 00:25:47,0 days 00:00:00,0 days 00:03:36
9,nicolas.gaete,26,2021-11-30 11:21:26 -03,0 days 00:28:45,0 days 00:00:00,0 days 00:07:18
17,cesar.nunez,21,2021-11-26 08:59:02 -03,0 days 00:12:54,0 days 00:00:00,0 days 00:01:29
5,juan.olea,19,2021-11-30 09:48:57 -03,0 days 00:24:51,0 days 00:00:00,0 days 00:03:57
25,andres.gonzalez,12,2021-11-30 18:29:13 -03,0 days 00:41:59,0 days 00:00:00,0 days 00:12:30
0,juan.sepulveda,10,2021-11-30 16:37:14 -03,0 days 00:13:56,0 days 00:00:03,0 days 00:02:11
16,barbara.poblete,9,2021-11-30 20:30:00 -03,0 days 00:05:58,0 days 00:00:00,0 days 00:01:22
