# Estadísticas de uso ADATRAP

In [2]:
import csv
import datetime
# dependencias
import os

import pandas as pd
from pytz import timezone as pytz_timezone

project_path = 'C:\\Users\\cephe\\PycharmProjects\\varios\\data'

In [3]:
# sacar un dump de las estadísticas que se han capturado
# PASOS A SEGUIR
# Ingresar al servidor de producción adatrap.cl
# Ingresar a psql con: python manage.py dbshell
# Copiar datos de log a un archivo csv: \copy (select * from logapp_useractions left join auth_user on logapp_useractions.user_id=auth_user.id) TO '/tmp/datos.csv' DELIMITER ',' CSV HEADER;
# Descargar el archivo mediante ftp en el path $project_path

In [4]:
diccionario_acciones = {
    # vistas
    '/profile/transfers/': '',
    '/profile/odmatrix/': '',
    '/profile/expedition/': '',
    '/profile/stop/': '',
    '/profile/trajectory/': '',

    '/speed/matrix/': '',
    '/speed/variation/': '',
    '/speed/ranking/': '',

    '/globalstat/detail/': '',
    '/globalstat/resume/': '',

    '/trip/strategies/': '',
    '/trip/fromToMaps/': 'vista_origenes_y_destinos',
    '/trip/resume/': 'vista_indicadores_de_viajes',
    '/trip/map/': 'vista_geolocalización_de_viajes',
    '/trip/large-trips/': 'vista_viajes-por_etapas',

    '/shape/base/': '',
    '/shape/map/': '',
    '/shape/route/': '',

    # días disponibles, indica que se cargó alguna vista
    '/esapi/profile/availableRoutes/': 'perfiles_días_disponibles',
    '/esapi/speed/availableDays/': 'velocidades_días_disponibles',
    '/esapi/trip/availableDays/': 'viajes_días_disponibles',
    '/esapi/resume/availableDays/': 'estadísticas_días_disponibles',
    '/esapi/stop/matchedStopData/': 'búsqueda_de_parada',

    # solicitud de datos
    '/esapi/speed/speedByRoute/': 'vista_matriz_velocidades_por_servicio',
    '/esapi/trip/largeTravelData/': 'datos_',
    '/esapi/trip/resumeData/': '',
    '/esapi/profile/loadProfileByStopData/': '',
    '/esapi/profile/availableDays/': '',
    '/esapi/resume/data/': '',
    '/esapi/odbyroute/availableDays/': '',
    '/esapi/odbyroute/availableRoutes/': '',
    '/esapi/trip/mapData/': '',
    '/esapi/speed/availableRoutes/': '',
    '/esapi/speed/speedVariation/': '',
    '/esapi/trip/transfersData/': '',
    '/esapi/trip/fromToMapData/': '',
    '/esapi/trip/strategiesData/': '',
    '/esapi/speed/matrixData/': '',
    '/esapi/profile/loadProfileByExpeditionData/': '',
    '/esapi/speed/rankingData/': '',

    # usuario
    '/user/login/': 'inicio_de_sesión',
    '/user/password_change/': 'vista_cambiar_contraseña',
}


# Cálculo de sesiones

In [6]:
# Período a evaluar
location = pytz_timezone('America/Santiago')
start_time = location.localize(datetime.datetime(2021, 1, 1, 0, 0, 0, ))
end_time = location.localize(datetime.datetime(2022, 6, 1, 0, 0, 0, ))

# tiempo mínimo entre sesiones
time_windows_in_sec = 60 * 15  # 15 minutos


class User():

    def __init__(self, username):
        self.username = username
        self.session_number = 0
        self.last_session = None
        self.max_session_duration = datetime.timedelta()
        self.min_session_duration = datetime.timedelta(days=100)
        self.avg_session_duration = datetime.timedelta()
        self.activities = []

    def add_activity(self, activity):
        self.activities.append(activity)

    def sort_activities(self):
        self.activities.sort(key=lambda x: x[0])

    def calculate_metrics(self):
        self.session_number = 0
        self.last_session = self.activities[0][0]
        start_session = self.activities[0][0]
        previous_timestamp = self.activities[0][0]
        sessions = []

        for index, activity in enumerate(self.activities):
            timestamp = activity[0]

            diff_in_secs = (timestamp - previous_timestamp).total_seconds()
            if diff_in_secs > time_windows_in_sec or index == len(self.activities) - 1:
                duration = previous_timestamp.replace(microsecond=0) - start_session.replace(microsecond=0)

                print(self.username, start_session.replace(microsecond=0).astimezone(location),
                      previous_timestamp.replace(microsecond=0).astimezone(location), duration)
                if duration > self.max_session_duration:
                    self.max_session_duration = duration
                if duration < self.min_session_duration:
                    self.min_session_duration = duration
                sessions.append(duration)
                self.last_session = previous_timestamp
                self.session_number += 1
                start_session = timestamp

            previous_timestamp = timestamp

        self.avg_session_duration = sum(sessions, datetime.timedelta()) / len(sessions)
        # quitar microsegundos
        self.avg_session_duration = self.avg_session_duration - datetime.timedelta(
            microseconds=self.avg_session_duration.microseconds)

    def get_results(self):
        fmt = '%Y-%m-%d %H:%M:%S %Z'
        last_session = self.last_session.astimezone(location).strftime(fmt)
        return [self.username, self.session_number, last_session, self.max_session_duration, self.min_session_duration,
                self.avg_session_duration]


users = dict()

# Cargar datos

In [7]:
input_filename = 'datos.csv'
input_path = os.path.join(project_path, input_filename)

with open(input_path, encoding='utf-8') as csvfile:
    spamreader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
    next(spamreader)
    url_set = set()
    for row in spamreader:
        # no es relevante lo que hace el administrador
        url = row['url'].split('?')[0]
        timestamp = datetime.datetime.strptime('{0}{1}'.format(row['timestamp'], '00'), '%Y-%m-%d %H:%M:%S.%f%z')
        username = row['username']

        if not (start_time <= timestamp < end_time):
            continue

        #if url.startswith('/admin'):
        #    continue
        # actividades no relevantes
        if url in ['/favicon.ico', '/']:
            continue

        if username not in users:
            users[username] = User(username)

        users[username].add_activity([timestamp])

for username in users:
    users[username].sort_activities()


# Sesiones por usuario

In [10]:
labels = (
'usuario', 'n° sesiones', 'última sesión', 'tiempo máximo de sesión', 'tiempo mínimo de sesión', 'sesión promedio')
data = []
for username in users:
    user = users[username]
    user.calculate_metrics()
    data.append(user.get_results())

df = pd.DataFrame(data, columns=labels).sort_values(by=['n° sesiones'], ascending=False)
df = df[df['usuario'] != 'transantiago']

barbara.poblete 2021-01-04 10:26:27-03:00 2021-01-04 10:27:47-03:00 0:01:20
barbara.poblete 2021-01-04 11:15:12-03:00 2021-01-04 11:22:39-03:00 0:07:27
barbara.poblete 2021-01-04 11:46:47-03:00 2021-01-04 11:47:00-03:00 0:00:13
barbara.poblete 2021-01-07 11:29:18-03:00 2021-01-07 11:32:34-03:00 0:03:16
barbara.poblete 2021-01-11 10:26:23-03:00 2021-01-11 10:26:23-03:00 0:00:00
barbara.poblete 2021-01-12 10:24:46-03:00 2021-01-12 10:24:49-03:00 0:00:03
barbara.poblete 2021-01-12 12:32:19-03:00 2021-01-12 12:32:39-03:00 0:00:20
barbara.poblete 2021-01-12 15:35:00-03:00 2021-01-12 15:38:09-03:00 0:03:09
barbara.poblete 2021-01-12 15:57:28-03:00 2021-01-12 15:57:37-03:00 0:00:09
barbara.poblete 2021-01-12 17:13:35-03:00 2021-01-12 17:13:37-03:00 0:00:02
barbara.poblete 2021-01-12 17:40:53-03:00 2021-01-12 17:41:59-03:00 0:01:06
barbara.poblete 2021-01-19 13:24:57-03:00 2021-01-19 13:25:36-03:00 0:00:39
barbara.poblete 2021-01-20 09:33:25-03:00 2021-01-20 09:34:57-03:00 0:01:32
barbara.pobl

In [11]:
df.style

Unnamed: 0,usuario,n° sesiones,última sesión,tiempo máximo de sesión,tiempo mínimo de sesión,sesión promedio
1,tomas.rebolledo,1282,2022-05-31 18:58:07 -04,0 days 02:37:41,0 days 00:00:00,0 days 00:06:51
5,muriel.manriquez,567,2022-05-27 11:09:57 -04,0 days 01:56:50,0 days 00:00:00,0 days 00:06:20
9,martin.quiroz,417,2022-05-12 15:30:40 -04,0 days 01:29:31,0 days 00:00:00,0 days 00:06:40
16,patrick.oemick,401,2022-05-26 16:42:38 -04,0 days 02:08:46,0 days 00:00:00,0 days 00:07:45
0,barbara.poblete,311,2022-05-31 09:13:03 -04,0 days 00:50:38,0 days 00:00:00,0 days 00:04:06
4,nicolas.gaete,202,2022-05-27 11:59:19 -04,0 days 00:28:45,0 days 00:00:00,0 days 00:02:39
13,mauricio.zuñiga,165,2022-05-20 11:56:16 -04,0 days 00:42:38,0 days 00:00:00,0 days 00:03:18
19,katherine.Garrido,159,2022-05-24 17:00:40 -04,0 days 00:56:06,0 days 00:00:00,0 days 00:07:30
22,nathalia.maira,151,2022-05-30 12:04:14 -04,0 days 00:55:13,0 days 00:00:00,0 days 00:06:08
6,dtpm,145,2022-05-27 15:48:50 -04,0 days 00:20:35,0 days 00:00:00,0 days 00:02:53


In [20]:
file_path = os.path.join(project_path, 'tabla_de_actividad.csv')
df.to_csv(file_path, index=False, sep=',')