# Estadísticas de uso ADATRAP

In [7]:
import csv
import datetime
# dependencias
import os

import pandas as pd
from pytz import timezone as pytz_timezone

project_path = os.path.dirname(os.path.dirname(os.path.abspath(__name__)))
data_path = os.path.join(project_path, 'data')

In [8]:
# sacar un dump de las estadísticas que se han capturado
# PASOS A SEGUIR
# Ingresar al servidor de producción adatrap.cl
# Ingresar a psql con: python manage.py dbshell
# Copiar datos de log a un archivo csv: \copy (select * from logapp_useractions left join auth_user on logapp_useractions.user_id=auth_user.id) TO '/tmp/datos.csv' DELIMITER ',' CSV HEADER;
# Descargar el archivo mediante ftp en el path $data_path

In [9]:
diccionario_acciones = {
    # vistas
    '/profile/transfers/': '',
    '/profile/odmatrix/': '',
    '/profile/expedition/': '',
    '/profile/stop/': '',
    '/profile/trajectory/': '',

    '/speed/matrix/': '',
    '/speed/variation/': '',
    '/speed/ranking/': '',

    '/globalstat/detail/': '',
    '/globalstat/resume/': '',

    '/trip/strategies/': '',
    '/trip/fromToMaps/': 'vista_origenes_y_destinos',
    '/trip/resume/': 'vista_indicadores_de_viajes',
    '/trip/map/': 'vista_geolocalización_de_viajes',
    '/trip/large-trips/': 'vista_viajes-por_etapas',

    '/shape/base/': '',
    '/shape/map/': '',
    '/shape/route/': '',

    # días disponibles, indica que se cargó alguna vista
    '/esapi/profile/availableRoutes/': 'perfiles_días_disponibles',
    '/esapi/speed/availableDays/': 'velocidades_días_disponibles',
    '/esapi/trip/availableDays/': 'viajes_días_disponibles',
    '/esapi/resume/availableDays/': 'estadísticas_días_disponibles',
    '/esapi/stop/matchedStopData/': 'búsqueda_de_parada',

    # solicitud de datos
    '/esapi/speed/speedByRoute/': 'vista_matriz_velocidades_por_servicio',
    '/esapi/trip/largeTravelData/': 'datos_',
    '/esapi/trip/resumeData/': '',
    '/esapi/profile/loadProfileByStopData/': '',
    '/esapi/profile/availableDays/': '',
    '/esapi/resume/data/': '',
    '/esapi/odbyroute/availableDays/': '',
    '/esapi/odbyroute/availableRoutes/': '',
    '/esapi/trip/mapData/': '',
    '/esapi/speed/availableRoutes/': '',
    '/esapi/speed/speedVariation/': '',
    '/esapi/trip/transfersData/': '',
    '/esapi/trip/fromToMapData/': '',
    '/esapi/trip/strategiesData/': '',
    '/esapi/speed/matrixData/': '',
    '/esapi/profile/loadProfileByExpeditionData/': '',
    '/esapi/speed/rankingData/': '',

    # usuario
    '/user/login/': 'inicio_de_sesión',
    '/user/password_change/': 'vista_cambiar_contraseña',
}


# Cálculo de sesiones

In [15]:
# Período a evaluar
location = pytz_timezone('America/Santiago')
start_time = location.localize(datetime.datetime(2022, 5, 1, 0, 0, 0, ))
end_time = location.localize(datetime.datetime(2022, 7, 1, 0, 0, 0, ))

# tiempo mínimo entre sesiones
time_windows_in_sec = 60 * 15  # 15 minutos


class User():

    def __init__(self, username):
        self.username = username
        self.session_number = 0
        self.last_session = None
        self.max_session_duration = datetime.timedelta()
        self.min_session_duration = datetime.timedelta(days=100)
        self.avg_session_duration = datetime.timedelta()
        self.activities = []

    def add_activity(self, activity):
        self.activities.append(activity)

    def sort_activities(self):
        self.activities.sort(key=lambda x: x[0])

    def calculate_metrics(self):
        self.session_number = 0
        self.last_session = self.activities[0][0]
        start_session = self.activities[0][0]
        previous_timestamp = self.activities[0][0]
        sessions = []

        for index, activity in enumerate(self.activities):
            timestamp = activity[0]

            diff_in_secs = (timestamp - previous_timestamp).total_seconds()
            if diff_in_secs > time_windows_in_sec or index == len(self.activities) - 1:
                duration = previous_timestamp.replace(microsecond=0) - start_session.replace(microsecond=0)

                print(self.username, start_session.replace(microsecond=0).astimezone(location),
                      previous_timestamp.replace(microsecond=0).astimezone(location), duration)
                if duration > self.max_session_duration:
                    self.max_session_duration = duration
                if duration < self.min_session_duration:
                    self.min_session_duration = duration
                sessions.append(duration)
                self.last_session = previous_timestamp
                self.session_number += 1
                start_session = timestamp

            previous_timestamp = timestamp

        self.avg_session_duration = sum(sessions, datetime.timedelta()) / len(sessions)
        # quitar microsegundos
        self.avg_session_duration = self.avg_session_duration - datetime.timedelta(
            microseconds=self.avg_session_duration.microseconds)

    def get_results(self):
        fmt = '%Y-%m-%d %H:%M:%S %Z'
        last_session = self.last_session.astimezone(location).strftime(fmt)
        return [self.username, self.session_number, last_session, self.max_session_duration, self.min_session_duration,
                self.avg_session_duration]


users = dict()

# Cargar datos

In [16]:
input_filename = 'datos.csv'
input_path = os.path.join(data_path, input_filename)

with open(input_path, encoding='utf-8') as csvfile:
    spamreader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
    next(spamreader)
    url_set = set()
    for row in spamreader:
        # no es relevante lo que hace el administrador
        url = row['url'].split('?')[0]
        timestamp = datetime.datetime.strptime('{0}{1}'.format(row['timestamp'], '00'), '%Y-%m-%d %H:%M:%S.%f%z')
        username = row['username']

        if not (start_time <= timestamp < end_time):
            continue

        #if url.startswith('/admin'):
        #    continue
        # actividades no relevantes
        if url in ['/favicon.ico', '/']:
            continue

        if username not in users:
            users[username] = User(username)

        users[username].add_activity([timestamp])

for username in users:
    users[username].sort_activities()


# Sesiones por usuario

In [17]:
labels = (
'usuario', 'n° sesiones', 'última sesión', 'tiempo máximo de sesión', 'tiempo mínimo de sesión', 'sesión promedio')
data = []
for username in users:
    user = users[username]
    user.calculate_metrics()
    data.append(user.get_results())

df = pd.DataFrame(data, columns=labels).sort_values(by=['n° sesiones'], ascending=False)
df = df[df['usuario'] != 'transantiago']

transantiago 2022-05-01 14:17:41-04:00 2022-05-01 14:17:43-04:00 0:00:02
transantiago 2022-05-02 10:14:02-04:00 2022-05-02 10:21:27-04:00 0:07:25
transantiago 2022-05-03 16:09:28-04:00 2022-05-03 16:09:37-04:00 0:00:09
transantiago 2022-05-04 16:09:39-04:00 2022-05-04 16:09:43-04:00 0:00:04
transantiago 2022-05-05 11:33:16-04:00 2022-05-05 11:47:58-04:00 0:14:42
transantiago 2022-05-05 13:15:40-04:00 2022-05-05 13:21:18-04:00 0:05:38
transantiago 2022-05-05 15:51:45-04:00 2022-05-05 15:52:02-04:00 0:00:17
transantiago 2022-05-05 16:16:59-04:00 2022-05-05 16:17:22-04:00 0:00:23
transantiago 2022-05-05 17:07:44-04:00 2022-05-05 17:14:45-04:00 0:07:01
transantiago 2022-05-05 23:31:14-04:00 2022-05-05 23:37:10-04:00 0:05:56
transantiago 2022-05-06 09:13:46-04:00 2022-05-06 09:15:17-04:00 0:01:31
transantiago 2022-05-06 10:26:24-04:00 2022-05-06 10:29:49-04:00 0:03:25
transantiago 2022-05-06 17:16:18-04:00 2022-05-06 17:18:32-04:00 0:02:14
transantiago 2022-05-06 17:56:31-04:00 2022-05-06 1

In [18]:
df.style

Unnamed: 0,usuario,n° sesiones,última sesión,tiempo máximo de sesión,tiempo mínimo de sesión,sesión promedio
6,tomas.rebolledo,224,2022-06-30 17:44:05 -04,0 days 01:53:45,0 days 00:00:00,0 days 00:08:17
16,vicente.iglesias,147,2022-06-30 15:43:04 -04,0 days 00:36:30,0 days 00:00:00,0 days 00:04:44
15,sebastian.busquets,106,2022-06-30 17:54:19 -04,0 days 02:24:59,0 days 00:00:00,0 days 00:07:43
14,dtpm,90,2022-06-30 17:40:52 -04,0 days 00:43:56,0 days 00:00:00,0 days 00:02:59
1,patrick.oemick,68,2022-06-29 15:24:00 -04,0 days 00:52:09,0 days 00:00:00,0 days 00:05:47
5,andres.gonzalez,58,2022-06-29 16:10:12 -04,0 days 00:54:12,0 days 00:00:00,0 days 00:05:03
3,barbara.poblete,53,2022-06-29 16:17:35 -04,0 days 00:32:05,0 days 00:00:00,0 days 00:03:30
20,nathalia.maira,47,2022-06-30 16:02:25 -04,0 days 00:43:17,0 days 00:00:00,0 days 00:06:15
9,rodrigo.alvarez,29,2022-06-16 17:20:45 -04,0 days 01:16:24,0 days 00:00:00,0 days 00:05:36
8,katherine.Garrido,28,2022-06-10 11:37:53 -04,0 days 00:47:11,0 days 00:00:00,0 days 00:05:41


In [19]:
file_path = os.path.join(data_path, 'tabla_de_actividad.csv')
df['última sesión'] = df['última sesión'].apply(lambda x: x.replace(' -04', '').replace('0 days ', '').replace(' -03', ''))
df['tiempo máximo de sesión'] = df['tiempo máximo de sesión'].apply(lambda x: str(x).replace(' -04', '').replace('0 days ', '').replace(' -03', ''))
df['tiempo mínimo de sesión'] = df['tiempo mínimo de sesión'].apply(lambda x: str(x).replace(' -04', '').replace('0 days ', '').replace(' -03', ''))
df['sesión promedio'] = df['sesión promedio'].apply(lambda x: str(x).replace(' -04', '').replace('0 days ', '').replace(' -03', ''))

df.to_csv(file_path, index=False, sep=',')

# Post proceso
- abrir el csv
- Pegar el contenido del csv en un google sheet y luego pegarlo en la tabla del documento