# Estadísticas de uso ADATRAP

In [1]:
import csv
import datetime
import os

import pandas as pd
from pytz import timezone as pytz_timezone

project_path = os.path.dirname(os.path.dirname(os.path.abspath(__name__)))
data_path = os.path.join(project_path, 'data')

# Evaluar periodos en horario local
location = pytz_timezone('America/Santiago')

In [2]:
# sacar un dump de las estadísticas que se han capturado
# PASOS A SEGUIR
# Ingresar al servidor de producción adatrap.cl
# Ingresar a psql con: python manage.py dbshell
# Copiar datos de log a un archivo csv: \copy (select * from logapp_useractions left join auth_user on logapp_useractions.user_id=auth_user.id) TO '/tmp/datos.csv' DELIMITER ',' CSV HEADER;
# Descargar el archivo mediante ftp en el path $data_path

In [3]:
diccionario_acciones = {
    # vistas
    '/profile/transfers/': '',
    '/profile/odmatrix/': '',
    '/profile/expedition/': '',
    '/profile/stop/': '',
    '/profile/trajectory/': '',

    '/speed/matrix/': '',
    '/speed/variation/': '',
    '/speed/ranking/': '',

    '/globalstat/detail/': '',
    '/globalstat/resume/': '',

    '/trip/strategies/': '',
    '/trip/fromToMaps/': 'vista_origenes_y_destinos',
    '/trip/resume/': 'vista_indicadores_de_viajes',
    '/trip/map/': 'vista_geolocalización_de_viajes',
    '/trip/large-trips/': 'vista_viajes-por_etapas',

    '/shape/base/': '',
    '/shape/map/': '',
    '/shape/route/': '',

    # días disponibles, indica que se cargó alguna vista
    '/esapi/profile/availableRoutes/': 'perfiles_días_disponibles',
    '/esapi/speed/availableDays/': 'velocidades_días_disponibles',
    '/esapi/trip/availableDays/': 'viajes_días_disponibles',
    '/esapi/resume/availableDays/': 'estadísticas_días_disponibles',
    '/esapi/stop/matchedStopData/': 'búsqueda_de_parada',

    # solicitud de datos
    '/esapi/speed/speedByRoute/': 'vista_matriz_velocidades_por_servicio',
    '/esapi/trip/largeTravelData/': 'datos_',
    '/esapi/trip/resumeData/': '',
    '/esapi/profile/loadProfileByStopData/': '',
    '/esapi/profile/availableDays/': '',
    '/esapi/resume/data/': '',
    '/esapi/odbyroute/availableDays/': '',
    '/esapi/odbyroute/availableRoutes/': '',
    '/esapi/trip/mapData/': '',
    '/esapi/speed/availableRoutes/': '',
    '/esapi/speed/speedVariation/': '',
    '/esapi/trip/transfersData/': '',
    '/esapi/trip/fromToMapData/': '',
    '/esapi/trip/strategiesData/': '',
    '/esapi/speed/matrixData/': '',
    '/esapi/profile/loadProfileByExpeditionData/': '',
    '/esapi/speed/rankingData/': '',

    # usuario
    '/user/login/': 'inicio_de_sesión',
    '/user/password_change/': 'vista_cambiar_contraseña',
}


# Cálculo de sesiones

In [6]:
class User:

    def __init__(self, username):
        self.username = username
        self.session_number = 0
        self.last_session = None
        self.max_session_duration = datetime.timedelta()
        self.min_session_duration = datetime.timedelta(days=100)
        self.avg_session_duration = datetime.timedelta()
        self.activities = []

        # tiempo mínimo entre sesiones
        self.time_windows_in_sec = 60 * 15  # 15 minutos

    def add_activity(self, activity):
        self.activities.append(activity)

    def sort_activities(self):
        self.activities.sort(key=lambda x: x[0])

    def calculate_metrics(self):
        self.session_number = 0
        self.last_session = self.activities[0][0]
        start_session = self.activities[0][0]
        previous_timestamp = self.activities[0][0]
        sessions = []

        for index, activity in enumerate(self.activities):
            timestamp = activity[0]

            diff_in_secs = (timestamp - previous_timestamp).total_seconds()
            if diff_in_secs > self.time_windows_in_sec or index == len(self.activities) - 1:
                duration = previous_timestamp.replace(microsecond=0) - start_session.replace(microsecond=0)

                print(self.username, start_session.replace(microsecond=0).astimezone(location),
                      previous_timestamp.replace(microsecond=0).astimezone(location), duration)
                if duration > self.max_session_duration:
                    self.max_session_duration = duration
                if duration < self.min_session_duration:
                    self.min_session_duration = duration
                sessions.append(duration)
                self.last_session = previous_timestamp
                self.session_number += 1
                start_session = timestamp

            previous_timestamp = timestamp

        self.avg_session_duration = sum(sessions, datetime.timedelta()) / len(sessions)
        # quitar microsegundos
        self.avg_session_duration = self.avg_session_duration - datetime.timedelta(
            microseconds=self.avg_session_duration.microseconds)

    def get_results(self):
        fmt = '%Y-%m-%d %H:%M:%S %Z'
        last_session = self.last_session.astimezone(location).strftime(fmt)
        return [self.username, self.session_number, last_session, self.max_session_duration, self.min_session_duration,
                self.avg_session_duration]

# Cargar datos

In [7]:
def build_user_activity(start_time, end_time):
    filename = 'datos.csv'
    input_path = os.path.join(data_path, filename)
    users = dict()

    with open(input_path, encoding='utf-8') as csvfile:
        spamreader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
        next(spamreader)
        url_set = set()
        for row in spamreader:
            # no es relevante lo que hace el administrador
            url = row['url'].split('?')[0]
            timestamp = datetime.datetime.strptime('{0}{1}'.format(row['timestamp'], '00'), '%Y-%m-%d %H:%M:%S.%f%z')
            username = row['username']

            if not (start_time <= timestamp < end_time):
                continue

            #if url.startswith('/admin'):
            #    continue
            # actividades no relevantes
            if url in ['/favicon.ico', '/']:
                continue

            if username not in users:
                users[username] = User(username)

            users[username].add_activity([timestamp])

    for username in users:
        users[username].sort_activities()

    return users

# Sesiones por usuario

In [8]:
start_time = location.localize(datetime.datetime(2022, 12, 1, 0, 0, 0, ))
end_time = location.localize(datetime.datetime(2023, 1, 1, 0, 0, 0, ))

users = build_user_activity(start_time, end_time)

labels = (
    'usuario', 'n° sesiones', 'última sesión', 'tiempo máximo de sesión', 'tiempo mínimo de sesión', 'sesión promedio')
data = []
for username in users:
    user = users[username]
    user.calculate_metrics()
    data.append(user.get_results())

df = pd.DataFrame(data, columns=labels).sort_values(by=['n° sesiones'], ascending=False)
df = df[df['usuario'] != 'transantiago']
df = df[df['usuario'] != 'dtpm']
df = df[df['usuario'] != 'prueba']

gabriel.vargas 2022-12-01 10:48:57-03:00 2022-12-01 11:02:15-03:00 0:13:18
gabriel.vargas 2022-12-01 11:26:59-03:00 2022-12-01 11:35:52-03:00 0:08:53
gabriel.vargas 2022-12-01 12:56:19-03:00 2022-12-01 13:18:47-03:00 0:22:28
gabriel.vargas 2022-12-01 16:07:09-03:00 2022-12-01 16:37:29-03:00 0:30:20
gabriel.vargas 2022-12-01 16:52:56-03:00 2022-12-01 17:16:17-03:00 0:23:21
gabriel.vargas 2022-12-01 18:30:30-03:00 2022-12-01 18:34:15-03:00 0:03:45
gabriel.vargas 2022-12-02 10:55:50-03:00 2022-12-02 10:57:40-03:00 0:01:50
gabriel.vargas 2022-12-02 12:24:39-03:00 2022-12-02 12:24:57-03:00 0:00:18
gabriel.vargas 2022-12-05 11:02:05-03:00 2022-12-05 11:02:42-03:00 0:00:37
gabriel.vargas 2022-12-05 11:59:36-03:00 2022-12-05 12:10:05-03:00 0:10:29
gabriel.vargas 2022-12-05 16:48:09-03:00 2022-12-05 16:50:35-03:00 0:02:26
gabriel.vargas 2022-12-05 17:37:09-03:00 2022-12-05 17:57:58-03:00 0:20:49
gabriel.vargas 2022-12-06 09:11:26-03:00 2022-12-06 09:11:59-03:00 0:00:33
gabriel.vargas 2022-12-06

In [9]:
df.style

Unnamed: 0,usuario,n° sesiones,última sesión,tiempo máximo de sesión,tiempo mínimo de sesión,sesión promedio
5,tomas.rebolledo,91,2022-12-30 15:51:53 -03,0 days 01:22:49,0 days 00:00:00,0 days 00:06:02
6,macarena.salazar,78,2022-12-30 12:42:58 -03,0 days 01:13:30,0 days 00:00:00,0 days 00:09:59
2,sebastian.tamblay,48,2022-12-29 10:56:31 -03,0 days 00:38:14,0 days 00:00:00,0 days 00:02:44
18,andres.gonzalez,37,2022-12-30 11:39:43 -03,0 days 00:55:53,0 days 00:00:01,0 days 00:07:38
3,juan.lagos,31,2022-12-30 14:50:18 -03,0 days 00:18:10,0 days 00:00:00,0 days 00:02:39
7,maria.viveros,28,2022-12-30 12:52:13 -03,0 days 01:04:48,0 days 00:00:00,0 days 00:06:30
0,gabriel.vargas,26,2022-12-30 10:09:01 -03,0 days 01:00:58,0 days 00:00:00,0 days 00:09:34
4,miguel.vielma,25,2022-12-20 21:56:25 -03,0 days 00:23:17,0 days 00:00:00,0 days 00:04:47
9,sebastian.busquets,23,2022-12-26 18:44:27 -03,0 days 00:54:25,0 days 00:00:00,0 days 00:07:21
17,patrick.oemick,20,2022-12-29 16:22:37 -03,0 days 00:35:47,0 days 00:00:00,0 days 00:04:12


In [10]:
file_path = os.path.join(data_path, 'tabla_de_actividad.csv')
df['última sesión'] = df['última sesión'].apply(
    lambda x: x.replace(' -04', '').replace('0 days ', '').replace(' -03', ''))
df['tiempo máximo de sesión'] = df['tiempo máximo de sesión'].apply(
    lambda x: str(x).replace(' -04', '').replace('0 days ', '').replace(' -03', ''))
df['tiempo mínimo de sesión'] = df['tiempo mínimo de sesión'].apply(
    lambda x: str(x).replace(' -04', '').replace('0 days ', '').replace(' -03', ''))
df['sesión promedio'] = df['sesión promedio'].apply(
    lambda x: str(x).replace(' -04', '').replace('0 days ', '').replace(' -03', ''))

df.to_csv(file_path, index=False, sep=',')

# Post proceso
- abrir el csv
- Pegar el contenido del csv en un google sheet
- Expandir los datos a varias columnas (por defecto se almacenan en una por fila). Para esto hacer lo siguiente:
    - Ir a Data
    - Seleccionar "Split text to columns"
- Incorporar una columna a la izquierda de la tabla con el número de fila y el header "N°"
- Copiar la tabla de google sheet y seleccionar las celdas de la tabla del doc donde irán los valores y seleccionar pegado normal