# SQR Ponderado

In [4]:
import pandas as pd
import numpy as np


def sqr_ponderado(data, drop_cols=True, min_freq=3):
    """Compute the weighted SQR by blending each session's SQR with the user's note.

    For every profile that has at least ``min_freq`` non-null notes, the
    relative frequency of notes is computed as
    ``freq_rel = round(n_notes / n_sessions, 2)``. For the sessions of those
    profiles that carry a note, the SQR is replaced by::

        freq_rel * (notaUsuario * 10) + (1 - freq_rel) * sqr

    All other sessions keep their original SQR.

    Args:
        data (DataFrame): Sleep sessions, one row per session, with at least
            the columns ``idPerfiles``, ``notaUsuario`` and ``sqr``. The input
            frame is not modified.
        drop_cols (bool, optional): Defaults to True. If False, the helper
            columns ``sqr_old`` (initial SQR) and ``freq_rel`` (relative
            frequency of notes, i.e. the weight of the user's note) are kept
            in the result.
        min_freq (int, optional): Defaults to 3. Minimum number of notes a
            profile must have for its notes to be taken into account.

    Returns:
        DataFrame: Copy of ``data`` (same index and row order) where ``sqr``
        holds the weighted SQR. Note that ``notaUsuario`` is returned
        multiplied by 10, as in the intermediate computation.

    Raises:
        KeyError: If any of the required columns is missing from ``data``.
    """
    required_cols = ('idPerfiles', 'notaUsuario', 'sqr')
    missing_cols = [col for col in required_cols if col not in data.columns]
    if missing_cols:
        raise KeyError('data is missing required columns: {}'.format(missing_cols))

    new_df = data.copy()

    # NOTE(review): presumably rescales the notes to the same range as sqr
    # (e.g. 0-10 -> 0-100) -- confirm against the data dictionary.
    new_df['notaUsuario'] = new_df['notaUsuario'] * 10

    has_note = new_df['notaUsuario'].notna()

    # Sessions per profile, and notes per profile restricted to the profiles
    # that reach the minimum number of notes.
    sessions_per_profile = new_df['idPerfiles'].value_counts()
    notes_per_profile = new_df.loc[has_note, 'idPerfiles'].value_counts()
    notes_per_profile = notes_per_profile[notes_per_profile >= min_freq]

    # Weight of the user's note: share of the profile's sessions that have one.
    freq_rel = (
        notes_per_profile / sessions_per_profile.reindex(notes_per_profile.index)
    ).round(2)

    # Profiles below min_freq are absent from freq_rel and therefore map to NaN.
    new_df['freq_rel'] = new_df['idPerfiles'].map(freq_rel)
    new_df['sqr_old'] = new_df['sqr']

    use_note = has_note & new_df['freq_rel'].notna()
    blended = (
        new_df['freq_rel'] * new_df['notaUsuario']
        + (1 - new_df['freq_rel']) * new_df['sqr_old']
    )
    # Positional assignment so a non-unique index in `data` cannot break
    # label alignment.
    new_df.loc[use_note, 'sqr'] = blended[use_note].values

    if drop_cols:
        # DataFrame.drop returns a new frame; the result must be rebound,
        # otherwise the helper columns are never removed.
        new_df = new_df.drop(columns=['freq_rel', 'sqr_old'])

    return new_df

## Ejemplo de uso

In [5]:
# Load the session-level data that already includes the SQR
perfiles_sqr = pd.read_parquet(
    path='../data/processed/perfiles_sqr_filtrado.parquet',
)

In [8]:
# Require at least 3 sleep-quality ratings per profile (min_freq=3)
# and keep the helper columns in the result (drop_cols=False)
weighted_sqr = sqr_ponderado(
    perfiles_sqr,
    min_freq=3,
    drop_cols=False,
)

In [10]:
# Show the sessions where the weighted SQR differs from the initial one.
# freq_rel is the weight given to the user's note in the blend.
weighted_sqr.loc[
    weighted_sqr['sqr'].ne(weighted_sqr['sqr_old']),
    ['idSesiones', 'sqr_old', 'sqr', 'freq_rel'],
]

Unnamed: 0,idSesiones,sqr_old,sqr,freq_rel
33797,39819,49.504551,50.514460,0.02
34540,40655,55.178001,56.074441,0.02
36651,43063,44.633452,52.384769,0.14
39568,46356,50.192193,55.172973,0.10
39790,46607,79.887430,81.898687,0.10
...,...,...,...,...
83112,93932,66.944675,83.472337,0.50
83283,94112,77.260794,98.635648,0.94
83285,94114,72.327122,98.339627,0.94
83287,94116,72.374366,98.342462,0.94
