In [15]:
import pandas as pd 
from functions.functions_data import get_engagement_list_v2
from functions.functions_graphics import plot_metrics

In [5]:
# Load the raw CSV export and build the engagement list for the analysis window.
path_to_csv = "data/data_20230606.csv"
df = pd.read_csv(path_to_csv)
# NOTE(review): the keyword is spelled `end_data` exactly as in the original call;
# verify it against the signature of get_engagement_list_v2.
engagement_list = get_engagement_list_v2(
    df=df,
    start_date="2023-02-01",
    end_data="2023-06-03",
)
# Note: both functions are already in functions_data.py

  df = pd.read_csv(path_to_csv)


In [2]:
def get_rolling_values_version2(engagement_list, lookback):
  """
  Rolling interaction counts per user, plus per-date summary statistics, in long format.

  Events are counted per (UserId, EventDateTime), laid out as one row per
  EventDateTime and one column per UserId (missing combinations count as 0),
  and summed over a rolling window of `lookback` consecutive rows.

  engagement_list: DataFrame with at least the columns 'UserId' and 'EventDateTime'.
  lookback: int, number of consecutive EventDateTime rows in each rolling window.
    NOTE(review): the window counts rows, not calendar days, so it only spans
    `lookback` days when every day is present in the data - confirm for the input.

  return dataframe: one row per (EventDateTime, UserId) with the columns
  EventDateTime, Mean, Quantile_25, Quantile_75, UserId, Num_interacciones.
  Num_interacciones is the user's rolling count. Mean / Quantile_25 / Quantile_75
  are computed per EventDateTime over the users whose rolling count is non-zero,
  and are repeated on every row of that EventDateTime. Rows whose window is not
  yet complete (the first lookback - 1 dates) hold NaN.
  """
  stat_columns = ['Mean', 'Quantile_25', 'Quantile_75']

  # Count events per (user, date) and spread users over the columns.
  events = engagement_list[['UserId', 'EventDateTime']]
  counts = events.groupby(['UserId', 'EventDateTime']).size().reset_index(name='Interacciones')
  per_user = counts.pivot(index='EventDateTime', columns='UserId', values='Interacciones')

  # A user without events on a date contributes 0 to the rolling sum.
  df_rolling = per_user.fillna(0).rolling(window=lookback).sum()

  # Take ONE snapshot of the user columns with zeros masked out and derive every
  # statistic from it. Computing them one after another on the growing frame
  # would feed 'Mean' (and then 'Quantile_25') into the later quantiles as if
  # they were additional users.
  active = df_rolling[df_rolling != 0]
  mean = active.mean(axis=1)
  quantile_25 = active.quantile(q=0.25, axis=1, interpolation='nearest')
  quantile_75 = active.quantile(q=0.75, axis=1, interpolation='nearest')

  df_rolling['Mean'] = mean
  df_rolling['Quantile_25'] = quantile_25
  df_rolling['Quantile_75'] = quantile_75

  # Flatten to a plain frame: EventDateTime becomes a regular column.
  df_rolling = pd.DataFrame(df_rolling.to_records())

  # Every column that is neither the date nor a statistic is a user column;
  # selecting by name avoids relying on the exact column positions.
  id_columns = ['EventDateTime'] + stat_columns
  columnas_userId = [col for col in df_rolling.columns if col not in id_columns]

  # Users go from columns to rows.
  df_rolling = pd.melt(df_rolling, id_vars=id_columns,
                       value_vars=columnas_userId,
                       var_name='UserId', value_name='Num_interacciones')
  return df_rolling

In [6]:
# Rolling interaction counts per user over a 7-row window, in long format
rolling_v2 = get_rolling_values_version2(engagement_list, lookback=7)
# Display the resulting frame as the cell output
rolling_v2

Unnamed: 0,EventDateTime,Mean,Quantile_25,Quantile_75,UserId,Num_interacciones
0,2023-03-15 00:00:00+00:00,,,,100015,
1,2023-03-16 00:00:00+00:00,,,,100015,
2,2023-03-17 00:00:00+00:00,,,,100015,
3,2023-03-18 00:00:00+00:00,,,,100015,
4,2023-03-19 00:00:00+00:00,,,,100015,
...,...,...,...,...,...,...
2160395,2023-05-29 00:00:00+00:00,2.584489,1.0,3.0,999991,0.0
2160396,2023-05-30 00:00:00+00:00,2.581902,1.0,3.0,999991,0.0
2160397,2023-05-31 00:00:00+00:00,2.654592,1.0,3.0,999991,0.0
2160398,2023-06-01 00:00:00+00:00,2.710549,1.0,3.0,999991,0.0


In [7]:
# Mean values of every row belonging to one specific date
rolling_v2.loc[rolling_v2["EventDateTime"] == "2023-05-29 00:00:00+00:00", "Mean"]

75         2.584489
155        2.584489
235        2.584489
315        2.584489
395        2.584489
             ...   
2160075    2.584489
2160155    2.584489
2160235    2.584489
2160315    2.584489
2160395    2.584489
Name: Mean, Length: 27005, dtype: float64

In [17]:
# Plot the metrics of the long-format frame, labelled with the window length
plot_metrics(rolling_v2, '7 days').show()

In [13]:
def get_rolling_values_version3(engagement_list, lookback):
  """
  Per-date summary statistics of the users' rolling interaction counts (no per-user rows).

  Events are counted per (UserId, EventDateTime), laid out as one row per
  EventDateTime and one column per UserId (missing combinations count as 0),
  and summed over a rolling window of `lookback` consecutive rows.

  engagement_list: DataFrame with at least the columns 'UserId' and 'EventDateTime'.
  lookback: int, number of consecutive EventDateTime rows in each rolling window.
    NOTE(review): the window counts rows, not calendar days, so it only spans
    `lookback` days when every day is present in the data - confirm for the input.

  return dataframe: one row per EventDateTime with the columns
  EventDateTime, Mean, Quantile_25, Quantile_75. The statistics are computed over
  the users whose rolling count is non-zero. Rows whose window is not yet
  complete (the first lookback - 1 dates) hold NaN.
  """
  # Count events per (user, date) and spread users over the columns.
  events = engagement_list[['UserId', 'EventDateTime']]
  counts = events.groupby(['UserId', 'EventDateTime']).size().reset_index(name='Interacciones')
  per_user = counts.pivot(index='EventDateTime', columns='UserId', values='Interacciones')

  # A user without events on a date contributes 0 to the rolling sum.
  rolling_counts = per_user.fillna(0).rolling(window=lookback).sum()

  # Mask the zeros once and derive every statistic from the same user-only frame,
  # so an already computed statistic can never be counted as an extra user
  # when the quantiles are calculated.
  active = rolling_counts[rolling_counts != 0]
  summary = active.mean(axis=1).to_frame('Mean')
  summary['Quantile_25'] = active.quantile(q=0.25, axis=1, interpolation='nearest')
  summary['Quantile_75'] = active.quantile(q=0.75, axis=1, interpolation='nearest')

  # Flatten to a plain frame: EventDateTime becomes a regular column. Only the
  # summary columns are converted; the per-user columns are not needed here.
  df_rolling = pd.DataFrame(summary.to_records())

  return df_rolling

In [14]:
# Use this variant if you do not need the per-user (UserId) rows
rolling_v3 = get_rolling_values_version3(engagement_list, lookback=7)

# Display the resulting frame as the cell output
rolling_v3

Unnamed: 0,EventDateTime,Mean,Quantile_25,Quantile_75
0,2023-03-15 00:00:00+00:00,,,
1,2023-03-16 00:00:00+00:00,,,
2,2023-03-17 00:00:00+00:00,,,
3,2023-03-18 00:00:00+00:00,,,
4,2023-03-19 00:00:00+00:00,,,
...,...,...,...,...
75,2023-05-29 00:00:00+00:00,2.584489,1.0,3.0
76,2023-05-30 00:00:00+00:00,2.581902,1.0,3.0
77,2023-05-31 00:00:00+00:00,2.654592,1.0,3.0
78,2023-06-01 00:00:00+00:00,2.710549,1.0,3.0


In [16]:
# Plot the metrics of the per-date summary frame, labelled with the window length
plot_metrics(rolling_v3, '7 days').show()