# Funciones de AWS Lambda (eventos)

En este notebook analizamos los tipos de evento configurados en las funciones y calculamos con qué frecuencia aparece cada uno.

## Cargar dependencias

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Build the events DataFrame from the input CSV and display it.
input_file = '../csvs/events.csv'
df = pd.read_csv(filepath_or_buffer=input_file)
df

Unnamed: 0,project_id,function_name,event
0,0xEduardo_nodeless_serverless,hello,s3
1,0xEduardo_nodeless_serverless,hello,s3
2,0xayot_waas_serverless,hello,http
3,0xayot_waas_serverless,retrieveAddres,http
4,0xayot_waas_serverless,generateAddress,http
...,...,...,...
2594,peakshift_makers_bolt_fun_serverless,upload-image-url,http
2595,peakshift_makers_bolt_fun_serverless,test-something,http
2596,tsanghan_sctp-ce6-mod3_6-serverless-app_server...,hello,httpApi
2597,tsanghan_sctp-ce6-mod3_6-serverless-app_server...,hello2,httpApi


In [3]:
# Keep only the rows that actually declare an event (drop rows whose
# 'event' value is missing); `df` itself is left untouched.
filtered_df = df[df['event'].notna()]
filtered_df

Unnamed: 0,project_id,function_name,event
0,0xEduardo_nodeless_serverless,hello,s3
1,0xEduardo_nodeless_serverless,hello,s3
2,0xayot_waas_serverless,hello,http
3,0xayot_waas_serverless,retrieveAddres,http
4,0xayot_waas_serverless,generateAddress,http
...,...,...,...
2594,peakshift_makers_bolt_fun_serverless,upload-image-url,http
2595,peakshift_makers_bolt_fun_serverless,test-something,http
2596,tsanghan_sctp-ce6-mod3_6-serverless-app_server...,hello,httpApi
2597,tsanghan_sctp-ce6-mod3_6-serverless-app_server...,hello2,httpApi


In [4]:
# Event types supported by Serverless Framework.
# Used further down as an allow-list for the 'event' column; the order of
# the entries is kept exactly as originally declared.
supported_events = [
    'httpApi', 'http',
    'activemq', 'alb',
    'alexaSkill', 'alexaSmartHome',
    'cloudwatchEvent', 'cloudwatchLog', 'cloudFront',
    'cognitoUserPool', 'eventBridge',
    'iot', 'iotFleetProvisioning',
    'kafka', 'stream', 'msk', 'rabbitmq',
    's3', 'schedule', 'sns', 'sqs',
    'websocket',
]
supported_events

['httpApi',
 'http',
 'activemq',
 'alb',
 'alexaSkill',
 'alexaSmartHome',
 'cloudwatchEvent',
 'cloudwatchLog',
 'cloudFront',
 'cognitoUserPool',
 'eventBridge',
 'iot',
 'iotFleetProvisioning',
 'kafka',
 'stream',
 'msk',
 'rabbitmq',
 's3',
 'schedule',
 'sns',
 'sqs',
 'websocket']

In [5]:
# Narrow the frame down to rows whose event type is in the
# `supported_events` allow-list; anything else is discarded.
filtered_df = filtered_df.loc[
    lambda frame: frame['event'].isin(supported_events)
]
filtered_df

Unnamed: 0,project_id,function_name,event
0,0xEduardo_nodeless_serverless,hello,s3
1,0xEduardo_nodeless_serverless,hello,s3
2,0xayot_waas_serverless,hello,http
3,0xayot_waas_serverless,retrieveAddres,http
4,0xayot_waas_serverless,generateAddress,http
...,...,...,...
2594,peakshift_makers_bolt_fun_serverless,upload-image-url,http
2595,peakshift_makers_bolt_fun_serverless,test-something,http
2596,tsanghan_sctp-ce6-mod3_6-serverless-app_server...,hello,httpApi
2597,tsanghan_sctp-ce6-mod3_6-serverless-app_server...,hello2,httpApi


In [6]:
# Count how many rows each event type has.
# size() counts rows per group, so a row is counted even if its
# 'project_id' is missing (count() on 'project_id' would silently skip it).
grouped_df = filtered_df.groupby('event').size().reset_index(name='count')

# total number of events across all event types
total_count = grouped_df['count'].sum()

# "occurrence": share of each event type as a percentage of all events
grouped_df['occurrence'] = (grouped_df['count'] / total_count) * 100

# Most frequent event types first. A stable sort is requested so that event
# types with equal counts come out in a deterministic order; the index is
# then renumbered from 0 to match the new row order.
grouped_df = (
    grouped_df
    .sort_values(by='occurrence', ascending=False, kind='mergesort')
    .reset_index(drop=True)
)
grouped_df

Unnamed: 0,event,count,occurrence
0,http,1431,66.158114
1,httpApi,391,18.076745
2,schedule,178,8.229311
3,websocket,40,1.849283
4,sqs,27,1.248266
5,sns,25,1.155802
6,s3,22,1.017106
7,eventBridge,17,0.785945
8,stream,11,0.508553
9,cloudwatchEvent,9,0.416089
