# Análisis de código fuente de repositorios (entornos de ejecución)
Este análisis netamente considera los entornos de ejecución de los despliegues indicados en cada archivo de configuración relevante de cada repositorio

## Cargar dependencias

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

## Cargar archivo CSV de entrada

In [2]:
# generamos DF a partir de CSV de entrada
input_file = '../csvs/runtimes.csv'
raw_df = pd.read_csv(input_file)
raw_df

Unnamed: 0,project_id,provider,runtime,stage,region
0,0xEduardo_nodeless_serverless,aws,nodejs12.x,,us-east-1
1,0xayot_waas_serverless,aws,nodejs12.x,dev,us-east-1
2,20minutes_serverless-github-check_serverless,aws,nodejs22.x,,"${opt:region, 'eu-west-1'}"
3,20minutes_serverless-provisioned-memory-report...,aws,nodejs22.x,,"${opt:region, 'eu-west-1'}"
4,2amigos_laravel-mail-api_serverless,aws,,,us-east-1
...,...,...,...,...,...
752,brazucas_brz_gg_serverless,aws,nodejs18.x,dev,us-east-1
753,tsanghan_sctp-ce6-mod3_12_serverless,aws,nodejs20.x,dev,ap-southeast-1
754,tsanghan_sctp-ce6-mod3_6-serverless-app_server...,aws,nodejs20.x,,ap-southeast-1
755,Nyholm_sfhackday_com_serverless,aws,provided.al2,prod,eu-central-1


### Preprocesamiento

In [3]:
# define a mapping dictionary for runtime merging
runtime_mapping = {
    '${env:NODE_RUNTIME, \'nodejs12.x\'}': 'nodejs',
    '${opt:runtime, \'nodejs14.x\' }': 'nodejs',
    '${opt:runtime, \'nodejs18.x\' }': 'nodejs',
    '${file(../sls.global.yml):aws_nodejs_runtime}': 'nodejs',
    '${self:custom.CONF.${self:custom.CORE.profile}.runtime, \'nodejs16.x\'}': 'nodejs',
    '${self:custom.param.NODE_VERSION}': 'nodejs',
    'nodejs${env:NODE_VERSION, \'12\' }.x': 'nodejs',
    'nodejs${self:custom.runtime.${opt:stage, self:provider.stage}}': 'nodejs',
    '${env:PYTHON_VERSION}': 'python',
    '${file(../sls.global.yml):aws_python_runtime}': 'python',
    '${env:RUNTIME}': 'custom',
    '${file(../../serverless.common.yml):provider.runtime}': 'custom',
    '${file(../shared/provider.yml):runtime}': 'custom',
    '${file(serverless-common.yml):runtime}': 'custom',
    '${self:custom.provider.runtime}': 'custom',
    '${self:custom.settings.common.runtime}': 'custom',
    'provided': 'custom',
    'provided.al2': 'custom',
    'dotnet6': 'dotnet',
    'dotnetcore1.0': 'dotnet',
    'dotnetcore2.0': 'dotnet',
    'dotnetcore2.1': 'dotnet',
    'dotnetcore3.1': 'dotnet',
    'go1.x': 'go',
    'rust': 'custom',
    'ruby': 'ruby',
    'ruby2.5': 'ruby',
    'ruby2.7': 'ruby',
    'ruby3.2': 'ruby',
    'haskell': 'custom',   # added 'haskell' to 'custom'
    'java11': 'java',  # merged 'java11', 'java17', 'java8' into 'java'
    'java17': 'java',
    'java8': 'java',
    'node8.10': 'nodejs',   # merged various 'nodejs' versions into 'nodejs'
    'nodejs10': 'nodejs',
    'nodejs10.17': 'nodejs',
    'nodejs10.x': 'nodejs',
    'nodejs12.13': 'nodejs',
    'nodejs12.x': 'nodejs',
    'nodejs14.x': 'nodejs',
    'nodejs16.x': 'nodejs',
    'nodejs18.x': 'nodejs',
    'nodejs4.3': 'nodejs',
    'nodejs6.10': 'nodejs',
    'nodejs8.10': 'nodejs',
    'nodejs8.12': 'nodejs',
    'nodejs[10|12].x': 'nodejs',
    'python2.7': 'python',   # merged various 'python' versions into 'python'
    'python3.10': 'python',
    'python3.11': 'python',
    'python3.6': 'python',
    'python3.7': 'python',
    'python3.8': 'python',
    'python3.9': 'python',
    'python:3.8': 'python',
}

# create a new column 'merged_runtime' based on the mapping
raw_df['merged_runtime'] = raw_df['runtime'].map(runtime_mapping)

# group by 'merged_runtime'
grouped_by_runtime_df = raw_df.groupby('merged_runtime').size().reset_index(name='count')
grouped_by_runtime_df

Unnamed: 0,merged_runtime,count
0,custom,20
1,dotnet,4
2,go,12
3,java,16
4,nodejs,426
5,python,94
6,ruby,7


Copiamos este DataFrame para convertir el tipo de dato de las columnas a numérico

In [4]:
df = grouped_by_runtime_df.copy()
df['count'] = pd.to_numeric(df['count'], errors='coerce')

In [5]:
# calculate the total count of runtimes
total_count = df['count'].sum()

# add a new column "occurrence" with the percentage values
df['occurrence'] = (df['count'] / total_count) * 100

# sort df by the "occurrence" column in descending order
df = df.sort_values(by='occurrence', ascending=False)

# reset the index
df = df.reset_index(drop=True)
df

Unnamed: 0,merged_runtime,count,occurrence
0,nodejs,426,73.57513
1,python,94,16.234888
2,custom,20,3.454231
3,java,16,2.763385
4,go,12,2.072539
5,ruby,7,1.208981
6,dotnet,4,0.690846


In [6]:
# guardamos el nuevo DataFrame en un nuevo CSV
output_file = '../temp_data/eda_functions_runtimes.csv'
df.to_csv(output_file, index=False)