In [None]:
import pandas as pd
import sys
sys.path.append('../')
from repo_utils import checkout_master_fetch_and_reset

# Load the list of applications; only the APPLICATION_NAME column is used here.
apps = pd.read_csv("../datasets/applications.csv")

# Bring every application's local clone (under ../../repos/<name>) up to date
# via the project helper, one repository at a time.
for application_name in apps["APPLICATION_NAME"]:
    repo_path = f'../../repos/{application_name}'
    checkout_master_fetch_and_reset(repo_path)

In [None]:
import pandas as pd
import re
from dateutil.relativedelta import relativedelta

# Tag names that consist of exactly three dot-separated numbers (e.g. "1.2.3")
# are treated as release versions.
release_version_pattern = re.compile(r'^\d+\.\d+\.\d+$')

# Columns used below from apps_ck_versions.csv: class, tag_name, tag_date, APPLICATION_NAME.
df = pd.read_csv("../datasets/apps_ck_versions.csv")
# Columns used from this file: APPLICATION_NAME and first_release_date.
first_release_dates = pd.read_csv("../datasets/applications_first_release_date.csv")

# read_csv leaves date columns as plain strings, so parse both of them.
# first_release_date must be a real timestamp because diff_months() hands it to
# relativedelta, which only accepts date/datetime objects.
# NOTE(review): if one column carries UTC offsets and the other does not, the
# later date arithmetic will fail — confirm both CSVs use the same convention.
df['tag_date'] = pd.to_datetime(df['tag_date'])
first_release_dates['first_release_date'] = pd.to_datetime(
    first_release_dates['first_release_date']
)

# Keep only the rows whose tag looks like a release version.
# na=False makes rows with a missing tag_name count as "not a release"
# instead of raising.
is_release_tag = df['tag_name'].str.match(release_version_pattern.pattern, na=False)
release_versions = df[is_release_tag]

# One row per (application, release tag).
release_versions_unique = release_versions.drop_duplicates(subset=['APPLICATION_NAME', 'tag_name'])

# Most recent tag date per application. This is taken over ALL tags in df,
# not only the ones matching the release pattern.
last_release_dates = (
    df.groupby('APPLICATION_NAME')['tag_date']
    .max()
    .reset_index()
    .rename(columns={'tag_date': 'last_release_date'})
)

# Number of distinct classes in each application's most recent version.
# The match is done on (APPLICATION_NAME, tag_date) together: matching on
# tag_date alone would also pull in older tags of one application that happen
# to share a date with another application's latest tag.
latest_versions = df.sort_values('tag_date').drop_duplicates('APPLICATION_NAME', keep='last')
latest_version_rows = df.merge(
    latest_versions[['APPLICATION_NAME', 'tag_date']],
    on=['APPLICATION_NAME', 'tag_date'],
)
latest_versions_class_counts = (
    latest_version_rows.groupby('APPLICATION_NAME')['class']
    .nunique()
    .reset_index()
    .rename(columns={'class': 'class_count'})
)

# Number of unique release versions per application.
release_version_counts = (
    release_versions_unique.groupby('APPLICATION_NAME')
    .size()
    .reset_index(name='release_version_count')
)

# Combine everything into a single frame keyed by APPLICATION_NAME.
# These are inner joins, so an application missing from any of the inputs
# (e.g. one with no release-pattern tags) is dropped from the result.
result = pd.merge(first_release_dates, last_release_dates, on='APPLICATION_NAME')
result = pd.merge(result, latest_versions_class_counts, on='APPLICATION_NAME')
result = pd.merge(result, release_version_counts, on='APPLICATION_NAME')

# Month span between the first and the last release.
def diff_months(d1, d2):
    """Return the number of months from d1 to d2, counting the starting month.

    The whole-month difference is taken from ``relativedelta(d2, d1)``
    (its ``years`` and ``months`` parts; leftover days are ignored) and then
    increased by one so that the month containing ``d1`` is included.

    Args:
        d1: Start date (passed as the second argument to ``relativedelta``).
        d2: End date (passed as the first argument to ``relativedelta``).

    Returns:
        ``years * 12 + months + 1`` for the computed delta.
    """
    span = relativedelta(d2, d1)
    whole_months = 12 * span.years + span.months
    return whole_months + 1  # +1 to include the starting month

# Months each application has been active: from its first release date to its
# last tag date, as computed by diff_months (start month included).
active_months = result.apply(
    lambda app_row: diff_months(app_row['first_release_date'], app_row['last_release_date']),
    axis=1,
)
result['active_months'] = active_months

# Release density = release versions per active month, rounded to 2 decimals.
releases_per_month = result['release_version_count'].div(result['active_months'])
result['release_density'] = releases_per_month.round(2)

# Classify repository activity from its release density.
def clasificar_actividad(density):
    """Map a release density (releases per month) to an activity label.

    Args:
        density: Numeric release density.

    Returns:
        '🟢 Alta' when density is at least 2, '🟡 Media' when it is at least 1,
        and '🔴 Baja' for anything else (including values for which both
        comparisons are false, such as NaN).
    """
    # Thresholds are checked from highest to lowest; the first one met wins.
    labelled_thresholds = (
        (2, '🟢 Alta'),
        (1, '🟡 Media'),
    )
    for minimum, label in labelled_thresholds:
        if density >= minimum:
            return label
    return '🔴 Baja'

# Label every application with its activity level.
result['activity_level'] = result['release_density'].apply(clasificar_actividad)

# Order rows by the activity label, descending. The labels are compared as
# strings, and with the current emoji prefixes that puts '🟢 Alta' first,
# then '🟡 Media', then '🔴 Baja'.
result = result.sort_values(by='activity_level', ascending=False)

# Save the result to a CSV file.
result.to_csv('../output/repos_stats_full.csv', index=False)

# Show the result. This must be the last expression of the cell; a bare
# expression placed before another statement is not displayed by the notebook.
result

In [None]:
import pandas as pd

# Deployment log. Columns used below: DEPLOY_STATUS, DEPLOY_STRATEGY,
# APPLICATION_NAME, DEPLOY_DATE and SERVICE_NAME.
deploys = pd.read_csv("../datasets/apps_deploys.csv")

# Keep completed deployments (finished or rolled back) that used one of the
# three strategies of interest, for the dt-benefits-backend application only.
is_selected_deploy = (
    deploys['DEPLOY_STATUS'].isin(['finished', 'rollbacked'])
    & deploys['DEPLOY_STRATEGY'].isin(['BlueGreenDeployment', 'InitialDeployment', 'SafeDeployment'])
    & (deploys['APPLICATION_NAME'] == 'dt-benefits-backend')
)

df = (
    deploys[is_selected_deploy]
    # Parse dates via assign() so a new frame is produced rather than writing
    # into a filtered slice (which triggers SettingWithCopyWarning).
    .assign(DEPLOY_DATE=lambda d: pd.to_datetime(d['DEPLOY_DATE'], format='mixed'))
    # Restrict DEPLOY_DATE to October 2024 (start inclusive, end exclusive).
    .loc[lambda d: (d['DEPLOY_DATE'] >= '2024-10-01') & (d['DEPLOY_DATE'] < '2024-11-01')]
    # Sort by SERVICE_NAME, then by DEPLOY_DATE.
    .sort_values(by=['SERVICE_NAME', 'DEPLOY_DATE'])
)

df
