In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
from ck_analysis import get_repo_id

def weighted_average(group, value_col, weight_col):
    """Return the weighted mean of ``value_col`` using ``weight_col`` as weights.

    Rows where either the value or the weight is missing are ignored in both
    the numerator and the denominator. Without that, a row with a missing
    value would still contribute its weight to the denominator and bias the
    result towards zero.

    Args:
        group: DataFrame (typically one groupby group) holding both columns.
        value_col: Name of the column to average.
        weight_col: Name of the column providing the weights.

    Returns:
        The weighted average as a float, or NaN when no row has both a value
        and a weight, or when the usable weights sum to zero.
    """
    values = group[value_col]
    weights = group[weight_col]

    # Only rows with both pieces of information may take part in the average.
    usable = values.notna() & weights.notna()
    total_weight = weights[usable].sum()
    if total_weight == 0:
        # Undefined average; return NaN explicitly instead of dividing by zero.
        return float('nan')

    return (values[usable] * weights[usable]).sum() / total_weight

def get_ck_over_time(repo_name, agg_func):
    """Return monthly LOC-weighted CK metrics for one repository.

    Reads ``../output/versioned/{repo_name}.csv``, discards rows whose
    ``type`` is ``enum`` and rows whose ``class`` contains the literal text
    ``.dto.``, computes the LOC-weighted average of every CK metric for each
    tag date, and keeps the latest tag date of each calendar month.

    Args:
        repo_name: Repository identifier; used to build the CSV path.
        agg_func: Currently unused. The aggregation is always the LOC-weighted
            average; the parameter is kept so existing callers keep working.

    Returns:
        DataFrame indexed by ``MONTH`` (the month converted from a period to
        a timestamp) with the columns WMC, DIT, NOC, CBO, RFC and LCOM.
    """
    metrics = ['WMC', 'DIT', 'NOC', 'CBO', 'RFC', 'LCOM']
    source_columns = ['tag_date', 'wmc', 'dit', 'noc', 'cbo', 'rfc', 'lcom', 'loc']

    raw = pd.read_csv(f'../output/versioned/{repo_name}.csv')
    raw.columns = raw.columns.str.strip()

    # Drop unwanted rows. regex=False makes '.dto.' a literal substring;
    # as a regex each '.' would match any character and filter unrelated
    # classes.
    is_enum = raw['type'] == 'enum'
    is_dto = raw['class'].str.contains('.dto.', na=False, regex=False)

    # Work on an independent copy so later column assignments never touch a
    # slice of the frame that was read from disk.
    classes = raw.loc[~is_enum & ~is_dto, source_columns].copy()
    classes.columns = ['TagDate'] + metrics + ['LOC']
    classes['TagDate'] = pd.to_datetime(classes['TagDate'])

    # One aggregation per metric. Each receives the metric values of a single
    # tag date and looks up the matching rows (including LOC) by index.
    # ``classes`` is never rebound, so the closures always see the same frame.
    aggregations = {
        metric: (
            lambda values, col=metric:
            weighted_average(classes.loc[values.index], col, 'LOC')
        )
        for metric in metrics
    }
    per_tag = classes.groupby('TagDate').agg(aggregations).reset_index()

    # Bucket tag dates by calendar month.
    per_tag['MONTH'] = per_tag['TagDate'].dt.to_period('M')

    # Keep the row with the greatest (latest) tag date within each month.
    latest_rows = per_tag.groupby('MONTH')['TagDate'].idxmax()
    monthly = per_tag.loc[latest_rows].copy()

    # Convert MONTH from Period to Timestamp.
    monthly['MONTH'] = monthly['MONTH'].dt.to_timestamp()

    return monthly.set_index('MONTH')[metrics]

def get_quality_indicator_over_time(repo_name):
    """Return the latest static-analysis summary row of each month for a repo.

    Reads ``../output/static_analisys/raw/static_analisys_summary.csv``,
    derives ``REPO_NAME`` by applying ``get_repo_id`` to ``APPLICATION_NAME``,
    keeps the rows that belong to ``repo_name`` on ``master``, ``release`` or
    ``hotfix`` branches, and for every calendar month keeps the row with the
    greatest ``CREATE_DTTM``.

    Args:
        repo_name: Value that ``get_repo_id(APPLICATION_NAME)`` must equal for
            a row to be selected.

    Returns:
        DataFrame indexed by ``MONTH`` (the month converted from a period to
        a timestamp) with the columns QUALITY_INDICATOR,
        HIGH_SEVERITY_ISSUES_QTY, MEDIUM_SEVERITY_ISSUES_QTY and
        LOW_SEVERITY_ISSUES_QTY.
    """
    summary = pd.read_csv('../output/static_analisys/raw/static_analisys_summary.csv')
    # Strip any leading or trailing spaces from column names.
    summary.columns = summary.columns.str.strip()
    summary['CREATE_DTTM'] = pd.to_datetime(summary['CREATE_DTTM'])
    # Map every application name to its repository id.
    summary['REPO_NAME'] = summary['APPLICATION_NAME'].apply(get_repo_id)

    belongs_to_repo = summary['REPO_NAME'] == repo_name
    on_tracked_branch = summary['BRANCH_TYPE'].isin(['master', 'release', 'hotfix'])
    # Copy the selection: assigning columns on a boolean-filtered slice raises
    # SettingWithCopyWarning and is ambiguous about which frame is modified.
    scans = summary[belongs_to_repo & on_tracked_branch].copy()

    # Bucket scans by calendar month.
    scans['MONTH'] = scans['CREATE_DTTM'].dt.to_period('M')

    # Keep the row with the greatest (latest) creation time within each month.
    latest_rows = scans.groupby('MONTH')['CREATE_DTTM'].idxmax()
    monthly = scans.loc[latest_rows].copy()

    # Convert MONTH from Period to Timestamp.
    monthly['MONTH'] = monthly['MONTH'].dt.to_timestamp()

    return monthly.set_index('MONTH')[[
        'QUALITY_INDICATOR',
        'HIGH_SEVERITY_ISSUES_QTY',
        'MEDIUM_SEVERITY_ISSUES_QTY',
        'LOW_SEVERITY_ISSUES_QTY',
    ]]


def get_deploys_over_time(repo_name):
    """Load deploy statistics for a repository and derive its rollback rate.

    Reads ``../output/deploys/{repo_name}_deploys.csv`` and computes
    ``ROLLBACK_RATE`` as ``ROLLBACKED_DEPLOYS / DEPLOYS``; missing rates
    (for example 0 / 0) are replaced by 0. ``MONTH`` is ``PERIOD_DATE``
    reduced to a monthly period and converted back to a timestamp.

    Args:
        repo_name: Repository identifier; used to build the CSV path.

    Returns:
        DataFrame with the columns MONTH, ROLLBACKED_DEPLOYS and
        ROLLBACK_RATE, one row per row of the input file.
    """
    deploys = pd.read_csv(f'../output/deploys/{repo_name}_deploys.csv')
    deploys.columns = [name.strip() for name in deploys.columns]

    period_dates = pd.to_datetime(deploys['PERIOD_DATE'])
    rollback_rate = deploys['ROLLBACKED_DEPLOYS'] / deploys['DEPLOYS']

    deploys = deploys.assign(
        PERIOD_DATE=period_dates,
        ROLLBACK_RATE=rollback_rate.fillna(0),
        # Monthly period first, then back to a Timestamp.
        MONTH=period_dates.dt.to_period('M').dt.to_timestamp(),
    )

    wanted = ['MONTH', 'ROLLBACKED_DEPLOYS', 'ROLLBACK_RATE']
    return deploys[wanted]


# Repositories to analyse. Each one needs its versioned CK metrics, deploy
# statistics and static-analysis summary available under ../output.
repo_names = ['pa-as-yo-go-ap', 'pa-te-ap', 'pr-ap', 'sp-ut-pr-ap', 'pa-ta-ap']
# NOTE(review): not referenced by the code below; kept in case another cell
# relies on it.
agg_funcs = ['mean', 'median', 'max']

# Loop-invariant settings. agg_func is only forwarded to get_ck_over_time
# and shown in the plot title.
agg_func = 'mean'
corr_method = "kendall"  # pearson kendall spearman

# Columns shown on the heatmap's x axis and rows shown on its y axis.
heatmap_columns = ['WMC', 'DIT', 'NOC', 'CBO', 'RFC', 'LCOM', 'QUALITY_INDICATOR', 'HIGH_SEVERITY_ISSUES_QTY', 'MEDIUM_SEVERITY_ISSUES_QTY', 'LOW_SEVERITY_ISSUES_QTY']
heatmap_rows = ['ROLLBACK_RATE', 'ROLLBACKED_DEPLOYS', 'QUALITY_INDICATOR', 'HIGH_SEVERITY_ISSUES_QTY', 'MEDIUM_SEVERITY_ISSUES_QTY', 'LOW_SEVERITY_ISSUES_QTY']

for repo_name in repo_names:
    # Load the three monthly data sets for this repository.
    df_ck = get_ck_over_time(repo_name, agg_func)
    df_deploys = get_deploys_over_time(repo_name)
    df_quality = get_quality_indicator_over_time(repo_name)

    # Join the data sets on the month; only months present in all three remain.
    df_merged = pd.merge(df_ck, df_deploys, on='MONTH', how='inner')
    df_merged = pd.merge(df_merged, df_quality, on='MONTH', how='inner')

    # Compute correlations over numeric columns only. The merged frame may
    # carry the datetime MONTH key as a regular column, and whether corr()
    # drops non-numeric columns depends on the pandas version's numeric_only
    # default, so the selection is made explicit here.
    numeric_only_frame = df_merged.select_dtypes(include='number')
    correlations = numeric_only_frame.corr(method=corr_method).round(2)
    correlations = correlations[heatmap_columns]
    correlations = correlations[correlations.index.isin(heatmap_rows)]

    # Draw the heatmap.
    plt.figure(figsize=(10, 8))
    sns.heatmap(correlations, annot=True, cmap='coolwarm', center=0, vmin=-1, vmax=1)

    # Customise the plot.
    plt.title(f'Correlacion {corr_method} - {repo_name} - {agg_func}')
    plt.tight_layout()

    # Show the plot.
    plt.show()
