In [None]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sys.path.append('../')
from ck_analysis import get_repo_id

# Define the weighted-average function
def weighted_average(group, value_col, weight_col):
    """Return the weighted mean of one column, weighted by another column.

    Rows where either the value or the weight is missing are excluded from
    both the numerator and the denominator. Otherwise a missing value would
    be skipped in the weighted sum while its weight still inflated the
    divisor, pulling the result towards zero.

    Args:
        group: DataFrame holding both columns.
        value_col: Name of the column to average.
        weight_col: Name of the column that supplies the weights.

    Returns:
        The weighted mean, or NaN when the usable weights sum to zero
        (including the case where no row is usable).
    """
    values = group[value_col]
    weights = group[weight_col]

    # Only rows with both a value and a weight may contribute.
    usable = values.notna() & weights.notna()
    total_weight = weights[usable].sum()

    # A zero divisor has no meaningful average; report it as "no data".
    if total_weight == 0:
        return float('nan')
    return (values[usable] * weights[usable]).sum() / total_weight

def plot_ck_over_time(repo_name, date_range):
    """Plot per-tag-date aggregates of the CK metrics of a repository.

    Reads ``../output/versioned/{repo_name}.csv``, discards rows whose type
    is ``enum`` and rows whose class name contains the literal text
    ``.dto.``, then shows three figures: the median, the mean and the
    LOC-weighted average of the metrics for every tag date.

    Args:
        repo_name: Used to build the CSV path and the figure titles.
        date_range: Datetime index (it must provide ``strftime``) whose
            entries become the x tick positions and ``'%b %Y'`` labels.
    """
    df = pd.read_csv(f'../output/versioned/{repo_name}.csv')
    # Strip any leading or trailing spaces from column names
    df.columns = df.columns.str.strip()

    # Remove any rows where the type is enum
    df = df[df['type'] != 'enum']
    # Remove any rows where the class name contains ".dto.".
    # regex=False makes the dots literal (as a regex each dot would match any
    # character); na=False keeps rows without a class name instead of failing
    # when the mask is negated.
    df = df[~df['class'].str.contains('.dto.', regex=False, na=False)]

    # Work on an independent copy so the column assignments below never
    # write into a slice of the filtered frame.
    df = df[['tag_date', 'tag_name', 'class', 'wmc', 'dit', 'noc', 'cbo', 'rfc', 'lcom', 'loc']].copy()
    df.columns = ['TagDate', 'TagName', 'ClassName', 'WMC', 'DIT', 'NOC', 'CBO', 'RFC', 'LCOM', 'LOC']
    df['TagDate'] = pd.to_datetime(df['TagDate'])

    by_date = df.groupby('TagDate')

    # Median of every numeric column per tag date
    _plot_ck_aggregate(
        by_date.median(numeric_only=True),
        f'Median Values Over Time - {repo_name}',
        'Median Value',
        date_range,
    )

    # Mean of every numeric column per tag date
    _plot_ck_aggregate(
        by_date.mean(numeric_only=True),
        f'Mean Values Over Time - {repo_name}',
        'Mean Value',
        date_range,
    )

    # LOC-weighted average of each metric per tag date. Iterating over the
    # groups hands the complete rows of one tag date to weighted_average.
    metrics = ['WMC', 'DIT', 'NOC', 'CBO', 'RFC', 'LCOM']
    tag_dates = []
    rows = []
    for tag_date, group in by_date:
        tag_dates.append(tag_date)
        rows.append({metric: weighted_average(group, metric, 'LOC') for metric in metrics})
    weighted_avg_by_date = pd.DataFrame(
        rows,
        index=pd.Index(tag_dates, name='TagDate'),
        columns=metrics,
    )

    _plot_ck_aggregate(
        weighted_avg_by_date,
        f'Weighted Average Values Over Time - {repo_name}',
        'Weighted Average',
        date_range,
    )


def _plot_ck_aggregate(aggregated, title, ylabel, date_range):
    """Show one figure with a line per column of *aggregated* over its index."""
    plt.figure(figsize=(12, 6))
    for column in aggregated.columns:
        plt.plot(aggregated.index, aggregated[column], label=column)

    plt.title(title)
    plt.xlabel('Date')
    plt.ylabel(ylabel)
    plt.xticks(date_range, date_range.strftime('%b %Y'), rotation=45)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def plot_quality_indicator_over_time(repo_name, date_range):
    """Plot the static-analysis issue counts of a repository over time.

    Reads ``../output/static_analisys/raw/static_analisys_summary.csv``,
    derives a ``REPO_NAME`` column by applying ``get_repo_id`` to
    ``APPLICATION_NAME``, keeps the rows of *repo_name* on master, release
    or hotfix branches, and shows two figures against ``CREATE_DTTM``: one
    with the high and medium severity counts, one with the low severity
    count.

    Args:
        repo_name: Repository identifier to keep; also used in the titles.
        date_range: Datetime index (it must provide ``strftime``) whose
            entries become the x tick positions and ``'%b %Y'`` labels.
    """
    summary = pd.read_csv('../output/static_analisys/raw/static_analisys_summary.csv')
    # Column names may carry surrounding spaces
    summary.columns = summary.columns.str.strip()
    summary['CREATE_DTTM'] = pd.to_datetime(summary['CREATE_DTTM'])
    # Map every application name to its repository identifier
    summary['REPO_NAME'] = summary['APPLICATION_NAME'].apply(get_repo_id)

    is_target_repo = summary['REPO_NAME'] == repo_name
    is_tracked_branch = summary['BRANCH_TYPE'].isin(['master', 'release', 'hotfix'])
    summary = summary[is_target_repo & is_tracked_branch]

    # One entry per figure; each entry lists the (column, legend label)
    # pairs drawn on that figure.
    figures = (
        (('HIGH_SEVERITY_ISSUES_QTY', 'High'), ('MEDIUM_SEVERITY_ISSUES_QTY', 'Medium')),
        (('LOW_SEVERITY_ISSUES_QTY', 'Low'),),
    )

    for series_on_figure in figures:
        plt.figure(figsize=(12, 6))
        for column, legend_label in series_on_figure:
            sns.lineplot(data=summary, x='CREATE_DTTM', y=column, label=legend_label)

        plt.title(f'Quality Indicator Over Time - {repo_name}')
        plt.xlabel('Date')
        plt.ylabel('Quality Indicator')
        plt.xticks(date_range, date_range.strftime('%b %Y'), rotation=45)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()


def plot_mi_over_time(repo_name, date_range):
    """Plot the LOC-weighted average of the MI column per tag date.

    Reads ``../output/mi_versioned/{repo_name}.csv`` and, for every tag
    date, averages ``AverageMI`` weighted by ``TotalLOC`` before drawing the
    resulting series in a single figure.

    Args:
        repo_name: Used to build the CSV path and the figure title.
        date_range: Datetime index (it must provide ``strftime``) whose
            entries become the x tick positions and ``'%b %Y'`` labels.
    """
    frame = pd.read_csv(f'../output/mi_versioned/{repo_name}.csv')
    # Column names may carry surrounding spaces
    frame.columns = frame.columns.str.strip()

    frame = frame[['tag_date', 'tag_name', 'ClassName', 'TotalLOC', 'AverageMI']].rename(
        columns={
            'tag_date': 'TagDate',
            'tag_name': 'TagName',
            'TotalLOC': 'LOC',
            'AverageMI': 'MI',
        }
    )
    frame['TagDate'] = pd.to_datetime(frame['TagDate'])

    def loc_weighted_mi(mi_values):
        # agg passes only the MI values of one tag date; their index is
        # used to look up the complete rows so the LOC weights are available.
        return weighted_average(frame.loc[mi_values.index], 'MI', 'LOC')

    # Weighted average per tag date
    weighted_avg_by_date = frame.groupby('TagDate').agg({'MI': loc_weighted_mi})

    # Draw one line per aggregated column
    plt.figure(figsize=(12, 6))
    dates = weighted_avg_by_date.index
    for name in weighted_avg_by_date.columns:
        plt.plot(dates, weighted_avg_by_date[name], label=name)

    plt.title(f'Weighted Average Over Time - {repo_name}')
    plt.xlabel('Date')
    plt.ylabel('Weighted Average')
    plt.xticks(date_range, date_range.strftime('%b %Y'), rotation=45)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()



def plot_deploys_over_time(repo_name, date_range):
    """Plot deploys against rolled-back deploys of a repository over time.

    Reads ``../output/deploys/{repo_name}_deploys.csv`` and draws the
    ``DEPLOYS`` and ``ROLLBACKED_DEPLOYS`` columns against ``PERIOD_DATE``
    in one figure, writing the rounded y value next to every plotted point.

    Args:
        repo_name: Used to build the CSV path and the figure title.
        date_range: Datetime index (it must provide ``strftime``) whose
            entries become the x tick positions and ``'%b %Y'`` labels.
    """
    deploys = pd.read_csv(f'../output/deploys/{repo_name}_deploys.csv')
    # Column names may carry surrounding spaces
    deploys.columns = deploys.columns.str.strip()
    deploys['PERIOD_DATE'] = pd.to_datetime(deploys['PERIOD_DATE'])

    plt.figure(figsize=(12, 6))
    series_specs = (
        ('DEPLOYS', 'Deploys', 'green'),
        ('ROLLBACKED_DEPLOYS', 'Rollbacks', 'red'),
    )
    for column, legend_label, line_color in series_specs:
        sns.lineplot(data=deploys, x='PERIOD_DATE', y=column, label=legend_label, color=line_color)

    # Annotate every point of every line on the current axes with its value
    for drawn_line in plt.gca().lines:
        points = zip(drawn_line.get_xdata(), drawn_line.get_ydata())
        for x_pos, y_pos in points:
            plt.text(x_pos, y_pos, f'{y_pos:.0f}', fontsize=9, ha='right')

    plt.title(f'Deploys vs Rollbacks - {repo_name}')
    plt.xlabel('Date')
    plt.xticks(date_range, date_range.strftime('%b %Y'), rotation=45)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Notebook driver: render every report for one repository, using the same
# tick dates (one per period of the 'M' frequency across 2023-2024) on all
# figures.
# NOTE(review): newer pandas releases reportedly prefer the 'ME' alias for
# month-end frequency; confirm against the installed version before changing.
repo_name = 'pa-ta-ap'
date_range = pd.date_range(start='2023-01-01', end='2024-12-31', freq='M')
for plot_report in (
    plot_ck_over_time,
    plot_mi_over_time,
    plot_quality_indicator_over_time,
    plot_deploys_over_time,
):
    plot_report(repo_name, date_range)

