In [30]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json
import datetime
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from collections import defaultdict

In [32]:
# Location of the CSV export to analyse -- replace the placeholder before running.
# Raw (not f-) string: backslashes and braces in a pasted path are taken literally.
INPUT_CSV_PATH = r'Path to the input file'

# Later cells read the columns 'comments', 'activity_log', 'commits', 'history',
# 'created', 'updated' and 'status' from this frame.
df = pd.read_csv(INPUT_CSV_PATH)

In [33]:
def parse_json_column(column):
    """Decode a Series of JSON-array strings into Python lists.

    Parameters
    ----------
    column : pandas.Series
        Cells expected to hold JSON text such as ``'[{"a": 1}]'``.

    Returns
    -------
    pandas.Series
        Same index as ``column``. A cell is decoded with ``json.loads`` when it is a
        string whose first non-whitespace character is ``[``; every other cell
        (NaN/None, non-string values, strings that are not JSON arrays) becomes a
        fresh empty list.

    Raises
    ------
    json.JSONDecodeError
        If a cell looks like a JSON array but is not valid JSON.
    """
    def _decode(cell):
        # Leading whitespace is legal JSON, so skip it when sniffing for an array;
        # a bare startswith("[") would silently turn ' [...]' into an empty list.
        # Only the four JSON whitespace characters are stripped.
        if isinstance(cell, str) and cell.lstrip(" \t\r\n").startswith("["):
            return json.loads(cell)
        return []

    return column.apply(_decode)

# Decode the JSON-encoded list columns once, storing each result next to its raw
# source column under a '<name>_parsed' key.
df['comments_parsed'] = df['comments'].pipe(parse_json_column)
df['activity_log_parsed'] = df['activity_log'].pipe(parse_json_column)
df['commits_parsed'] = df['commits'].pipe(parse_json_column)

In [34]:
def prepare_dates(dataframe):
    """Add datetime versions of the ``created`` and ``updated`` columns.

    The frame is modified in place: ``created_dt`` and ``updated_dt`` are written
    (or overwritten) with ``pd.to_datetime`` of the corresponding raw column.

    Returns the same ``dataframe`` object so the call can be used in an assignment.
    """
    # target column -> raw source column, processed in insertion order
    conversions = {'created_dt': 'created', 'updated_dt': 'updated'}
    for parsed_name, raw_name in conversions.items():
        dataframe[parsed_name] = pd.to_datetime(dataframe[raw_name])
    return dataframe

# Attach the parsed 'created_dt' / 'updated_dt' columns used by the plotting cells.
df = df.pipe(prepare_dates)

In [35]:
def plot_weekly_comments(dataframe):
    """Draw a line chart of the total number of comments per week.

    Rows are bucketed by the pandas ``'W'`` period of ``created_dt`` and the lengths
    of their ``comments_parsed`` lists are summed per bucket, so each point counts
    the comments attached to issues *created* in that week.

    Side effect: writes (or overwrites) a ``week`` column on ``dataframe`` holding
    the start timestamp of each row's creation week.
    """
    creation_period = dataframe['created_dt'].dt.to_period('W')
    dataframe['week'] = creation_period.apply(lambda period: period.start_time)

    def _total_items(cells):
        # Sum of list lengths across all rows of one weekly group.
        return sum(map(len, cells))

    weekly_totals = dataframe.groupby('week')['comments_parsed'].apply(_total_items)

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(weekly_totals.index, weekly_totals.values, marker='o', linewidth=2)
    ax.set_title('Weekly Comment Activity')
    ax.set_xlabel('Week')
    ax.set_ylabel('Number of Comments')
    ax.grid(True, linestyle='--', alpha=0.6)
    plt.xticks(rotation=45)
    fig.tight_layout()
    plt.show()

In [36]:
def plot_weekly_transitions(dataframe):
    """Draw a line chart of the total number of activity-log entries per week.

    Rows are bucketed by the pandas ``'W'`` period of ``created_dt`` and the lengths
    of their ``activity_log_parsed`` lists are summed per bucket, so each point
    counts the log entries of issues *created* in that week.

    Side effect: writes (or overwrites) a ``week`` column on ``dataframe`` holding
    the start timestamp of each row's creation week.
    """
    creation_period = dataframe['created_dt'].dt.to_period('W')
    dataframe['week'] = creation_period.apply(lambda period: period.start_time)

    def _total_items(logs):
        # Sum of list lengths across all rows of one weekly group.
        return sum(map(len, logs))

    weekly_totals = dataframe.groupby('week')['activity_log_parsed'].apply(_total_items)

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(weekly_totals.index, weekly_totals.values, marker='s', linewidth=2, color='orange')
    ax.set_title('Weekly Status Transitions')
    ax.set_xlabel('Week')
    ax.set_ylabel('Number of Transitions')
    ax.grid(True, linestyle='--', alpha=0.6)
    plt.xticks(rotation=45)
    fig.tight_layout()
    plt.show()

In [37]:
def plot_weekly_commits(dataframe):
    """Draw a line chart of the total number of linked commits per week.

    Rows are bucketed by the pandas ``'W'`` period of ``created_dt`` and the lengths
    of their ``commits_parsed`` lists are summed per bucket, so each point counts
    the commits attached to issues *created* in that week.

    Side effect: writes (or overwrites) a ``week`` column on ``dataframe`` holding
    the start timestamp of each row's creation week.
    """
    creation_period = dataframe['created_dt'].dt.to_period('W')
    dataframe['week'] = creation_period.apply(lambda period: period.start_time)

    def _total_items(commit_lists):
        # Sum of list lengths across all rows of one weekly group.
        return sum(map(len, commit_lists))

    weekly_totals = dataframe.groupby('week')['commits_parsed'].apply(_total_items)

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(weekly_totals.index, weekly_totals.values, marker='^', linewidth=2, color='green')
    ax.set_title('Weekly Commits Linked to Issues')
    ax.set_xlabel('Week')
    ax.set_ylabel('Number of Commits')
    ax.grid(True, linestyle='--', alpha=0.6)
    plt.xticks(rotation=45)
    fig.tight_layout()
    plt.show()

In [38]:
def plot_weekly_resolution_time(dataframe):
    """Draw a line chart of the mean created-to-updated gap per creation week.

    "Resolution time" here is ``updated_dt - created_dt`` in fractional days for
    every row; no filtering on issue status is applied.

    Side effects: writes (or overwrites) the ``resolution_days`` and ``week``
    columns on ``dataframe``.
    """
    seconds_per_day = 60 * 60 * 24
    elapsed = dataframe['updated_dt'] - dataframe['created_dt']
    dataframe['resolution_days'] = elapsed.dt.total_seconds() / seconds_per_day

    creation_period = dataframe['created_dt'].dt.to_period('W')
    dataframe['week'] = creation_period.apply(lambda period: period.start_time)

    weekly_mean = dataframe.groupby('week')['resolution_days'].mean()

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(weekly_mean.index, weekly_mean.values, marker='D', linewidth=2, color='purple')
    ax.set_title('Average Resolution Time per Week (days)')
    ax.set_xlabel('Week')
    ax.set_ylabel('Resolution Time (days)')
    ax.grid(True, linestyle='--', alpha=0.6)
    plt.xticks(rotation=45)
    fig.tight_layout()
    plt.show()

In [44]:
def plot_change_density(dataframe):
    """Draw a histogram of the number of recorded changes per issue.

    The per-row change count is the length of ``activity_log_parsed`` plus the
    length of the ``history`` column after decoding it with ``parse_json_column``.
    ``dataframe`` itself is not modified.
    """
    activity_lengths = dataframe['activity_log_parsed'].apply(len)
    history_lengths = parse_json_column(dataframe['history']).apply(len)
    changes_per_issue = activity_lengths + history_lengths

    fig, ax = plt.subplots(figsize=(12, 6))
    changes_per_issue.plot.hist(ax=ax, bins=20, color='steelblue', edgecolor='black')
    ax.set_title('Change Density per Issue (Activity + History)')
    ax.set_xlabel('Number of Changes')
    ax.set_ylabel('Number of Issues')
    ax.grid(axis='y', linestyle='--', alpha=0.6)
    fig.tight_layout()
    plt.show()

In [46]:
def plot_comment_resolution_correlation(dataframe):
    """Scatter-plot comment count against created-to-updated gap for every row.

    Side effects: writes (or overwrites) ``comment_count`` (length of
    ``comments_parsed``) and ``resolution_days`` (``updated_dt - created_dt`` in
    fractional days) on ``dataframe``.
    """
    seconds_per_day = 60 * 60 * 24

    dataframe['comment_count'] = dataframe['comments_parsed'].apply(len)
    elapsed = dataframe['updated_dt'] - dataframe['created_dt']
    dataframe['resolution_days'] = elapsed.dt.total_seconds() / seconds_per_day

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(dataframe['comment_count'], dataframe['resolution_days'], alpha=0.6, color='teal')
    ax.set_title('Comments vs. Resolution Time')
    ax.set_xlabel('Number of Comments')
    ax.set_ylabel('Resolution Time (days)')
    ax.grid(True, linestyle='--', alpha=0.6)
    fig.tight_layout()
    plt.show()

In [47]:
def count_reopenings(log):
    """Count how many log entries move an issue into a status it already had.

    Parameters
    ----------
    log : iterable of dict
        Activity-log entries; the target status is read from each entry's ``'to'``
        key.

    Returns
    -------
    int
        Number of entries whose ``'to'`` value equals the ``'to'`` value of an
        earlier entry. Entries with no target (key missing, ``None`` or ``''``)
        are ignored, so missing data is never counted as a return to a status.
    """
    # Distinct statuses only: membership checks then scale with the number of
    # different statuses rather than the length of the log. A list (not a set) is
    # kept so unhashable 'to' values still work.
    seen_statuses = []
    revisits = 0
    for entry in log:
        target = entry.get('to')
        if target is None or target == '':
            continue
        if target in seen_statuses:
            revisits += 1
        else:
            seen_statuses.append(target)
    return revisits

def plot_reopenings(dataframe):
    """Draw a bar chart of how many issues have each reopen count.

    The reopen count of a row is ``count_reopenings`` applied to its
    ``activity_log_parsed`` list; bars are ordered by that count.

    Side effect: writes (or overwrites) a ``reopens`` column on ``dataframe``.
    """
    dataframe['reopens'] = dataframe['activity_log_parsed'].apply(count_reopenings)
    issues_per_count = dataframe['reopens'].value_counts().sort_index()

    fig, ax = plt.subplots(figsize=(10, 5))
    issues_per_count.plot.bar(ax=ax, color='tomato')
    ax.set_title('Task Reopenings (Returning to Previous Status)')
    ax.set_xlabel('Number of Reopens')
    ax.set_ylabel('Number of Issues')
    ax.grid(axis='y', linestyle='--', alpha=0.6)
    fig.tight_layout()
    plt.show()

In [48]:
def plot_update_gaps(dataframe):
    """Draw a histogram of the gap between creation and last update, in days.

    Side effect: writes (or overwrites) an ``update_gap`` column on ``dataframe``
    holding ``updated_dt - created_dt`` as fractional days.
    """
    seconds_per_day = 60 * 60 * 24
    elapsed = dataframe['updated_dt'] - dataframe['created_dt']
    dataframe['update_gap'] = elapsed.dt.total_seconds() / seconds_per_day

    fig, ax = plt.subplots(figsize=(10, 5))
    dataframe['update_gap'].plot.hist(ax=ax, bins=20, color='darkorange', edgecolor='black')
    ax.set_title('Update Gaps: Created vs. Last Update')
    ax.set_xlabel('Gap Duration (days)')
    ax.set_ylabel('Number of Issues')
    ax.grid(axis='y', linestyle='--', alpha=0.6)
    fig.tight_layout()
    plt.show()

In [49]:
def plot_created_vs_concluded(dataframe):
    """Plot weekly counts of created issues against concluded issues.

    * "Created" counts every row, bucketed by the pandas ``'W'`` period of
      ``created_dt``.
    * "Concluded" counts rows whose ``status`` equals ``'Concluído'``, bucketed by
      the ``'W'`` period of ``updated_dt``.

    Buckets are weekly; x-axis ticks are formatted as month/year.

    Side effect: writes (or overwrites) a ``week`` column on ``dataframe`` holding
    the start timestamp of each row's creation week. The concluded subset is an
    independent copy, so its own ``week`` values never leak back into ``dataframe``.
    """
    dataframe['week'] = dataframe['created_dt'].dt.to_period('W').apply(lambda r: r.start_time)
    created_counts = dataframe.groupby('week').size()

    # Take an explicit copy before adding a column: assigning into the result of a
    # boolean filter is a chained assignment that pandas flags with
    # SettingWithCopyWarning.
    concluded = dataframe.loc[dataframe['status'] == 'Concluído'].copy()
    # Overwrite the inherited creation week with the week of the last update.
    concluded['week'] = concluded['updated_dt'].dt.to_period('W').apply(lambda r: r.start_time)
    concluded_counts = concluded.groupby('week').size()

    fig, ax = plt.subplots(figsize=(15, 8))

    ax.plot(created_counts.index, created_counts.values, label='Created',
            linewidth=3, marker='o', linestyle='-', markersize=8)
    ax.plot(concluded_counts.index, concluded_counts.values, label='Concluded',
            linewidth=3, marker='s', linestyle='--', markersize=8)

    ax.set_xlabel('Month', fontsize=16, labelpad=10)
    ax.set_ylabel('Number of Tasks', fontsize=16, labelpad=10)
    ax.tick_params(axis='both', which='major', labelsize=14)
    ax.legend(fontsize=14)
    ax.grid(True, linestyle='--', alpha=0.6)

    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b/%Y'))
    plt.xticks(rotation=45)

    ax.margins(x=0.01, y=0.1)

    fig.tight_layout()
    plt.show()

In [50]:
def plot_comments_vs_reopens(dataframe):
    """Scatter-plot comment count against reopen count for every row.

    The raw ``comments`` and ``activity_log`` columns are decoded again with
    ``parse_json_column``, overwriting any existing ``comments_parsed`` and
    ``activity_log_parsed`` columns.

    Side effects: besides the two ``*_parsed`` columns, writes (or overwrites)
    ``comment_count`` and ``reopen_count`` on ``dataframe``.
    """
    dataframe['comments_parsed'] = parse_json_column(dataframe['comments'])
    dataframe['activity_log_parsed'] = parse_json_column(dataframe['activity_log'])

    comment_lists = dataframe['comments_parsed']
    dataframe['comment_count'] = comment_lists.apply(len)
    transition_logs = dataframe['activity_log_parsed']
    dataframe['reopen_count'] = transition_logs.apply(count_reopenings)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(dataframe['comment_count'], dataframe['reopen_count'],
               alpha=0.7, color='mediumseagreen')
    ax.set_title('Comments vs. Reopen Count per Task')
    ax.set_xlabel('Number of Comments')
    ax.set_ylabel('Number of Reopens')
    ax.grid(True, linestyle='--', alpha=0.6)
    fig.tight_layout()
    plt.show()