In [6]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def generate_mock_jira_data(start_date, end_date, seed=None):
    """Build a DataFrame of synthetic Jira issues organised into 14-day sprints.

    Sprints are laid out back to back starting at ``start_date``; a new sprint
    is opened for as long as its start falls strictly before ``end_date``, so
    the final sprint may run past ``end_date``.  For every sprint each of six
    fixed team members receives a story-point budget drawn from a
    seniority-specific range, and that budget is split into individual issues.

    Args:
        start_date: ``datetime`` on which the first sprint starts.
        end_date: ``datetime`` bounding the sprint start dates (exclusive).
        seed: Optional integer seed.  When given, a private
            ``numpy.random.RandomState`` is used so the result is reproducible
            and NumPy's global random state is left untouched.  When ``None``
            (the default) numbers are drawn from the global ``numpy.random``
            state.

    Returns:
        ``pandas.DataFrame`` with one row per issue and the columns
        ``Issue Key``, ``Issue Type``, ``Status``, ``Priority``,
        ``Created Date``, ``Resolved Date``, ``Sprint``, ``Story Points``,
        ``Assignee``, ``Component``, ``Sprint Start Date``,
        ``Sprint End Date`` and ``Quarter``.  ``Resolved Date`` is empty when
        the drawn resolution date falls after the sprint end.  Note that an
        issue with a ``Resolved Date`` still has a 10% chance of a non-"Done"
        status.
    """
    # Both the numpy.random module and a RandomState instance expose
    # randint / choice / random_sample, so the body is agnostic to which is used.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Team roster: display name -> seniority level
    team_members = {
        'Junior Engineer 1': 'junior',
        'Junior Engineer 2': 'junior',
        'Mid Engineer 1': 'mid',
        'Mid Engineer 2': 'mid',
        'Senior Engineer 1': 'senior',
        'Senior Engineer 2': 'senior'
    }

    # Inclusive (low, high) story-point budget per sprint for each level
    story_point_ranges = {
        'junior': (3, 8),
        'mid': (5, 10),
        'senior': (10, 12)
    }

    # Higher probability of completion to make forecast closer to actuals
    completion_probability = 0.9

    # Generate consecutive 14-day sprints (start day + 13 days)
    sprints = []
    sprint_start = start_date
    while sprint_start < end_date:
        sprint_end = sprint_start + timedelta(days=13)
        sprints.append({
            'Sprint': f'Sprint {len(sprints) + 1}',
            'Start Date': sprint_start,
            'End Date': sprint_end
        })
        sprint_start = sprint_end + timedelta(days=1)

    # Round-tripping through a DataFrame turns the dates into pandas Timestamps
    sprints_df = pd.DataFrame(sprints)

    # Generate issues
    issues = []
    issue_key = 1

    for sprint_number, (_, sprint) in enumerate(sprints_df.iterrows(), start=1):
        sprint_first_day = sprint['Start Date']
        sprint_last_day = sprint['End Date']

        # A sprint counts as a vacation sprint when it starts in the first week of July
        is_vacation_week = sprint_first_day.month == 7 and sprint_first_day.day <= 7

        for member, level in team_members.items():
            # During a vacation sprint each member is independently absent with
            # probability 0.5 (about three of the six on average, not exactly three).
            if is_vacation_week and rng.random_sample() < 0.5:
                continue

            low, high = story_point_ranges[level]

            # Special case for Senior Engineer 2 (alternating pattern)
            if member == 'Senior Engineer 2':
                if sprint_number % 2 == 0:
                    story_points_total = 3  # Every even-numbered sprint, only 3 story points
                else:
                    story_points_total = rng.randint(10, 13)  # Normal range for odd-numbered sprints
            else:
                story_points_total = rng.randint(low, high + 1)

            # Split the member's budget into issues until it is used up
            while story_points_total > 0:
                story_points = rng.choice([1, 2, 3, 5, 8], p=[0.1, 0.2, 0.3, 0.3, 0.1])
                # Never hand out more than what is left of the budget
                story_points = min(story_points, story_points_total)

                created_date = sprint_first_day + timedelta(days=rng.randint(0, 14))
                resolved_date = created_date + timedelta(days=rng.randint(1, 14))
                if resolved_date > sprint_last_day:
                    # Not finished within the sprint
                    resolved_date = None

                # NOTE: the draws below stay in this exact order so that the
                # random stream is consumed identically on every run.
                issue_type = rng.choice(['Bug', 'Task', 'Story'], p=[0.3, 0.3, 0.4])
                if resolved_date is not None and rng.random_sample() < completion_probability:
                    status = 'Done'
                else:
                    status = rng.choice(['To Do', 'In Progress', 'In Review'], p=[0.2, 0.6, 0.2])
                priority = rng.choice(['High', 'Medium', 'Low'], p=[0.2, 0.6, 0.2])
                component = rng.choice(['Frontend', 'Backend', 'Database'], p=[0.4, 0.4, 0.2])

                issues.append({
                    'Issue Key': f'PROJ-{issue_key}',
                    'Issue Type': issue_type,
                    'Status': status,
                    'Priority': priority,
                    'Created Date': created_date,
                    'Resolved Date': resolved_date,
                    'Sprint': sprint['Sprint'],
                    'Story Points': story_points,
                    'Assignee': member,
                    'Component': component,
                    'Sprint Start Date': sprint_first_day,
                    'Sprint End Date': sprint_last_day,
                    'Quarter': f'Q{(sprint_first_day.month - 1) // 3 + 1}'
                })

                issue_key += 1
                story_points_total -= story_points

    return pd.DataFrame(issues)

# Generate data for January to October 2024
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 10, 31)
jira_data = generate_mock_jira_data(start_date, end_date)

# Save to CSV (consumed by the visualisation cell further down)
jira_data.to_csv('mock_jira_data.csv', index=False)

# Print first few rows and data info
print(jira_data.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
jira_data.info()

# Calculate and print some statistics
print("\nTotal Story Points per Quarter:")
print(jira_data.groupby('Quarter')['Story Points'].sum())

print("\nTotal Story Points per Team Member:")
print(jira_data.groupby('Assignee')['Story Points'].sum().sort_values(ascending=False))

print("\nAverage Story Points per Sprint:")
print(jira_data.groupby('Sprint')['Story Points'].mean())

# Print story points for Senior Engineer 2 by sprint
is_senior_engineer_2 = jira_data['Assignee'] == 'Senior Engineer 2'
senior_engineer_2_points = jira_data[is_senior_engineer_2].groupby('Sprint')['Story Points'].sum()
print("\nStory Points for Senior Engineer 2 by Sprint:")
print(senior_engineer_2_points)

  Issue Key Issue Type       Status Priority Created Date Resolved Date  \
0    PROJ-1      Story         Done     High   2024-01-07    2024-01-09   
1    PROJ-2      Story  In Progress   Medium   2024-01-05           NaT   
2    PROJ-3       Task  In Progress   Medium   2024-01-13           NaT   
3    PROJ-4      Story  In Progress     High   2024-01-14           NaT   
4    PROJ-5       Task         Done   Medium   2024-01-02    2024-01-13   

     Sprint  Story Points           Assignee Component Sprint Start Date  \
0  Sprint 1             3  Junior Engineer 1  Database        2024-01-01   
1  Sprint 1             4  Junior Engineer 1  Frontend        2024-01-01   
2  Sprint 1             3  Junior Engineer 2  Database        2024-01-01   
3  Sprint 1             1  Junior Engineer 2  Frontend        2024-01-01   
4  Sprint 1             5     Mid Engineer 1   Backend        2024-01-01   

  Sprint End Date Quarter  
0      2024-01-14      Q1  
1      2024-01-14      Q1  
2      2

In [36]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Load the data
df = pd.read_csv('mock_jira_data.csv')

# Convert date columns to datetime (read_csv loads them as plain strings)
date_columns = ['Created Date', 'Resolved Date', 'Sprint Start Date', 'Sprint End Date']
for col in date_columns:
    df[col] = pd.to_datetime(df[col])

# Extract sprint number for sorting and labeling.
# expand=False yields a Series (not a one-column DataFrame) for the single group.
df['Sprint Number'] = df['Sprint'].str.extract(r'(\d+)', expand=False).astype(int)

# Simplify engineer names
df['Simplified Engineer'] = df['Assignee'].replace({
    'Junior Engineer 1': 'Junior 1',
    'Junior Engineer 2': 'Junior 2',
    'Mid Engineer 1': 'Mid 1',
    'Mid Engineer 2': 'Mid 2',
    'Senior Engineer 1': 'Senior 1',
    'Senior Engineer 2': 'Senior 2'
})

# 1. Stacked Bar Chart: Engineers' Contributions per Sprint
# Create the figure and axes up front and hand the axes to pandas below.
# Without ax=..., DataFrame.plot opens a figure of its own, so the figsize
# requested here would be ignored and an empty figure would be left open.
fig, ax = plt.subplots(figsize=(24, 10), facecolor='none')  # facecolor none for transparency

# Define a color palette that matches the screenshot more closely
color_palette = ['#264653', '#2A9D8F', '#E9C46A', '#F4A261', '#E76F51', '#8AB17D']

# Rows: sprint number, columns: engineer, values: summed story points
sprint_contributions = df.pivot_table(values='Story Points', index='Sprint Number', columns='Simplified Engineer', aggfunc='sum')
sprint_contributions = sprint_contributions.sort_index()

# Draw the stacked bars on the prepared axes with a transparent background
sprint_contributions.plot(kind='bar', stacked=True, width=0.8, color=color_palette, ax=ax)
ax.set_facecolor('none')  # Set axes background to transparent
fig.patch.set_alpha(0.0)  # Set figure background to transparent

plt.title('Engineer Contributions per Sprint', fontsize=20, fontweight='bold')
plt.xlabel('Sprint Number', fontsize=16, fontweight='bold')
plt.ylabel('Story Points', fontsize=16, fontweight='bold')

# Adjust legend: place it outside the plot area and make all of its text bold
legend = plt.legend(title='Engineer', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=12)
legend.get_title().set_fontweight('bold')
for text in legend.get_texts():
    text.set_fontweight('bold')

# Adjust x-axis labels to show only sprint numbers
plt.xticks(range(len(sprint_contributions)), sprint_contributions.index, rotation=0, fontsize=12, fontweight='bold')

# Make y-axis labels bold
plt.yticks(fontsize=12, fontweight='bold')

# Add gridlines for better readability
plt.grid(axis='y', linestyle='--', alpha=0.3)

# Adjust layout to prevent cutting off labels
plt.tight_layout()
plt.subplots_adjust(right=0.85)  # Adjust right margin for legend

plt.savefig('engineer_contributions_per_sprint.png', dpi=300, bbox_inches='tight', transparent=True)
# Close this specific figure so nothing is left open for the notebook to render
plt.close(fig)

print("Updated visualization with transparent background and bolder text has been saved as 'engineer_contributions_per_sprint.png'.")


# 2. Burndown Chart: Forecast vs Actuals Sprint by Sprint
plt.figure(figsize=(20, 10), facecolor='none')  # Increased size and set transparent background

# Calculate total points and completed points per sprint
sprint_total = df.groupby('Sprint Number')['Story Points'].sum()
# Only issues whose status is 'Done' count as completed; everything else contributes 0
df['Completed'] = df['Story Points'].where(df['Status'] == 'Done', 0)
sprint_completed = df.groupby('Sprint Number')['Completed'].sum()

# Calculate cumulative completed points and remaining points after each sprint
total_points = sprint_total.sum()
cumulative_completed = sprint_completed.cumsum()
remaining_points = total_points - cumulative_completed

# Create forecast (stairstep) extending to the end of the year.
# The year is taken from the data rather than hard-coded, so the extension
# also works for data sets that do not end in 2024.
sprints = sprint_total.index
last_sprint_date = df['Sprint End Date'].max()
year_end = pd.Timestamp(year=last_sprint_date.year, month=12, day=31)
sprints_to_year_end = pd.date_range(start=last_sprint_date, end=year_end, freq='14D')
total_sprints = len(sprints) + len(sprints_to_year_end)

# The x axis is the sprint number: 0 is the starting point before any sprint
# has finished and total_sprints is the last forecast sprint.  Including both
# ends lets the linear forecast run from total_points all the way down to 0.
sprint_axis = range(total_sprints + 1)
forecast = [total_points - (i * total_points / total_sprints) for i in sprint_axis]

# Plotting
ax = plt.gca()
ax.set_facecolor('white')  # Set axes background to white for better visibility
ax.patch.set_alpha(0.7)  # Make the white background slightly transparent

plt.step(sprint_axis, forecast, where='post', label='Forecast', marker='o', color='blue', linewidth=3, markersize=8)
# The actual line is plotted at x = sprint number, i.e. on the same axis as the forecast
plt.plot(sprints, remaining_points, label='Actual', marker='o', color='red', linewidth=3, markersize=8)

plt.title('Sprint-by-Sprint Burndown: Forecast vs Actual', fontsize=20, fontweight='bold')
plt.xlabel('Sprint', fontsize=16, fontweight='bold')
plt.ylabel('Remaining Story Points', fontsize=16, fontweight='bold')

# Label every tick with the sprint number it actually sits on, so the label
# under each 'Actual' point is that point's own sprint.
all_sprint_labels = [str(i) for i in sprint_axis]
plt.xticks(sprint_axis, all_sprint_labels, rotation=45, ha='right', fontsize=12, fontweight='bold')
plt.yticks(fontsize=12, fontweight='bold')

# Make axis labels bold
ax.xaxis.label.set_weight('bold')
ax.yaxis.label.set_weight('bold')

# Make tick labels bold
for label in ax.get_xticklabels() + ax.get_yticklabels():
    label.set_fontweight('bold')

# Create legend and make its text bold
legend = plt.legend(fontsize=14, loc='upper right')
for text in legend.get_texts():
    text.set_fontweight('bold')

plt.grid(True, alpha=0.5, linestyle='--')  # More visible grid

# Add some padding to the plot
plt.xlim(-0.5, total_sprints + 0.5)
plt.ylim(0, total_points * 1.1)

# Adjust layout
plt.tight_layout()

# Save the figure with a transparent background
plt.savefig('sprint_burndown.png', dpi=300, bbox_inches='tight', transparent=True)
plt.close()

print("Updated burndown chart with improved visibility and bold text has been saved as 'sprint_burndown.png'.")

Updated visualization with transparent background and bolder text has been saved as 'engineer_contributions_per_sprint.png'.
Updated burndown chart with improved visibility and bold text has been saved as 'sprint_burndown.png'.


<Figure size 2400x1000 with 0 Axes>