In [1]:
from crewai import Agent, Task, Crew, LLM, Process
from src.agents.tools.data_extraction import JiraDataExtraction
from src.agents.tools.data_processing import JiraDataProcessing
from src.agents.tools.data_analysis import ListJiraReports, ReadJiraReport, SaveJiraData, JsonFileOperations
import yaml
import os
import json
import glob
import pandas as pd
from datetime import datetime
from crewai.tools import tool
from IPython.display import Markdown

### LLM Setup

In [2]:
# Route OpenAI-compatible traffic to the local endpoint on 127.0.0.1:8899
# (presumably a proxy) and supply a placeholder API key.
os.environ.update(
    {
        "OPENAI_API_URL": "http://127.0.0.1:8899/v1",
        "OPENAI_API_BASE": "http://127.0.0.1:8899/v1",
        "OPENAI_API_KEY": "dummy-key",
    }
)

In [3]:

# Shared LLM handle passed to both agents below ('o1-preview' is the alternative the author left noted).
llm = LLM(model="gpt-4o")

### Analytics Engineer agent

Tasks: Data Ingestion and Data Processing

In [4]:
# Agent assigned to the data ingestion and data processing tasks.
analytics_engineer_agent = Agent(
    llm=llm,
    verbose=True,
    # Delegation is disabled so this agent handles the data itself.
    allow_delegation=False,
    role="Senior Jira Analytics Engineer",
    goal="""
    Transform and optimize Jira project data into structured, analysis-ready formats while ensuring data quality, 
    completeness, and compliance with best practices for project analyst consumption.
    """,
    backstory="""
    You are a specialized Data Integration Engineer with over 10 years of experience in Jira data processing and analytics. 
    Your core expertise includes:
    
    Technical Skills:
    - Advanced Jira API integration and data extraction
    - Data cleaning, transformation, and validation
    - CSV and Markdown report generation
    - Data quality assurance and validation
    
    Domain Knowledge:
    - Deep understanding of Jira data structures and relationships
    - Expertise in agile project management metrics
    - Strong background in data documentation and reporting
    
    Best Practices:
    - Implements robust error handling and data validation
    - Ensures data consistency and standardization
    - Maintains clear documentation and audit trails
    - Follows data privacy and security guidelines
    
    Your primary focus is on delivering high-quality, actionable data that enables project analysts 
    to make informed decisions and generate valuable insights from Jira project information.
    """,
)


In [5]:
# First task: pull the Jira board overview into a CSV file.
# {project_id} and {labels} are placeholders filled from the crew inputs.
data_ingestion_task = Task(
    agent=analytics_engineer_agent,
    tools=[JiraDataExtraction()],
    expected_output="Full path to the generated CSV file containing the Jira board overview data",
    description="""
    Extract and process Jira project data to create a structured CSV file containing the board overview.

    Key Objectives:
    1. Connect to Jira and extract project board data
    2. Generate a clean, well-structured CSV file
    3. Ensure all relevant project information is captured

    Required Action:
    Use the ingest_board_overview() action from the JiraDataExtraction tool with these mandatory parameters:
    - project_id: {project_id} (Jira project identifier)
    - labels: {labels} (List of labels to filter issues)

    Success Criteria:
    - Successfully connected to Jira API
    - Data extracted without errors
    - CSV file created with proper formatting
    - All specified labels included in the extraction
    - Data properly sanitized and structured

    Output Format:
    The CSV file should contain:
    - One row per issue/item
    - Consistent date formats
    - No missing or corrupted data
    - UTF-8 encoding

    """,
)


In [6]:
# Second task: turn the CSV from the ingestion task into per-team markdown reports.
# {start_date} and {end_date} are placeholders filled from the crew inputs.
data_processing_task = Task(
    agent=analytics_engineer_agent,
    tools=[JiraDataProcessing()],
    # The ingestion task is declared as context for this one.
    context=[data_ingestion_task],
    expected_output="Directory path containing the generated markdown reports",
    description="""
    Convert the Jira CSV data into individual team-specific markdown reports for the project analyst's review.

    Key Objectives:
    1. Filter Jira updates between {start_date} and {end_date}
    2. Generate separate markdown reports for each team
    3. Ensure reports contain only relevant updates within the specified date range

    Required Action:
    Use the create_teams_markdowns() action from the JiraDataProcessing tool with these parameters:
    - csv_file: Path to the previously generated CSV file
    - start_date: {start_date} (format: YYYY-MM-DD)
    - end_date: {end_date} (format: YYYY-MM-DD)

    Success Criteria:
    - Each team should have its own markdown report
    - Reports should only include updates within the specified date range
    - Reports should be properly formatted for analyst review
    - All markdown files should be saved in an organized directory structure

    Note: Ensure the CSV file exists and is accessible before processing.
    """,
)

### Project Analyst Agent

In [7]:
# Agent assigned to the follow-up generation, consolidation and Slack-report tasks.
project_analyst = Agent(
    llm=llm,
    verbose=True,
    allow_delegation=False,
    allow_code_execution=False,
    role="Senior Project Performance Analyst",
    goal="""
    Deliver comprehensive, data-driven analysis of team performance through Jira metrics, providing actionable insights 
    and maintaining effective communication channels with development teams to drive continuous improvement.
    """,
    backstory="""
    You are a seasoned Project Performance Analyst with 10+ years of experience in agile environments, specializing in 
    team productivity analysis and process optimization.

    Technical Expertise:
    - Advanced analysis of Jira metrics and team performance indicators
    - Deep understanding of agile development workflows and metrics
    - Expert in interpreting issue relationships and dependencies
    - Proficient in identifying patterns and trends in project data

    Analytical Skills:
    - Strategic issue analysis and root cause identification
    - Data-driven decision-making and recommendation formulation
    - Sprint performance evaluation and optimization
    - Risk assessment and mitigation strategy development

    Communication Excellence:
    - Diplomatic and effective feedback delivery
    - Clear and concise reporting style
    - Engaging and positive communication approach
    - Ability to maintain professional relationships while ensuring accountability

    Personal Attributes:
    - Known for combining professionalism with approachability
    - Maintains a positive, solution-focused mindset
    - Demonstrates emotional intelligence in team interactions
    - Balances humor with professionalism in communications
    - Expert at delivering constructive feedback without creating tension

    Best Practices:
    - Ensures all analyses are backed by concrete data
    - Maintains confidentiality and data security
    - Provides context-aware recommendations
    - Follows up systematically while maintaining positive team dynamics
    - Creates actionable, specific, and measurable improvement plans
    """,
)


In [8]:
from pathlib import Path
from typing import Tuple

# Reporting window (YYYY-MM-DD); the same values are handed to the crew via `inputs`.
start_date = "2025-02-03"
end_date = "2025-02-08"

# Folders compared by check_corresponding_files.
# NOTE(review): the task prompts tell the agent to save under "teams_json" (underscore),
# while these folders use a hyphen - confirm which spelling the tools actually write to.
md_path = f"teams-markdown/{start_date}_to_{end_date}"
json_path = f"teams-json/{start_date}_to_{end_date}"

def check_corresponding_files(result: str) -> Tuple[bool, str]:
    """
    Task guardrail: check that every markdown report has a matching JSON file.

    Files are paired by filename without extension, compared case-insensitively.
    The folders are read from the module-level ``md_path`` and ``json_path``
    variables; they are not taken from the argument.

    Args:
        result (str): Value handed to the guardrail by its caller. It is not inspected.

    Returns:
        Tuple[bool, str]: ``(True, <success message>)`` when no markdown report lacks a
        JSON counterpart; otherwise ``(False, <error message>)`` listing each pending
        report as ``<grandparent folder>/<parent folder>/report_<stem>.md``, ordered by
        lowercased stem.
    """
    # Lowercased stem -> display path (two parent folders plus the report file name).
    # NOTE(review): when md_path contains no *.md files nothing is pending, so the
    # check succeeds - confirm that is the intended outcome for an empty folder.
    reports_by_stem = {
        f.stem.lower(): f"{f.parent.parent.name}/{f.parent.name}/report_{f.stem}.md"
        for f in Path(md_path).glob("*.md")
    }

    # Lowercased stems of the JSON files already written.
    json_stems = {f.stem.lower() for f in Path(json_path).glob("*.json")}

    # Sorted so the message does not depend on directory listing order.
    unprocessed_files = [
        display
        for stem, display in sorted(reports_by_stem.items())
        if stem not in json_stems
    ]

    if unprocessed_files:
        return (False, f"ERROR: The following reports still need to be processed: {', '.join(unprocessed_files)}")
    return (True, "SUCCESS: All the json files were generated as expected")

In [9]:
# Third task: read every team markdown report and save one follow-up JSON per team.
# {start_date} and {end_date} are placeholders filled from the crew inputs.
fup_generation_task = Task(
    # Raw string: the formatting rules must show the two characters \n to the agent,
    # not an actual line break.
    description=r"""
    ## Task Overview
    Analyze team Jira reports and generate structured follow-ups between {start_date} and {end_date}.

    ## Phase 1: Report Collection and Tracking
    1. Use list_reports() action from ListJiraReports tool to get all markdown files:
       ```
       team_reports = list_reports("path_to_markdown_files")
       ```
    2. IMPORTANT: Create a processing tracking list:
       * Store the total number of reports to process: total_reports = len(team_reports)
       * Keep track of processed reports: processed_reports = 0
       * You must process ALL reports before completing the task

    ## Phase 2: Sequential Report Processing
    CRITICAL: You must follow this sequence for EACH report in team_reports:
    1. Get current report path: current_report = team_reports[processed_reports]
    2. Read report using read_report() from ReadJiraReport tool:
       ```
       team_data = read_report(current_report)
       ```
    3. Process report completely before moving to the next one
    4. After processing and saving, increment: processed_reports += 1
    5. Verify: processed_reports matches the report being handled

    ## Processing Checkpoints
    After each team processing:
    * Confirm JSON was saved successfully
    * Report current progress: "Processed <processed_reports> out of <total_reports> teams"
    * List the name of the team just processed
    * DO NOT proceed to next team until current team is fully processed

    Process the report according to the following structure:
    ```
    ## Team name: <team>
    ### Points of Contact
    [Contact list]
    ### Updated Issues
    [Recent updates]
    ### Not Updated Issues
    [Pending updates]
    ### This ends all the issues from the team <team> ###
    ```

    ## Phase 3: Analysis Workflow
    For each team report:

    1. EXTRACT CORE DATA
       * Team name (exact match)
       * Points of Contact list
       * Updated issues section
       * Non-updated issues section
       * Parent-child issue mappings

    2. PERFORM ISSUE ANALYSIS
       For each parent issue:
       * Analyze issue description
       * Review all comments
       * Examine child issues
       * Generate follow-up addressing:
         * Current progress
         * Blocking issues
         * Required clarifications
         * Specific action items
         * Hugo Zanini is the current project manager of multi-repos. He looks into the cards weekly. If there are comments from him, probably, are things you need to follow up too.


    3. GENERATE JSON OUTPUT
       Create a structured dictionary:
       * Team Information:
         * name: Exact team name
         * contacts: [@firstnamelastname format] #Example: John Doe -> @johndoe

       * Issue Arrays:
         * updated_issues: Recent activity
         * no_update_issues: Pending updates

       * Issue Details:
         * id: "DBPD-737"
         * title: Complete issue title
         * url: "https://nubank.atlassian.net/browse/DBPD-737"
         * workstream: Specific workstream
         * fup: Contextual follow-up comment


     4. SAVE & VERIFY OUTPUT
         Use the tool save_data, from the SaveJiraData tool to save the json data.

       * Execute save_data:
         ```
         save_data(
             data_dict=team_data,  # In the dictionary format
             file_name="<team_name>.json",
             base_path="teams_json",
             folder="{start_date}_to_{end_date}"
         )
         ```
       * Verify:
         * File exists in teams_json/{start_date}_to_{end_date}/<team_name>.json
         * Filename matches team
         * All fields present and valid

     ## Quality Guidelines
        1. Processing Guarantees:
           * Process teams in the exact order they appear in team_reports
           * Never skip any team in the sequence
           * Maintain a clear count of processed teams
           * Report progress after each team completion
           * Verify each team's JSON exists before proceeding

        2. Completion Requirements:
            * Task is NOT complete until processed_reports equals total_reports
            * Generate summary of all processed teams at the end
            * Verify all team JSONs exist in the output directory
            * Report any processing issues immediately

        3. Follow-up Quality:
           * Write professional, friendly communications
           * Use @firstnamelastname format for mentions. Example: John Doe -> @johndoe
           * Provide specific, actionable feedback
           * Reference relevant context and updates
           * Keep messages short and focused
           * Use appropriate corporate humor (relaxed but professional)
           * Include emojis for better readability
           * Bold critical information
           * Use bullet points for organization
           * Formatting Rules:
                * Always use "\n" for line breaks
                * Always use "•" for bullet points
                * Always bold titles with *
                * Always include relevant emojis

        4. Output Requirements:
           * Only proceed to the next team report once you have saved the json of the team being analyzed
           * Ensure JSON structure matches template
           * Validate all required fields present
           * Verify contact format compliance; **always** refer to people in the format @firstnamelastname.

    ## Progress Tracking Format
        After each team processing, report the following in the beginning of the next action:
        ```
        Team Processing Status:
        - Team Name: [team name]
        - Progress: [X] of [Y] teams processed
        - JSON Status: Saved successfully at [path]
        - Next Team: [next team name or "Task Complete"]
        ```

    # CRITICAL: YOU MUST NOT COMPLETE THE TASK UNTIL ALL TEAMS ARE PROCESSED

        """,

    # Doubled braces keep the JSON template's braces literal when placeholders are filled.
    expected_output= """
            Save the team JSON on the following format:
            {{
                "name": "Exact Team Name",
                "contacts": ["@firstnamelastname", "@firstnamelastname"], 
                "updated_issues": [
                    {{
                        "id": "ISSUE-KEY",
                        "title": "Exact Issue Title",
                        "url": "https://nubank.atlassian.net/browse/ISSUE-KEY",
                        "workstream": "Exact Workstream Name",
                        "fup": "Context-specific follow-up with @firstnamelastname mentions when needed"
                    }}
                ],
                "no_update_issues": [
                    {{
                        "id": "ISSUE-KEY",
                        "title": "Exact Issue Title",
                        "url": "https://nubank.atlassian.net/browse/ISSUE-KEY",
                        "workstream": "Exact Workstream Name",
                        "fup": "Context-specific follow-up with @firstnamelastname mentions when needed"
                    }}
                ]
            }}""",
    agent=project_analyst,
    tools = [ListJiraReports(), ReadJiraReport(), SaveJiraData()],
    context = [data_processing_task],
    # NOTE(review): the guardrail looks for JSON files under the module-level json_path
    # ("teams-json/..."), while this prompt tells the agent to save under "teams_json/...".
    # Confirm both resolve to the same folder; otherwise the check cannot see the saved files.
    guardrail = check_corresponding_files,
    max_retries=10
)

In [13]:
# Fourth task: merge the per-team JSON files into one consolidated JSON.
# {start_date} and {end_date} are placeholders filled from the crew inputs.
fups_consolidation_task = Task(
    description="""
    ## Task Overview
    Consolidate all team-specific JSON reports into a single comprehensive JSON file for the period {start_date} to {end_date}.

    ## Objective
    Create a unified report that combines all individual team JSONs while maintaining data integrity and structure.

    ## Required Action
    Use the generate_consolidated_report action from the SaveJiraData tool with these parameters:
    ```
    generate_consolidated_report(
        base_path="teams_json",  # Directory containing individual team JSONs
        folder="{start_date}_to_{end_date}"  # Target period folder
    )
    ```

    ## Success Criteria
    1. All individual team JSONs successfully merged
    2. Consolidated file maintains original data structure
    3. No data loss during consolidation
    4. Proper file naming and location

    ## Validation Steps
    - Verify consolidated JSON exists
    - Confirm all teams are included
    - Check data integrity
    - Validate JSON format

    # These keywords must never be translated and transformed:
        - Action:
        - Thought:
        - Action Input:
        because they are part of the thinking process instead of the output. 

    """,
    expected_output="""
    Path to the consolidated JSON file.
    """,
    agent=project_analyst,
    tools = [SaveJiraData()],
    # The follow-up generation task is declared as context for this one.
    context = [fup_generation_task]
)


In [14]:
# Builds the Slack message JSON from the consolidated report.
# Note: this task is currently commented out of the crew's task list.
report_generation_task = Task(
    # Raw string: the formatting rules and the template must show the two characters \n
    # to the agent, not actual line breaks.
    description=r"""
   # Slack Message Generation Task
    Generate a formatted slack message in the JSON format from the consolidated JSON generated in the previous task.

    ## Initial Data Loading
    CRITICAL - First Step:
    1. Get the path of the consolidated JSON file generated by the previous task
    2. Use this path to load the data using read_json action:
       ```
       consolidated_data = read_json(file_path=<path from previous task>)
       print("Data loaded successfully from previous task's output")
       ```

    ## Data Processing Sequence
    1. Data Cleaning:
       REQUIRED: Remove all teams without points of contact from consolidated_data
       STORE: Keep the filtered teams data for later use

    2. Workstream Analysis:
       A. CONSOLIDATE DATA:
          * Extract consolidated report with follow-ups
          * Separate work items by workstream:
            - Multi-repos Incremental
            - Multi-repos to scale

       B. ANALYZE KEY ELEMENTS:
          * Recent achievements and progress
          * Current challenges and blockers
          * Cross-team dependencies
          * Resource constraints
          * Timeline concerns

       C. CREATE WORKSTREAM SUMMARIES:
          For each workstream, generate text covering:
          * Key accomplishments
          * Major challenges
          * Action items and next steps

       D. CREATE A WORK SUMMARY:
          * Create a concise paragraph (max two sentences) summarizing the week's evolution.

    ## Message Formatting Requirements
    CRITICAL - ALL messages MUST follow these rules:
    1. Structure Rules:
       * Use "+" for string concatenation
       * Use "\n" for line breaks
       * Use "•" for bullet points
       * Bold titles with *
       * Include relevant emojis

    2. Content Guidelines:
       * Keep messages concise but specific. Executives should be able to read the summaries and understand what is going on without opening the Jira tickets.
       * Use professional yet relaxed tone
       * Include appropriate corporate humor
       * Bold critical information
       * Organize with bullet points

    ## JSON Generation
    CRITICAL - Follow these steps exactly:
    1. Create the base dictionary structure:
    {{
        "start_date": "<YYYY-MM-DD in text format>",
        "end_date": "<YYYY-MM-DD in text format>",
        "work_evolution": "Brief overview of overall progress and challenges",
        "workstreams_summary": 
            "*:signal_strength: Multi-Repos Incremental*\n\n" +
            "• *Achievement 1*: Detail\n" +
            "• *Achievement 2*: Detail\n\n" +
            "*🚧 Challenges*\n" +
            "• Challenge 1\n" +
            "• Challenge 2\n\n" +
            "*➡️ Next Steps*\n" +
            "• Action 1\n" +
            "• Action 2\n\n" +
            "*:rocket: Multi-Repos to Scale*\n\n" +
            [Same structure for second workstream],
        "teams": consolidated_data['teams']

    }}

    2. REQUIRED - Verify JSON Structure:
       * start_date and end_date are in correct format
       * work_evolution contains your summary
       * workstreams_summary follows the required format
       * teams array contains ALL teams from the original consolidated_data

    """,
    expected_output="""
    ## Save Output
    REQUIRED SEQUENCE:
    1. Generate output path:
       ```
       output_path = f"teams_json/{start_date}_to_{end_date}/slack_message.json"
       ```

    2. Save complete JSON using save_json action:
       ```
       save_json(
           file_path=output_path,
           data=final_json  # Contains ALL required data
       )
       ```

    ## Verification Checklist
    CRITICAL - Verify before completing:
    1. [ ] JSON contains ALL original teams data
    2. [ ] No data loss from original consolidated_data
    3. [ ] Only summaries and dates were updated
    4. [ ] Team structure remains unchanged
    5. [ ] File saved successfully

    Return format:
    "Slack message JSON saved successfully at: <full_path_to_json>"
    
    """,
    agent=project_analyst,
    tools=[JsonFileOperations()],
    # The consolidation task is declared as context for this one.
    context = [fups_consolidation_task]
)


### Crew

In [15]:
# Assemble the crew: both agents, tasks listed in execution order, sequential process.
jira_crew = Crew(
    process=Process.sequential,
    planning=True,
    verbose=True,
    agents=[analytics_engineer_agent, project_analyst],
    tasks=[
        data_ingestion_task,
        data_processing_task,
        fup_generation_task,
        fups_consolidation_task,
        # report_generation_task  (defined above, left out of this run)
    ],
)

In [16]:
# Values for the {project_id}, {labels}, {start_date} and {end_date} placeholders
# used in the task prompts.
inputs = dict(
    project_id="14130",
    labels=["roadmap-mr-program-2025"],
    start_date=start_date,
    end_date=end_date,
)

In [None]:
# Start the crew run with the inputs dictionary; the value returned by kickoff is kept in `result`.
result = jira_crew.kickoff(inputs=inputs)