## Test of automatic release doc maker

### Step 0: Set initial variables and load modules
Before we begin, let's import the needed modules and set the variables needed to connect to Azure DevOps.

In [3]:
# Debug level used by the cells below to minimize or maximize output:
#   0 -> only summary counts, warnings and error messages are printed
#   1 -> additionally print every commit / work item / parent that is found
#   2 -> additionally print the request URLs and the full commit messages
#        (2 is the highest level any cell checks for)
debug = 0

# Import the needed modules
import httpx
import re
import os
from dotenv import load_dotenv, find_dotenv
import pandas as pd

# Locate the .env file; a falsy result means none was found, in which case we
# stop right away instead of continuing with missing settings.
dotenv_path = find_dotenv()
if dotenv_path:
    load_dotenv(dotenv_path)
else:
    raise FileNotFoundError("The .env file does not exist. Please create it and add the necessary variables.")

# Connection settings, read from the environment (populated from .env above)
organization = os.getenv('ORG')
project = os.getenv('PRJ')
repository = os.getenv('REP')
personal_access_token = os.getenv('PAT')

# Every setting is required: abort if any of them is unset or empty
if not (organization and project and repository and personal_access_token):
    raise EnvironmentError("One or more environment variables are missing. Please check your .env file.")

# Release branches to compare. The commits listed for `current_branch_name`
# that are not also listed for `former_branch_name` are treated as the content
# of the new release. The last path segment of `current_branch_name` is also
# used as the base name of the generated output files.
# NOTE(review): the two names use different prefixes ('releases/release/' vs
# 'release/') - confirm that both match the real branch names in the repository.
current_branch_name = 'releases/release/release_20250211_163813'
former_branch_name = 'release/release_2024W46'

### Step 1: Get all Pull Requests for the Release Branch
First, you'll need to get all pull requests (here: the commits they produced) for your specific release branch. Here's a Python script to do that:  
Here we get all the commits for the `current_branch_name` and all the commits for the `former_branch_name`, and then subtract the former commits, ending up with the commits that are new in the latest release.


In [None]:
def get_commits(branch_name):
    """Return the commits that the Azure DevOps REST API lists for a branch.

    The commits endpoint of the configured organization/project/repository is
    queried page by page (100 commits per request, advancing ``$skip``) until
    a page comes back with fewer entries than requested.

    Parameters:
    branch_name (str): Branch name passed as ``searchCriteria.itemVersion.version``.

    Returns:
    list: The commit dictionaries collected from the ``value`` list of every
          page. If a request fails, or a response has no ``value`` key, a
          diagnostic is printed and the commits collected so far are returned.
    """
    url = f'https://dev.azure.com/{organization}/{project}/_apis/git/repositories/{repository}/commits'
    params = {
        'searchCriteria.itemVersion.version': branch_name,
        'api-version': '7.1',
        '$top': 100,
        '$skip': 0
    }

    commits = []
    while True:
        response = httpx.get(url, params=params, auth=('', personal_access_token))

        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            # Print the raw body: an error response is not guaranteed to be
            # JSON, and decoding it here would raise and hide the real problem.
            print(response.text)
            break

        response_data = response.json()

        if 'value' not in response_data:
            print("Error: 'value' key not found in the response")
            print(response_data)
            break

        page = response_data['value']
        commits.extend(page)

        # A short page means there is nothing left to fetch
        if len(page) < params['$top']:
            break

        params['$skip'] += params['$top']

    return commits

# Fetch the commit lists of both branches (former release first, then current)
former_commits = get_commits(former_branch_name)
current_commits = get_commits(current_branch_name)

# Set of commit IDs that were already part of the former release
former_commit_ids = set(map(lambda former: former['commitId'], former_commits))

# Keep only the commits of the current release that the former one did not have
new_commits = list(filter(lambda current: current['commitId'] not in former_commit_ids, current_commits))

# Copy the committer date to a top-level 'date' key and, when debugging, list each commit
for commit in new_commits:
    commit['date'] = commit['committer']['date']
    if debug < 1:
        continue
    print(f"Commit ID: {commit['commitId']}, Date: {commit['committer']['date']}, Message: {commit['comment']}")

df_new_commits = pd.DataFrame(new_commits)

print(f"Found {len(new_commits)} commits in {current_branch_name}")

### Step 2: Get Work Items Related to Pull Requests
Next, you'll need to get the work items related to each pull request. You can do this by querying the work items associated with each pull request:

The trick here is to make it iterative. Since not all `commits` have a `work item`, we need to follow the `parents` to find the parent commit and see if that has a work item - and if not, do it again.

In [None]:
# Accumulators filled by find_work_items()
work_items = []
commits_without_work_items = []

# Regular expression to find work item IDs in the commit message.
# Kept for backward compatibility; it only captures the FIRST ID that follows
# "items:", so find_work_items() uses extract_work_item_ids() instead.
work_item_pattern = re.compile(r'items:\s*#(\d+)')

# Matches a complete "items: #1, #2 #3" list (IDs on one line, separated by
# commas and/or blanks) and the individual "#<digits>" references inside it.
work_item_list_pattern = re.compile(r'items:\s*(#\d+(?:[ \t]*,?[ \t]*#\d+)*)')
work_item_id_pattern = re.compile(r'#(\d+)')


def extract_work_item_ids(comment):
    """Return every work item ID listed after an "items:" marker in a commit message.

    Parameters:
    comment (str): The commit message to scan.

    Returns:
    list of str: The IDs (digits only, without '#') in order of appearance;
                 empty when the message contains no "items: #<id>" list.
    """
    ids = []
    for id_list in work_item_list_pattern.findall(comment):
        ids.extend(work_item_id_pattern.findall(id_list))
    return ids


def find_work_items(commit, original_commit_id, organization, project, repository, personal_access_token, debug=0, max_depth=None):
    """Find the work items linked to a commit, walking up its first parents if needed.

    The commit details are fetched from the Azure DevOps REST API. If the
    commit message lists work items, one entry per ID is appended to the
    module-level ``work_items`` list. Otherwise the commit is appended to
    ``commits_without_work_items`` and the search continues with its first
    parent, until a commit with work items is found, a commit without parents
    is reached, a request fails, or ``max_depth`` parents have been visited.

    Parameters:
    commit (dict): Commit to start from; must contain 'commitId'. It gets a
                   'parentCommitId' key added (None when it has no parents).
    original_commit_id (str): ID stored as 'originalCommitId' on every work
                              item found, so results can be joined back to the
                              commit the search started from.
    organization, project, repository (str): Azure DevOps location of the repo.
    personal_access_token (str): Token used as the basic-auth password.
    debug (int): 2 or more prints request URLs and commit messages.
    max_depth (int | None): Maximum number of parent commits to follow;
                            None (default) means no limit.

    Returns:
    None. Results are reported through ``work_items`` and
    ``commits_without_work_items``.
    """
    current = commit
    depth = 0

    # Iterative walk instead of recursion, so a long history without work
    # items cannot exceed Python's recursion limit.
    while True:
        commit_id = current['commitId']
        commit_url = f'https://dev.azure.com/{organization}/{project}/_apis/git/repositories/{repository}/commits/{commit_id}?api-version=7.1'

        if debug >= 2:
            print(commit_url)

        commit_response = httpx.get(commit_url, auth=('', personal_access_token))

        if commit_response.status_code != 200:
            print(f"Failed to fetch commit details for commit {commit_id}: {commit_response.text}")
            return

        commit_details = commit_response.json()
        comment = commit_details.get('comment') or ''
        parents = commit_details.get('parents', [])

        # Remember the first parent (None when there is none).
        # NOTE(review): only the first parent of a merge commit is followed,
        # and the entries of 'parents' are assumed to be commit ID strings.
        current['parentCommitId'] = parents[0] if parents else None

        # Debug: Print commit message
        if debug >= 2:
            print(f"Commit ID: {commit_id}, Message: {comment}")

        work_item_ids = extract_work_item_ids(comment)
        if work_item_ids:
            for work_item_id in work_item_ids:
                work_items.append({"id": int(work_item_id), "commitId": str(commit_id), "originalCommitId": original_commit_id})
            return

        commits_without_work_items.append(current)

        parent_commit_id = current['parentCommitId']
        if not parent_commit_id:
            return
        if max_depth is not None and depth >= max_depth:
            return

        depth += 1
        current = {'commitId': parent_commit_id}

# Look up the work items of every new commit (following parent commits when
# the commit itself does not mention any)
for commit in new_commits:
    find_work_items(commit, commit['commitId'], organization, project, repository, personal_access_token, debug)

for item in work_items:
    if debug >= 1:
        print(f"Work Item ID: {item['id']}, Commit ID: {item['commitId']}, Original Commit ID: {item['originalCommitId']}")

# Name the columns explicitly so the DataFrame keeps them even when no work
# item was found; the merge in the next cell relies on 'originalCommitId'.
df_work_items = pd.DataFrame(work_items, columns=['id', 'commitId', 'originalCommitId'])

print(f"Found {len(work_items)} work items")

# Print commits without work items
print(f"Found {len(commits_without_work_items)} commits without work items")
for commit in commits_without_work_items:
    if debug >= 1:
        # Parent commits added by find_work_items() only carry 'commitId' and
        # 'parentCommitId', so the committer date may be missing.
        committer = commit.get('committer') or {}
        print(f"Commit ID: {commit['commitId']}, Date: {committer.get('date', 'unknown')}")



Once we have found all our `work items`, let's use that information to update our original list of new commits in this release.

In [None]:
# Left-join the work items onto the new commits: a commit's 'commitId' is
# matched against the 'originalCommitId' recorded on each work item
df_merged = df_new_commits.merge(df_work_items, how='left', left_on='commitId', right_on='originalCommitId')
# Clean up the merged dataframe: rename the suffixed/ambiguous columns and
# remove the redundant join column
column_renames = {'id': 'workItem', 'commitId_x': 'commitId', 'commitId_y': 'parrent_commitId'}
df_merged = df_merged.rename(columns=column_renames)
df_merged = df_merged.drop(columns=['originalCommitId'])

Once we have found all the `work items`, we then need to find out whether each one is of type `Story`, `Bug` or `Task`.  
However, since many `commits` resolve to fewer `work items`, let's first get a unique list of `work items` to check.

In [None]:
# Unique work item IDs referenced by the new commits.
# 'workItem' comes from a left join, so commits without a work item hold NaN
# (which also turns the column into floats): drop the missing values and
# convert back to int so the IDs can be used in URLs and int() never sees NaN.
unique_work_items = [
    {'id': int(work_item_id)}
    for work_item_id in df_merged['workItem'].dropna().unique().tolist()
]

updated_work_items = []

# Enrich every work item with title, type, state, assignee and area path
for item in unique_work_items:
    work_item_id = item['id']
    work_item_updates_url = f'https://dev.azure.com/{organization}/{project}/_apis/wit/workItems/{work_item_id}?api-version=7.1'
    if debug >= 2:
        print(work_item_updates_url)
    work_item_updates_response = httpx.get(work_item_updates_url, auth=('', personal_access_token))

    if work_item_updates_response.status_code == 200:
        try:
            work_item = work_item_updates_response.json()
            new_info = {
                'Title': work_item['fields']['System.Title'],
                'Type': work_item['fields']['System.WorkItemType'],
                'State': work_item['fields']['System.State'],
                # Unassigned work items have no 'System.AssignedTo' field;
                # 'Ikke tildelt' is Danish for "not assigned"
                'Assigned to': work_item['fields'].get('System.AssignedTo', {}).get('displayName', 'Ikke tildelt'),
                'Area' : work_item['fields'].get('System.AreaPath')
                }
            item.update(new_info)
            updated_work_items.append(item)
        except (KeyError, ValueError, TypeError) as e:
            print(f"Error processing work item {work_item_id}: {e}")
    else:
        print(f"Failed to fetch work item updates for work item {work_item_id}: {work_item_updates_response.text}")

if debug >= 1:
    for item in updated_work_items:
        print(item)

print(f"Updated {len(updated_work_items)} work items")

# Split by type: tasks need their parent resolved, everything else is used as-is
work_items_other = [item for item in updated_work_items if item['Type'] != 'Task']
work_items_task = [item for item in updated_work_items if item['Type'] == 'Task']

print(f"{len(work_items_other)} work items are of type Story/Bug")
print(f"{len(work_items_task)} work items are of type Task")

If it is of type `Story` or `Bug`, then it is fine.  
If it is of type `Task`, then we need to find the parent `work item` and see what type that is.

### Step 3: Find Parent Story or Bug for Each Task
For each task, i.e. each work item that was not a `Story` or `Bug`, we need to find the parent story or bug.  
This is a bit tricky, since we need to iterate back over the parent relation until we find a `work item` of type `Story` or `Bug`.  
Here we use our prior list of work items that are of type `Task`.

In [None]:
# IDs of all work items of type Task
task_ids = list(map(lambda task: task['id'], work_items_task))

In [None]:
def get_work_item_updates(work_item_id):
    """Fetch the list of updates recorded for a work item.

    Parameters:
    work_item_id: ID of the work item, inserted into the request URL.

    Returns:
    The decoded JSON body of the response; the HTTP status is not checked.
    """
    endpoint = f'https://dev.azure.com/{organization}/{project}/_apis/wit/workitems/{work_item_id}/updates'
    response = httpx.get(endpoint, params={'api-version': '7.1'}, auth=('', personal_access_token))
    return response.json()

def get_parent_work_item(task_updates):
    """Return the details of a work item's current parent, based on its update history.

    The updates are replayed in order: every added
    'System.LinkTypes.Hierarchy-Reverse' relation sets the parent, and removing
    the relation that points at the current parent clears it again.

    Parameters:
    task_updates (dict): Response of get_work_item_updates(); the updates are
                         read from its 'value' list. A response without
                         'value' (e.g. an error body) is treated as having no
                         updates.

    Returns:
    The result of get_work_item_details() for the parent, or None when the
    work item has no parent.
    """
    parent_link_type = 'System.LinkTypes.Hierarchy-Reverse'
    parent_id = None

    for update in task_updates.get('value') or []:
        relations = update.get('relations')
        if not relations:
            continue
        for relation in relations.get('added', []):
            if relation['rel'] == parent_link_type:
                # The parent ID is the last path segment of the relation URL
                parent_id = relation['url'].split('/')[-1]
        for relation in relations.get('removed', []):
            if relation['rel'] == parent_link_type and parent_id == relation['url'].split('/')[-1]:
                parent_id = None

    if parent_id:
        return get_work_item_details(parent_id)
    return None

def get_work_item_details(work_item_id):
    """Fetch a single work item.

    Parameters:
    work_item_id: ID of the work item, inserted into the request URL.

    Returns:
    The decoded JSON body of the response; the HTTP status is not checked.
    """
    endpoint = f'https://dev.azure.com/{organization}/{project}/_apis/wit/workitems/{work_item_id}'
    response = httpx.get(endpoint, params={'api-version': '7.1'}, auth=('', personal_access_token))
    return response.json()

# Extract task IDs
task_ids = [task['id'] for task in work_items_task]
# Fetch updates for each task
tasks = [get_work_item_updates(task_id) for task_id in task_ids]
# Pair every task ID with the details of its parent work item (or None)
parent_work_items = [(task_id, get_parent_work_item(task)) for task_id, task in zip(task_ids, tasks)]

parent_story_items = []

# Collect the parent information of every task that has a usable parent
for task_id, parent in parent_work_items:
    if parent and 'id' in parent and 'fields' in parent:
        parent_fields = parent['fields']
        parent_story_items.append({
            "task_id": task_id,
            "parent_id": parent["id"],
            "title": parent_fields['System.Title'],
            "type": parent_fields['System.WorkItemType'],
            'State': parent_fields['System.State'],
            # 'Ikke tildelt' is Danish for "not assigned"
            'Assigned to': parent_fields.get('System.AssignedTo', {}).get('displayName', 'Ikke tildelt'),
            # Take the area from the parent itself, not from whatever work
            # item an earlier cell happened to process last
            'Area' : parent_fields.get('System.AreaPath')
        })
        if debug >= 1:
            print(f"Task ID: {task_id}, Parent ID: {parent['id']}, Title: {parent['fields']['System.Title']}, Type: {parent['fields']['System.WorkItemType']}")
    else:
        print(f"Task ID: {task_id}, No parent found or missing expected fields")

## Step 4: Combining the data

Let's start combining the data back together.  
Now we have lists of our `commits` and `work items`. We have found out which `work items` were of type `Story` or `Bug`, and we have found the parent `Story` or `Bug` for the rest of the `work items` of type `Task`.

This is easier to do when using dataframes.

In [None]:
# Name the columns explicitly so both frames keep their join and data columns
# even when a list is empty (e.g. a release without any Task work items);
# otherwise the merges below fail on the missing key column.
df_work_items_other = pd.DataFrame(
    work_items_other,
    columns=['id', 'Title', 'Type', 'State', 'Assigned to', 'Area'],
)
df_parent_story_items = pd.DataFrame(
    parent_story_items,
    columns=['task_id', 'parent_id', 'title', 'type', 'State', 'Assigned to', 'Area'],
)

# First attach the Story/Bug information of directly linked work items, then
# the parent information of work items that are Tasks
df_merged_1 = pd.merge(df_merged, df_work_items_other, left_on='workItem', right_on='id', how='left')
df_merged_2 = pd.merge(df_merged_1, df_parent_story_items, left_on='workItem', right_on='task_id', how='left')

# Now a bit ugly code to make the merged df look better
# First lets combine the columns
def combine_and_drop_columns(df, column_pairs):
    """
    Merge pairs of columns into one and remove the leftover column.

    For every pair, missing values in the first column are filled with the
    values of the second column, after which the second column is removed.
    The pairs are processed in order and the DataFrame is changed in place.

    Parameters:
    df (pd.DataFrame): The DataFrame to modify.
    column_pairs (list of tuples): (kept_column, dropped_column) pairs.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in.
    """
    for kept, dropped in column_pairs:
        filled = df[kept].combine_first(df[dropped])
        df[kept] = filled
        del df[dropped]
    return df

# Define the column pairs to combine: (column to keep, column to fold into it).
# The order matters: 'id' is first completed from 'task_id' and afterwards
# folded into 'parent_id', which then holds the Story/Bug ID of every row.
column_pairs = [('id', 'task_id'), ('Title', 'title'), ('Type', 'type'),('parent_id','id'),('State_x','State_y'),('Assigned to_x','Assigned to_y'),('Area_x','Area_y')]

# Combine and drop columns
df_merged_2 = combine_and_drop_columns(df_merged_2, column_pairs)

# Store the IDs as integers. The nullable 'Int64' dtype is used because rows
# without a resolved work item hold a missing value, which a plain 'int64'
# cast would reject with an error.
df_merged_2['parent_id'] = pd.to_numeric(df_merged_2['parent_id']).astype('Int64')

# Rename the merged columns
df_final = df_merged_2.rename(columns={'State_x':'State', 'Assigned to_x':'Assigned to', 'parent_id':'ID', 'Area_x':'Area'})


Then, let's save the list as an `Excel` file

In [None]:
# Create Excel filename from branch name
filename = './release/' + current_branch_name.split('/')[-1]
df_final.to_excel(filename + '.xlsx', index=False)

Now, let's format the result so we can better share it with the business.  
Here we format the output as `Markdown`, but it could easily be changed to e.g. `HTML` if that is preferred.

In [None]:
# Import the nesecary library to display Markdown
from IPython.display import Markdown, display

# Select the required columns and drop duplicates, and then sort on 'Type' and then by 'Story_id'
df_release = df_final[['Type', 'ID', 'Title', 'State', 'Assigned to','Area']].drop_duplicates().sort_values(by=['Type', 'ID'])

# Function to convert Story_id to hyperlink
df_release['ID'] = df_release.apply(lambda x: f"[{x['ID']}](https://dev.azure.com/Semler-Gruppen/Insight/_workitems/edit/{x['ID']})", axis=1)

# Split the DataFrame into Bugs and Stories
df_bugs = df_release[df_release['Type'] == 'Bug'].drop(columns=['Type'])
df_stories = df_release[df_release['Type'] == 'User Story'].drop(columns=['Type'])

# Convert each DataFrame to Markdown format
bugs_markdown = df_bugs.to_markdown(index=False)
stories_markdown = df_stories.to_markdown(index=False)

# Create the complex Markdown structure
markdown_output = f"""
# {current_branch_name}

Here is the content of the last release `{current_branch_name}`

## Bug

{bugs_markdown}

## Story

{stories_markdown}
"""

# Display the Markdown output
display(Markdown(markdown_output))

#### Summary
1. Get all pull requests for the release branch.
2. Get tasks related to these pull requests.
3. Find the parent story or bug for each task.

In [None]:
# Save the Markdown output to a file named release.md
with open(filename + '.md', 'w', encoding='utf-8') as file:
    file.write(markdown_output)