## **Installations, Imports and Setup**

In [None]:
!pip install -U -q google-generativeai
!pip install -q hdbscan

In [None]:
import pandas as pd
import google.generativeai as genai
import numpy as np
from google.colab import userdata

# Clustering
from hdbscan import HDBSCAN
import hdbscan
import openai

# JIRA Ticket Integration
import requests
import json
import os

# New Data Clustering
from sklearn.metrics.pairwise import euclidean_distances

In [None]:
# Option 1: Add Gemini API Key from Google Colab Notebook Secrets
# (userdata.get looks up the secret stored under the name GEMINI_API_KEY)
genai.configure(api_key=userdata.get('GEMINI_API_KEY'))

# Option 2: Simply add the gemini api key in the following way:
# genai.configure(api_key="YOUR_GEMINI_API_KEY_HERE")

In [None]:
# JIRA Configuration
# Connection settings shared by every Jira REST call in this notebook.

JIRA_DOMAIN = "ultragenius.atlassian.net"  # host used to build the REST URLs
JIRA_API_TOKEN = userdata.get('JIRA_API_KEY')  # read from the Colab secret named JIRA_API_KEY
# Or JIRA_API_TOKEN = 'YOUR_JIRA_API_TOKEN_HERE'
JIRA_EMAIL = "dev.chandan@ultragenius.club"  # sent together with the token as the request credentials
JIRA_PROJECT_KEY = "SCRUM"  # project in which the issues are created

## **Step 1:** Service Now Issues Reading and Embedding

In [None]:
# Load the existing tickets from data.csv into a DataFrame
df = pd.read_csv('data.csv')

In [None]:
# Preview the loaded tickets
df

Unnamed: 0,Description,Title,Category,Subcategory,Priority,Impact,Ticket ID
0,"The office printer is not working, showing an ...",Printer has stopped working,Hardware,Printer,Medium,Low,INC0024
1,Please grant me access to the shared resource.,Need access to a shared resource,Access,File Access,Medium,Low,INC0108
2,I'm having difficulties with the VPN.,Difficulties with VPN,Network,VPN,High,High,INC0085
3,"I can't move the cursor, my mouse isn't working.",Cannot move cursor with mouse,Hardware,Mouse,Medium,Low,INC0098
4,The camera on my laptop has stopped working.,Webcam not working on laptop,Hardware,Webcam,Medium,Low,INC0076
...,...,...,...,...,...,...,...
105,I need access to shared files.,Request access to shared files,Access,File Access,Medium,Low,INC0104
106,The laptop is lagging and freezing up.,Laptop performance lag,IT Support,Performance Issue,High,Moderate,INC0016
107,The wireless internet is not working on my lap...,Laptop Wi-Fi not connecting,Network,Wi-Fi Connectivity,High,High,INC0004
108,My built-in camera is having issues.,Issue with built in camera,Hardware,Webcam,Medium,Low,INC0081


In [None]:
# Clustering works on the title and description together, so join them
# (separated by a single space) into a new 'text' column.
df['text'] = df['Title'].str.cat(df['Description'], sep=' ')

In [None]:
# Text preprocessing function (implement as needed)
def preprocess_text(text):
    """Return ``text`` converted to lower case; extend here if more cleaning is needed."""
    lowered = text.lower()
    return lowered

# Run the preprocessing over every combined title/description string
df['processed_text'] = df['text'].map(preprocess_text)

In [None]:
# List the Gemini models that support content embedding
embedding_models = (
    candidate for candidate in genai.list_models()
    if 'embedContent' in candidate.supported_generation_methods
)
for embedding_model in embedding_models:
    print(embedding_model.name)

models/embedding-001
models/text-embedding-004


The following cell will generate the embeddings for the entire data (row wise).

This might take time, depending on the size of the data.

In [None]:
def get_gemini_embedding(text, model='models/text-embedding-004'):
    """Embed one piece of text with Gemini and return the vector as a string.

    Args:
        text: Content passed to ``genai.embed_content``.
        model: Name of the embedding model to call.

    Returns:
        ``str()`` of the response's ``'embedding'`` entry, or ``None`` if the call
        raised (the offending text and the error are printed in that case).
    """
    try:
        result = genai.embed_content(content=text, model=model)
        vector_text = str(result['embedding'])  # assumes a single embedding is returned
    except Exception as err:
        print(f"Error generating embedding for text: {text}")
        print(f"Error: {err}")
        vector_text = None  # callers must cope with missing embeddings
    return vector_text


# Embed every preprocessed ticket text (one Gemini call per row)
df['gemini_embedding'] = df['processed_text'].map(get_gemini_embedding)

In [None]:
# Convert embeddings to a list of numerical vectors.
# The column holds the string form of a list of numbers (or None when the
# embedding call failed), so fail early with a clear message on missing rows.
missing_embeddings = df['gemini_embedding'].isna()
if missing_embeddings.any():
    raise ValueError(
        f"{int(missing_embeddings.sum())} row(s) have no embedding; "
        "re-run the embedding cell for them before clustering."
    )

# json.loads parses the list literal without executing it as Python code (unlike eval)
gemini_embeddings_num = [json.loads(embedding) for embedding in df['gemini_embedding']]

# Convert the list of vectors into a NumPy array
gemini_embeddings_np = np.array(gemini_embeddings_num)

In [None]:
# Check the dimensions of the embedding matrix
gemini_embeddings_np.shape
# (x, 768) . x is number of rows in the data, 768 is the size of the embedding (by gemini)

(110, 768)

## **Step 2:** Clustering Algorithm

In [None]:
# Build the HDBSCAN clusterer (tune the parameters to the data as needed),
# fit it on the embedding matrix and store one label per ticket.
clusterer = HDBSCAN(metric='euclidean', min_cluster_size=5)
cluster_labels = clusterer.fit_predict(gemini_embeddings_np)
df['cluster'] = cluster_labels



In [None]:
# Count how many tickets received each cluster label
cluster_summary = df.groupby('cluster').size()
print("Cluster Size Summary:\n", cluster_summary)

Cluster Size Summary:
 cluster
-1     6
 0    10
 1    10
 2    10
 3    10
 4    10
 5    10
 6    10
 7     9
 8     9
 9    16
dtype: int64


Any row labelled -1 is an outlier; every other label is a cluster formed by analyzing the data.

**The cluster labels returned by `fit_predict` are final: HDBSCAN has already determined the optimal clustering based on its internal stability calculations.**

## **Step 3:** JIRA Issue Creation.

1. Creating Parent Issues for each Cluster
2. Creating a Subtask for each Issue
3. Putting the Ticket Ids in the Parent Issue Comments (INC Number)
4. Proposing a solution in the comment of each subtask (issue).

In [None]:
# Url for creation of new issues (also used as the base for per-issue URLs)
url = f"https://{JIRA_DOMAIN}/rest/api/3/issue"

In [None]:
# Headers sent with every Jira request: JSON is both sent and expected back
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json"
}

In [None]:
# Fetch all issue types and map them
issue_types_url = f"https://{JIRA_DOMAIN}/rest/api/3/issuetype"

issue_types_response = requests.get(
    issue_types_url,
    headers=headers,
    auth=(JIRA_EMAIL, JIRA_API_TOKEN),
    timeout=30,  # seconds; avoids hanging the cell on a stalled connection
)

# Raise instead of calling exit(): an exception stops the cell with a readable
# message and leaves the kernel (and everything computed so far) intact.
if issue_types_response.status_code != 200:
    raise RuntimeError(f"Failed to fetch issue types: {issue_types_response.text}")

issue_types = issue_types_response.json()
# Create a mapping of issue type names to their IDs
# NOTE(review): if two returned issue types share a name, the later one wins here.
issue_type_map = {issue_type['name']: issue_type['id'] for issue_type in issue_types}
print("Available issue types and their IDs:", issue_type_map)

# Correctly identify the "Subtask" issue type (case-sensitive)
if "Subtask" not in issue_type_map:
    raise RuntimeError("Error: 'Subtask' issue type is not available in the project.")
subtask_issue_type_id = issue_type_map["Subtask"]

Available issue types and their IDs: {'Subtask': '10005', 'Task': '10001', 'Bug': '10002', 'Story': '10006', 'Epic': '10004'}


In [None]:
def prompt_gemini(prompt):
    """Send ``prompt`` to the gemini-1.5-flash model and return the response text."""
    return genai.GenerativeModel("gemini-1.5-flash").generate_content(prompt).text

In [None]:
# Iterate through the clusters to create a parent issue and then sub-tasks

SUMMARY_MAX_CHARS = 255         # cap applied to the parent issue summary
DESCRIPTION_MAX_CHARS = 32767   # cap applied to the parent issue description text
JIRA_REQUEST_TIMEOUT = 30       # seconds; stops a stalled request from hanging the cell


def _adf_paragraph(text):
    """Wrap plain text in a single-paragraph Atlassian Document Format document."""
    return {
        "type": "doc",
        "version": 1,
        "content": [
            {
                "type": "paragraph",
                "content": [{"text": text, "type": "text"}],
            }
        ],
    }


def _jira_post(endpoint, payload):
    """POST ``payload`` as JSON to a Jira REST endpoint and return the response."""
    return requests.post(
        endpoint,
        headers=headers,
        auth=(JIRA_EMAIL, JIRA_API_TOKEN),
        data=json.dumps(payload),
        timeout=JIRA_REQUEST_TIMEOUT,
    )


def _add_comment(issue_key, text):
    """Add a plain-text comment to the issue ``issue_key`` and return the response."""
    comment_url = f"https://{JIRA_DOMAIN}/rest/api/3/issue/{issue_key}/comment"
    return _jira_post(comment_url, {"body": _adf_paragraph(text)})


def _report_failure(message, response):
    """Print ``message`` with the HTTP status code, followed by the response body."""
    print(f"{message}: {response.status_code}")
    print(response.text)


for cluster_num in df['cluster'].unique():
    # Get all rows for this cluster
    cluster_df = df[df['cluster'] == cluster_num]

    ticket_ids = cluster_df['Ticket ID'].tolist()
    ticket_ids_combined = ', '.join(ticket_ids)

    # Every title in the cluster is handed to Gemini to produce the parent summary
    titles_combined = f"Cluster {cluster_num} Issues: {', '.join(cluster_df['Title'].tolist())}..."
    summary_from_gemini = prompt_gemini(
        "Please combine the given text into one text within 250 characters, summarizing it appropriately. "
        + titles_combined
    )

    # Bullet list of all descriptions, plus a Gemini-written digest of them
    descriptions = "\n\n".join(f"- {desc}" for desc in cluster_df['Description'].tolist())
    description_summary = prompt_gemini(
        "Please combine the given text into one text within 25000 characters, summarizing it appropriately. "
        + descriptions
    )
    full_description = f"{description_summary}\n\n Full Description: {descriptions}"

    # The Jira summary is a single-line field: collapse any line breaks Gemini
    # produced, then enforce the length cap.
    summary = " ".join(summary_from_gemini.split())[:SUMMARY_MAX_CHARS]

    # Apply the description cap to the text actually sent, i.e. including the heading
    description_heading = f"Cluster {cluster_num} Issues:\n\n"
    description = full_description[:DESCRIPTION_MAX_CHARS - len(description_heading)]

    # Create parent issue
    parent_payload = {
        "fields": {
            "project": {"key": JIRA_PROJECT_KEY},
            "summary": summary,
            "description": _adf_paragraph(description_heading + description),
            "issuetype": {"name": "Task"},
        }
    }
    parent_response = _jira_post(url, parent_payload)

    if parent_response.status_code != 201:
        _report_failure(f"Failed to create parent issue for Cluster {cluster_num}", parent_response)
        continue

    # Save the Jira parent issue key and create sub-tasks
    parent_key = parent_response.json()["key"]
    print(f"Parent issue created successfully for Cluster {cluster_num}! Key: {parent_key}")

    # Update the DataFrame with the Jira issue key
    df.loc[df['cluster'] == cluster_num, 'JIRA Issue Key'] = parent_key

    # Add ticket IDs as a comment in the parent issue
    comment_response = _add_comment(parent_key, f"Ticket IDs for this cluster: {ticket_ids_combined}")
    if comment_response.status_code == 201:
        print(f"Successfully added ticket IDs as a comment to the parent issue {parent_key}.")
    else:
        _report_failure(f"Failed to add comment to parent issue {parent_key}", comment_response)

    # Create sub-tasks for each ticket in the cluster
    for ticket_id, title, desc in zip(cluster_df['Ticket ID'], cluster_df['Title'], cluster_df['Description']):
        subtask_payload = {
            "fields": {
                "project": {"key": JIRA_PROJECT_KEY},
                "parent": {"key": parent_key},  # Link to the parent issue
                "summary": title,  # Sub-task title
                "description": _adf_paragraph(desc),
                "issuetype": {"id": subtask_issue_type_id},  # Use the ID for "Subtask"
            }
        }
        subtask_response = _jira_post(url, subtask_payload)

        if subtask_response.status_code != 201:
            _report_failure(f"Failed to create sub-task for Ticket ID {ticket_id}", subtask_response)
            continue

        subtask_key = subtask_response.json()["key"]
        print(f"Sub-task created successfully for Ticket ID {ticket_id}! Key: {subtask_key}")

        # desc is exactly the text just stored on the sub-task, so it is sent to
        # Gemini directly instead of being fetched back from Jira first.
        proposed_solution = prompt_gemini(
            f"Propose a simple solution in 4 sentences for the following issue: {desc}"
        )

        # Add the proposed solution as a comment to the sub-task
        comment_response = _add_comment(subtask_key, f"Proposed Solution: {proposed_solution}")
        if comment_response.status_code == 201:
            print(f"Successfully added a proposed solution comment to sub-task {subtask_key}.")
        else:
            _report_failure(f"Failed to add comment to sub-task {subtask_key}", comment_response)


Parent issue created successfully for Cluster 8! Key: SCRUM-1043
Successfully added ticket IDs as a comment to the parent issue SCRUM-1043.
Sub-task created successfully for Ticket ID INC0024! Key: SCRUM-1044
Successfully added a proposed solution comment to sub-task SCRUM-1044.
Sub-task created successfully for Ticket ID INC0030! Key: SCRUM-1045
Successfully added a proposed solution comment to sub-task SCRUM-1045.
Sub-task created successfully for Ticket ID INC0027! Key: SCRUM-1046
Successfully added a proposed solution comment to sub-task SCRUM-1046.
Sub-task created successfully for Ticket ID INC0023! Key: SCRUM-1047
Successfully added a proposed solution comment to sub-task SCRUM-1047.
Sub-task created successfully for Ticket ID INC0028! Key: SCRUM-1048
Successfully added a proposed solution comment to sub-task SCRUM-1048.
Sub-task created successfully for Ticket ID INC0022! Key: SCRUM-1049
Successfully added a proposed solution comment to sub-task SCRUM-1049.
Sub-task created suc

## **Step 4:** New Service Now Issues Reading and Embedding

In [None]:
# Load the newly arrived tickets that must be merged into the existing clusters
df_new = pd.read_csv('data new issues 2.csv')

In [None]:
# Preview the new tickets
df_new

Unnamed: 0,Description,Title,Category,Subcategory,Priority,Impact,Ticket ID
0,Software license key is invalid.,Invalid Software License,Software,Software Licensing,High,High,LIC0001
1,Unable to activate software license.,Software Activation Failure,Software,Software Licensing,High,High,LIC0002
2,Software license expired.,Software License Expired,Software,Software Licensing,High,High,LIC0003
3,Requesting a new software license.,Software License Request,Software,Software Licensing,Medium,Low,LIC0004
4,Need assistance with software license transfer.,Software License Transfer,Software,Software Licensing,Medium,Low,LIC0005
5,Software license key not found.,Missing Software License Key,Software,Software Licensing,Medium,Low,LIC0006
6,Error message during software license installa...,Software License Installation Error,Software,Software Licensing,High,Moderate,LIC0007
7,Software license server unreachable.,Software License Server Down,Software,Software Licensing,High,High,LIC0008
8,Software license key is already in use.,Software License Key Conflict,Software,Software Licensing,Medium,Moderate,LIC0009
9,Need help understanding software license terms.,Software License Agreement Clarification,Software,Software Licensing,Low,Low,LIC0010


In [None]:
# Prepare the new tickets the same way as the original data:
# join title and description with a space, then run the shared preprocessing.
df_new['text'] = df_new['Title'].str.cat(df_new['Description'], sep=' ')
df_new['processed_text'] = df_new['text'].map(preprocess_text)

In [None]:
# Get Gemini Embeddings
df_new['gemini_embedding'] = df_new['processed_text'].apply(get_gemini_embedding)

# get_gemini_embedding returns None when a call fails; stop here with a clear
# message rather than failing while parsing.
missing_new_embeddings = df_new['gemini_embedding'].isna()
if missing_new_embeddings.any():
    raise ValueError(
        f"{int(missing_new_embeddings.sum())} new row(s) have no embedding; "
        "re-run this cell before clustering."
    )

# json.loads parses the stored list literal without executing it as Python code (unlike eval)
gemini_embeddings_new = [json.loads(embedding) for embedding in df_new['gemini_embedding']]

## **Step 5:** New Data Clustering

How does this algorithm work?

1. First, assign each new data point to the nearest cluster or create a new cluster if no existing cluster is close enough.  
2. After assigning all points, calculate the centroids (average position) of the updated clusters.  
3. Re-evaluate each point's assignment by comparing its distance to the updated centroids and reassign it if a better cluster exists.  
4. This ensures that points are grouped into the most appropriate clusters after considering all data.  

In [None]:
# Maximum Euclidean distance to an existing cluster centroid for a new ticket to
# join that cluster; anything farther away starts a new cluster.
# This has to be adjusted according to data.
THRESHOLD = 0.95

In [None]:
# Step 1: Extract centroids of existing clusters
existing_clusters = df['cluster'].unique()
existing_centroids = []

for cluster_num in existing_clusters:
    if cluster_num != -1:  # Skip outliers: -1 is not a real cluster
        cluster_points = gemini_embeddings_np[df['cluster'] == cluster_num]
        centroid = cluster_points.mean(axis=0)
        existing_centroids.append((cluster_num, centroid))

# Step 2: Initial assignment of new points
new_points = np.array(gemini_embeddings_new)  # one row per new ticket
new_cluster_assignments = []

for new_point in new_points:
    distances = [
        (cluster_num, euclidean_distances([new_point], [centroid])[0][0])
        for cluster_num, centroid in existing_centroids
    ]
    distances.sort(key=lambda x: x[1])  # Sort by distance

    # Join the closest cluster only if it is within THRESHOLD
    # (`distances` is empty when there is no cluster centroid to compare against yet)
    if distances and distances[0][1] < THRESHOLD:
        assigned_cluster = distances[0][0]
    else:
        # Not close enough to anything: this point seeds a new cluster
        new_cluster_number = max(existing_clusters) + 1
        assigned_cluster = new_cluster_number
        existing_clusters = np.append(existing_clusters, new_cluster_number)  # Update cluster list
        existing_centroids.append((new_cluster_number, new_point))  # Add new cluster centroid

    new_cluster_assignments.append(assigned_cluster)

df_new['cluster'] = new_cluster_assignments

# Step 3: Recalculate centroids after initial assignment.
# Each centroid is the mean over ALL members of the updated cluster: the tickets
# already in it plus the new ones just assigned. Using only the new points would
# make a lone newcomer its own centroid, so Step 4 could never move it.
new_labels = np.array(new_cluster_assignments)
updated_centroids = {}
for cluster_num in set(new_cluster_assignments):
    old_members = gemini_embeddings_np[df['cluster'] == cluster_num]  # empty for brand-new clusters
    new_members = new_points[new_labels == cluster_num]
    member_count = len(old_members) + len(new_members)
    updated_centroids[cluster_num] = (
        old_members.sum(axis=0) + new_members.sum(axis=0)
    ) / member_count

# Step 4: Rebalance assignments based on updated centroids
rebalanced_assignments = []
for new_point in new_points:
    distances = [
        (cluster_num, euclidean_distances([new_point], [centroid])[0][0])
        for cluster_num, centroid in updated_centroids.items()
    ]
    distances.sort(key=lambda x: x[1])  # Sort by distance
    rebalanced_assignments.append(distances[0][0])  # Assign to the closest cluster

# Assign by position so the result does not depend on df_new's index labels
df_new['cluster'] = rebalanced_assignments

In [None]:
# Count how many of the new tickets ended up under each cluster label
cluster_summary = df_new.groupby('cluster').size()
print("Cluster Size Summary:\n", cluster_summary)

Cluster Size Summary:
 cluster
0      1
2      1
3      1
4      1
5      1
7      1
10    10
11     4
dtype: int64


## **Step 6:**  Verification of Correct Clustering

In [None]:
# Existing tickets with their cluster label and the Jira parent key recorded for them
df.head(10)

Unnamed: 0,Description,Title,Category,Subcategory,Priority,Impact,Ticket ID,text,processed_text,gemini_embedding,cluster,JIRA Issue Key
0,"The office printer is not working, showing an ...",Printer has stopped working,Hardware,Printer,Medium,Low,INC0024,Printer has stopped working The office printer...,printer has stopped working the office printer...,"[0.10192397, -0.017983053, 0.023882145, -0.006...",8,SCRUM-1043
1,Please grant me access to the shared resource.,Need access to a shared resource,Access,File Access,Medium,Low,INC0108,Need access to a shared resource Please grant ...,need access to a shared resource please grant ...,"[0.031854056, 0.0036723881, -0.05087242, 0.015...",9,SCRUM-1053
2,I'm having difficulties with the VPN.,Difficulties with VPN,Network,VPN,High,High,INC0085,Difficulties with VPN I'm having difficulties ...,difficulties with vpn i'm having difficulties ...,"[0.07080625, -0.06750471, -0.0431193, -0.03540...",6,SCRUM-1070
3,"I can't move the cursor, my mouse isn't working.",Cannot move cursor with mouse,Hardware,Mouse,Medium,Low,INC0098,Cannot move cursor with mouse I can't move the...,cannot move cursor with mouse i can't move the...,"[0.060593273, -0.030656386, -0.025957093, -0.0...",5,SCRUM-1081
4,The camera on my laptop has stopped working.,Webcam not working on laptop,Hardware,Webcam,Medium,Low,INC0076,Webcam not working on laptop The camera on my ...,webcam not working on laptop the camera on my ...,"[0.07660065, -0.023592766, 0.031639587, -0.013...",4,SCRUM-1092
5,I'm unable to get a Wi-Fi signal on my device.,No Wi-Fi signal,Network,Wi-Fi Connectivity,High,High,INC0007,No Wi-Fi signal I'm unable to get a Wi-Fi sign...,no wi-fi signal i'm unable to get a wi-fi sign...,"[0.042093694, -0.046585858, -0.045194577, 0.03...",-1,SCRUM-1103
6,"I need to reset my password, I cannot access m...",Password reset for account access,Access,Account Access,Medium,Low,INC0038,Password reset for account access I need to re...,password reset for account access i need to re...,"[0.03365822, 0.015797935, -0.0228643, 0.010555...",2,SCRUM-1110
7,I have forgotten my password and am locked out...,"Password reset needed, locked out",Access,Account Access,Medium,Low,INC0032,"Password reset needed, locked out I have forgo...","password reset needed, locked out i have forgo...","[0.057538092, 0.005025697, -0.017235775, 0.019...",2,SCRUM-1110
8,"The network drive is down, I cannot find my fi...",Network drive is down,Network,File Access,High,High,INC0055,Network drive is down The network drive is dow...,network drive is down the network drive is dow...,"[0.069763936, 0.013370875, -0.008896004, 0.034...",9,SCRUM-1053
9,The webcam is not showing an image.,Webcam image not working,Hardware,Webcam,Medium,Low,INC0080,Webcam image not working The webcam is not sho...,webcam image not working the webcam is not sho...,"[0.057535276, -0.012962574, 0.025864387, -0.03...",4,SCRUM-1092


In [None]:
# New tickets with the cluster label assigned by the incremental clustering step
df_new.head(10)

Unnamed: 0,Description,Title,Category,Subcategory,Priority,Impact,Ticket ID,text,processed_text,gemini_embedding,cluster
0,Software license key is invalid.,Invalid Software License,Software,Software Licensing,High,High,LIC0001,Invalid Software License Software license key ...,invalid software license software license key ...,"[0.044763226, -0.011796819, -0.029431934, 0.03...",10
1,Unable to activate software license.,Software Activation Failure,Software,Software Licensing,High,High,LIC0002,Software Activation Failure Unable to activate...,software activation failure unable to activate...,"[0.075168885, 0.00609833, -0.025640411, -0.008...",10
2,Software license expired.,Software License Expired,Software,Software Licensing,High,High,LIC0003,Software License Expired Software license expi...,software license expired software license expi...,"[0.048967008, 0.0072030495, 0.0029084792, 0.03...",10
3,Requesting a new software license.,Software License Request,Software,Software Licensing,Medium,Low,LIC0004,Software License Request Requesting a new soft...,software license request requesting a new soft...,"[-0.010592011, -0.0052936864, 0.0016699637, 0....",10
4,Need assistance with software license transfer.,Software License Transfer,Software,Software Licensing,Medium,Low,LIC0005,Software License Transfer Need assistance with...,software license transfer need assistance with...,"[0.035767786, -0.017970959, -0.014700224, 0.04...",10
5,Software license key not found.,Missing Software License Key,Software,Software Licensing,Medium,Low,LIC0006,Missing Software License Key Software license ...,missing software license key software license ...,"[0.027402228, 0.025502907, -0.005874393, 0.032...",10
6,Error message during software license installa...,Software License Installation Error,Software,Software Licensing,High,Moderate,LIC0007,Software License Installation Error Error mess...,software license installation error error mess...,"[0.05077546, -0.00605729, -0.016597426, 0.0139...",10
7,Software license server unreachable.,Software License Server Down,Software,Software Licensing,High,High,LIC0008,Software License Server Down Software license ...,software license server down software license ...,"[0.058213685, 0.009553044, 0.021104611, 0.0296...",10
8,Software license key is already in use.,Software License Key Conflict,Software,Software Licensing,Medium,Moderate,LIC0009,Software License Key Conflict Software license...,software license key conflict software license...,"[0.04617447, 0.020274702, -0.0061735804, 0.043...",10
9,Need help understanding software license terms.,Software License Agreement Clarification,Software,Software Licensing,Low,Low,LIC0010,Software License Agreement Clarification Need ...,software license agreement clarification need ...,"[0.023461955, 0.0033461794, -0.022290785, 0.04...",10


## **Step 7:** Update or Create new JIRA Parent Issues.
1. Create new clusters (Parent Issues) over Jira if required.
2. Create new Sub-Tasks under existing or new cluster.
3. Proposing a solution in the comments of each sub task.

In [None]:
# Combine the new data with the existing data
# (ignore_index=True renumbers the rows of the combined frame from 0)
df_combined = pd.concat([df, df_new], ignore_index=True)

In [None]:
# Preview the combined frame of existing and new tickets
df_combined

Unnamed: 0,Description,Title,Category,Subcategory,Priority,Impact,Ticket ID,text,processed_text,gemini_embedding,cluster,JIRA Issue Key
0,"The office printer is not working, showing an ...",Printer has stopped working,Hardware,Printer,Medium,Low,INC0024,Printer has stopped working The office printer...,printer has stopped working the office printer...,"[0.10192397, -0.017983053, 0.023882145, -0.006...",8,SCRUM-1043
1,Please grant me access to the shared resource.,Need access to a shared resource,Access,File Access,Medium,Low,INC0108,Need access to a shared resource Please grant ...,need access to a shared resource please grant ...,"[0.031854056, 0.0036723881, -0.05087242, 0.015...",9,SCRUM-1053
2,I'm having difficulties with the VPN.,Difficulties with VPN,Network,VPN,High,High,INC0085,Difficulties with VPN I'm having difficulties ...,difficulties with vpn i'm having difficulties ...,"[0.07080625, -0.06750471, -0.0431193, -0.03540...",6,SCRUM-1070
3,"I can't move the cursor, my mouse isn't working.",Cannot move cursor with mouse,Hardware,Mouse,Medium,Low,INC0098,Cannot move cursor with mouse I can't move the...,cannot move cursor with mouse i can't move the...,"[0.060593273, -0.030656386, -0.025957093, -0.0...",5,SCRUM-1081
4,The camera on my laptop has stopped working.,Webcam not working on laptop,Hardware,Webcam,Medium,Low,INC0076,Webcam not working on laptop The camera on my ...,webcam not working on laptop the camera on my ...,"[0.07660065, -0.023592766, 0.031639587, -0.013...",4,SCRUM-1092
...,...,...,...,...,...,...,...,...,...,...,...,...
125,Company phone keeps restarting.,Phone Repeatedly Restarting,Mobile,Mobile Device,High,Moderate,MOB0006,Phone Repeatedly Restarting Company phone keep...,phone repeatedly restarting company phone keep...,"[0.049068294, 0.012680858, 3.5514363e-06, -0.0...",11,
126,Tablet unresponsive to touch.,Unresponsive Tablet Touchscreen,Mobile,Mobile Device,High,Moderate,MOB0007,Unresponsive Tablet Touchscreen Tablet unrespo...,unresponsive tablet touchscreen tablet unrespo...,"[0.051861107, -0.04015347, -0.0338304, 0.01970...",5,
127,Need to unlock company phone.,Phone Unlock Request,Mobile,Mobile Device,Medium,Low,MOB0008,Phone Unlock Request Need to unlock company ph...,phone unlock request need to unlock company ph...,"[-0.021923712, 0.0033292328, 0.019099662, 0.03...",11,
128,Smartphone is locked out.,Smartphone Locked Out,Mobile,Mobile Device,High,High,MOB0009,Smartphone Locked Out Smartphone is locked out.,smartphone locked out smartphone is locked out.,"[0.04671359, -0.0065577975, -0.019082604, 0.01...",2,


In [None]:
# Update Jira issues
for cluster_num in df_new['cluster'].unique():
    cluster_df_new = df_new[df_new['cluster'] == cluster_num]
    cluster_df_existing = df[df['cluster'] == cluster_num]

    ticket_ids_new = cluster_df_new['Ticket ID'].tolist()
    ticket_ids_existing = cluster_df_existing['Ticket ID'].tolist()
    ticket_ids_combined = ', '.join(ticket_ids_existing + ticket_ids_new)

    titles_new = cluster_df_new['Title'].tolist()
    titles_existing = cluster_df_existing['Title'].tolist()
    titles_combined = f"Cluster {cluster_num} Issues: {', '.join(titles_existing + titles_new)}..."

    summary_from_gemini = prompt_gemini("Please combine the given text into one text within 250 characters, summarizing it appropriately. " + titles_combined)

    descriptions_new = cluster_df_new['Description'].tolist()
    descriptions_existing = cluster_df_existing['Description'].tolist()
    descriptions_combined = descriptions_existing + descriptions_new
    descriptions_combined_text = "\n\n".join([f"- {desc}" for desc in descriptions_combined])

    description_summary = prompt_gemini("Please combine the given text into one text within 25000 characters, summarizing it appropriately. " + descriptions_combined_text)
    full_description = f"{description_summary}\n\n Full Description: {descriptions_combined_text}"

    summary = summary_from_gemini[:255] if len(summary_from_gemini) > 255 else summary_from_gemini
    description = full_description[:32767] if len(full_description) > 32767 else full_description

    # --- Parent issue: update the cluster's existing Jira issue, or create a new one ---

    # Atlassian Document Format body shared by the update and the create request.
    parent_description = {
        "type": "doc",
        "version": 1,
        "content": [
            {
                "type": "paragraph",
                "content": [
                    {
                        "text": f"Cluster {cluster_num} Issues:\n\n{description}",
                        "type": "text"
                    }
                ]
            }
        ]
    }

    # Reset on every pass. Without this, a failed create would leave jira_key
    # undefined (NameError) or still pointing at the previous cluster's issue, and
    # the comment and sub-tasks below would be attached to the wrong parent.
    jira_key = None

    # dropna(): rows that were never linked to an issue hold a missing value, which
    # must not be mistaken for an issue key.
    existing_jira_key = cluster_df_existing['JIRA Issue Key'].dropna().unique()
    if len(existing_jira_key) > 0:
        # The cluster already has an issue: refresh its summary and description.
        jira_key = existing_jira_key[0]
        payload = {
            "fields": {
                "summary": summary,
                "description": parent_description
            }
        }

        response = requests.put(
            f"{url}/{jira_key}",
            headers=headers,
            auth=(JIRA_EMAIL, JIRA_API_TOKEN),
            data=json.dumps(payload)
        )

        if response.status_code == 204:
            print(f"Jira issue updated for Cluster {cluster_num}: {jira_key}")
        else:
            # jira_key stays set, so the new tickets are still attached to this
            # issue below even though its text could not be refreshed.
            print(f"Failed to update Jira issue for Cluster {cluster_num}: {response.status_code}")
    else:
        # No issue yet for this cluster: create one as a Task in the project.
        payload = {
            "fields": {
                "project": {
                    "key": JIRA_PROJECT_KEY
                },
                "summary": summary,
                "description": parent_description,
                "issuetype": {
                    "name": "Task"
                }
            }
        }

        response = requests.post(
            url,
            headers=headers,
            auth=(JIRA_EMAIL, JIRA_API_TOKEN),
            data=json.dumps(payload)
        )

        if response.status_code == 201:
            jira_key = response.json()["key"]
            # Record the new key on every new ticket that belongs to this cluster.
            df_new.loc[df_new['cluster'] == cluster_num, 'JIRA Issue Key'] = jira_key
            print(f"New Jira issue created for Cluster {cluster_num}: {jira_key}")
        else:
            print(f"Failed to create Jira issue for Cluster {cluster_num}: {response.status_code}")

    # Everything below needs a parent issue; skip it when creation failed.
    if jira_key is not None:
        # Add comments to parent issue
        comment_payload = {
            "body": {
                "type": "doc",
                "version": 1,
                "content": [
                    {
                        "type": "paragraph",
                        "content": [
                            {
                                "text": f"Ticket IDs added: {', '.join(ticket_ids_new)}",
                                "type": "text"
                            }
                        ]
                    }
                ]
            }
        }
        comment_response = requests.post(
            f"{url}/{jira_key}/comment",
            headers=headers,
            auth=(JIRA_EMAIL, JIRA_API_TOKEN),
            data=json.dumps(comment_payload)
        )
        if comment_response.status_code != 201:
            # Report the failure instead of discarding the response silently.
            print(f"Failed to add comment to Jira issue {jira_key}: {comment_response.status_code}")

        # Create sub-tasks: one per new ticket, each with a proposed solution as a comment.
        for ticket_id, title, desc in zip(cluster_df_new['Ticket ID'], cluster_df_new['Title'], cluster_df_new['Description']):
            subtask_payload = {
                "fields": {
                    "project": {"key": JIRA_PROJECT_KEY},
                    "parent": {"key": jira_key},
                    "summary": title,
                    "description": {
                        "type": "doc",
                        "version": 1,
                        "content": [
                            {
                                "type": "paragraph",
                                "content": [
                                    {"text": desc, "type": "text"}
                                ]
                            }
                        ]
                    },
                    "issuetype": {"id": subtask_issue_type_id}
                }
            }

            response = requests.post(
                url,
                headers=headers,
                auth=(JIRA_EMAIL, JIRA_API_TOKEN),
                data=json.dumps(subtask_payload)
            )

            if response.status_code == 201:
                subtask_key = response.json()["key"]
                print(f"Sub-task created for Ticket ID {ticket_id}: {subtask_key}")

                # Ask for a solution only once the sub-task exists, so no model
                # call is spent on a sub-task that could not be created.
                solution = prompt_gemini(f"Propose a solution for the following issue: {desc}")
                comment_payload = {
                    "body": {
                        "type": "doc",
                        "version": 1,
                        "content": [
                            {
                                "type": "paragraph",
                                "content": [
                                    {
                                        "text": f"Proposed Solution: {solution}",
                                        "type": "text"
                                    }
                                ]
                            }
                        ]
                    }
                }

                solution_response = requests.post(
                    f"{url}/{subtask_key}/comment",
                    headers=headers,
                    auth=(JIRA_EMAIL, JIRA_API_TOKEN),
                    data=json.dumps(comment_payload)
                )
                if solution_response.status_code != 201:
                    print(f"Failed to add proposed solution to sub-task {subtask_key}: {solution_response.status_code}")
            else:
                print(f"Failed to create sub-task for Ticket ID {ticket_id}: {response.status_code}")

New Jira issue created for Cluster 10: SCRUM-1164
Sub-task created for Ticket ID LIC0001: SCRUM-1165
Sub-task created for Ticket ID LIC0002: SCRUM-1166
Sub-task created for Ticket ID LIC0003: SCRUM-1167
Sub-task created for Ticket ID LIC0004: SCRUM-1168
Sub-task created for Ticket ID LIC0005: SCRUM-1169
Sub-task created for Ticket ID LIC0006: SCRUM-1170
Sub-task created for Ticket ID LIC0007: SCRUM-1171
Sub-task created for Ticket ID LIC0008: SCRUM-1172
Sub-task created for Ticket ID LIC0009: SCRUM-1173
Sub-task created for Ticket ID LIC0010: SCRUM-1174
Jira issue updated for Cluster 7: SCRUM-1143
Sub-task created for Ticket ID MOB0001: SCRUM-1175
Jira issue updated for Cluster 3: SCRUM-1132
Sub-task created for Ticket ID MOB0002: SCRUM-1176
Jira issue updated for Cluster 0: SCRUM-1153
Sub-task created for Ticket ID MOB0003: SCRUM-1177
Jira issue updated for Cluster 4: SCRUM-1092
Sub-task created for Ticket ID MOB0004: SCRUM-1178
New Jira issue created for Cluster 11: SCRUM-1179
Sub-ta