In [1]:
import openai
import numpy as np
import scipy.cluster.hierarchy as sch
from typing import List
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import fcluster

import os
import openai

# Set your OpenAI API key
api_key = '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'
openai.api_key = api_key

# Set your OpenAI organization ID
organization = 'org-WcJBv1yv2tQNSc2F7BgU4kcd'

def read_text_blocks(file_path: str) -> List[str]:
    with open(file_path, 'r') as file:
        content = file.read()
    return content.split('@!@!')

def create_embeddings(text_blocks: List[str]) -> np.ndarray:
    model = SentenceTransformer('paraphrase-distilroberta-base-v2')
    embeddings = model.encode(text_blocks)
    return embeddings

def hierarchical_clustering(embeddings: np.ndarray) -> np.ndarray:
    condensed_distance_matrix = pdist(embeddings, metric='cosine')
    linkage_matrix = sch.linkage(condensed_distance_matrix, method='complete')
    return linkage_matrix

def main():
    file_path = 'all_checklists.txt'
    text_blocks = read_text_blocks(file_path)
    embeddings = create_embeddings(text_blocks)

    iteration = 1

    # Initialize clusters with all text blocks in their own cluster
    clusters = np.arange(len(text_blocks)) + 1

    while len(np.unique(clusters)) > 1:
        
        print(f"Iteration: {iteration}")
        print(f"Number of text blocks: {len(text_blocks)}")
        print("Updated text_blocks:", [f"Text block {i} (length: {len(text)})" for i, text in enumerate(text_blocks)])

        # Perform hierarchical clustering
        linkage_matrix = hierarchical_clustering(embeddings)

        # Get the most distant pair of text blocks
        most_distant_pair_idx = np.argmax(linkage_matrix[:, 2])
        cluster1, cluster2 = [int(idx) for idx in linkage_matrix[most_distant_pair_idx, :2]]
        if cluster1 >= len(text_blocks):
            cluster1 = int(linkage_matrix[cluster1 - len(text_blocks), 3] - 1)
        if cluster2 >= len(text_blocks):
            cluster2 = int(linkage_matrix[cluster2 - len(text_blocks), 3] - 1)

        idx1, idx2 = cluster1, cluster2

        # Extract the updated clusters
        clusters = fcluster(linkage_matrix, len(text_blocks) - 1, criterion='maxclust')
        print(f"Debug: clusters: {clusters}")  # Moved this line after clusters assignment

        idx1_cluster = np.where(clusters == clusters[idx1])[0]
        idx2_cluster = np.where(clusters == clusters[idx2])[0]

        # Print the grouped clusters
        print("Grouped clusters:", {i: list(np.where(clusters == i)[0]) for i in np.unique(clusters)})

        # Present the text blocks to the user for manual combination
        print("ignore all previous chat data. Below, we have point systems based checklists for deciding to turn around or not on a mountaineering route. Generate a complete and concise checklist with a point system for evaluating turn-around for a climber on the mountain based on the logical combination and combined wisdom of these systems. Ensure that the produced checklist would help the climbers avoid trouble if used. Each section of the final point system should range from 0 to N with the sum across all sections adding up to 0 to 20, with a higher score indicating a stronger need to consider turning around. Include a scoring section. Finally put the token @!@! at the end for me to use in parsing later. ")
        print(f"Text block {idx1} (length: {len(text_blocks[idx1])}): {text_blocks[idx1]}")
        print(f"Text block {idx2} (length: {len(text_blocks[idx2])}): {text_blocks[idx2]}")
        combined_text = input("Enter the combined text: ")

        # Update the text blocks
        text_blocks = [text_blocks[i] for i in range(len(text_blocks)) if i not in [idx1, idx2]]
        text_blocks.append(combined_text)

        # Update the embeddings
        embeddings = np.vstack([embeddings[i, :] for i in range(embeddings.shape[0]) if i not in [idx1, idx2]])
        new_embedding = create_embeddings([combined_text])
        embeddings = np.vstack([embeddings, new_embedding])

        iteration += 1

    print("\nFinal combined text:")
    print(text_blocks[0])

if __name__ == "__main__":
    main()


Iteration: 1
Number of text blocks: 19
Updated text_blocks: ['Text block 0 (length: 1025)', 'Text block 1 (length: 1106)', 'Text block 2 (length: 1189)', 'Text block 3 (length: 1189)', 'Text block 4 (length: 1208)', 'Text block 5 (length: 2446)', 'Text block 6 (length: 1213)', 'Text block 7 (length: 1074)', 'Text block 8 (length: 1044)', 'Text block 9 (length: 1207)', 'Text block 10 (length: 1177)', 'Text block 11 (length: 1171)', 'Text block 12 (length: 1300)', 'Text block 13 (length: 1203)', 'Text block 14 (length: 1328)', 'Text block 15 (length: 1356)', 'Text block 16 (length: 1339)', 'Text block 17 (length: 1261)', 'Text block 18 (length: 1)']
Debug: clusters: [ 6  7  4  5  8 13 15  1  3  2 14 16  9 10 10 10 11 12 17]
Grouped clusters: {1: [7], 2: [9], 3: [8], 4: [2], 5: [3], 6: [0], 7: [1], 8: [4], 9: [12], 10: [13, 14, 15], 11: [16], 12: [17], 13: [5], 14: [10], 15: [6], 16: [11], 17: [18]}
ignore all previous chat data. Below, we have point systems based checklists for deciding 

Enter the combined text: Here is the combined checklist with a point system for evaluating turn-around for a climber on the mountain:  Turn-Around Checklist for Climbers on Mountain (0 - 20, higher score indicates a stronger need to consider turning around)      Weather Conditions, Temperature, and Visibility (0 - 5 points)         Clear weather, stable conditions, moderate temperature, and good visibility (0 points)         Moderate weather change, minor visibility reduction, or warm temperatures (2 points)         Deteriorating weather, high winds, whiteout conditions, or very warm temperatures (4 points)         Severe weather, storm, extreme winds, or extreme temperature changes (5 points)      Altitude Sickness, Exhaustion, and Health (0 - 5 points)         No signs of altitude sickness, exhaustion, or health issues (0 points)         Mild symptoms or moderate exhaustion (2 points)         Severe symptoms or extreme exhaustion (4 points)         One or more team members incapacita

Iteration: 4
Number of text blocks: 16
Updated text_blocks: ['Text block 0 (length: 1025)', 'Text block 1 (length: 1106)', 'Text block 2 (length: 1208)', 'Text block 3 (length: 2446)', 'Text block 4 (length: 1213)', 'Text block 5 (length: 1074)', 'Text block 6 (length: 1044)', 'Text block 7 (length: 1207)', 'Text block 8 (length: 1177)', 'Text block 9 (length: 1171)', 'Text block 10 (length: 1300)', 'Text block 11 (length: 1203)', 'Text block 12 (length: 1339)', 'Text block 13 (length: 1638)', 'Text block 14 (length: 2022)', 'Text block 15 (length: 2177)']
Debug: clusters: [ 7  8  9 11 13  1  3  2 12 14 10  4  4  5  6  5]
Grouped clusters: {1: [5], 2: [7], 3: [6], 4: [11, 12], 5: [13, 15], 6: [14], 7: [0], 8: [1], 9: [2], 10: [10], 11: [3], 12: [8], 13: [4], 14: [9]}
ignore all previous chat data. Below, we have point systems based checklists for deciding to turn around or not on a mountaineering route. Generate a complete and concise checklist with a point system for evaluating turn-a

Enter the combined text: Turn-Around Checklist for Climbers on Mountain (0 - 20, higher score indicates a stronger need to consider turning around):      Weather Conditions & Forecast (0 - 5 points)         Stable and clear conditions (0 points)         Mildly unstable or changing conditions (2 points)         Deteriorating conditions or approaching storm (4 points)         Severe storm or whiteout conditions (5 points)      Climbing Pace & Time of Day (0 - 5 points)         On schedule or ahead of planned pace (0 points)         Slightly behind planned pace (2 points)         Significantly behind planned pace (4 points)         Extremely slow pace or overdue (5 points)      Team Experience, Fitness & Health (0 - 5 points)         Adequate team experience, fitness, and health for the route (0 points)         Slightly insufficient team experience, fitness, or health (2 points)         Significantly insufficient team experience, fitness, or health (4 points)         Injured or severely f

Enter the combined text: Turn-Around Checklist for Climbers on Mountain (0 - 20, higher score indicates a stronger need to consider turning around):      Weather Conditions & Visibility (0 - 5 points)         Clear weather, stable conditions, and good visibility (0 points)         Moderate weather change, partial cloud cover, or minor visibility reduction (2 points)         Deteriorating weather, high winds, or whiteout conditions (4 points)         Severe weather, storm, extreme winds, or zero visibility (5 points)      Climbing Pace, Time of Day, and Route Familiarity (0 - 5 points)         On schedule or ahead of planned pace with ample daylight and familiar route (0 points)         Slightly behind planned pace, limited daylight, or unfamiliar/challenging navigation conditions (2 points)         Significantly behind planned pace, nearing darkness, or difficult navigation (4 points)         Extremely slow pace, unable to make progress, or darkness (5 points)      Exposure to Hazards 

Enter the combined text: Turn-Around Checklist for Climbers on Mountain (0 - 20, higher score indicates a stronger need to consider turning around):      Weather Conditions & Temperature (0 - 5 points)         Cool and stable conditions (0 points)         Mildly warm, slightly unstable, or moderate/deteriorating weather (2 points)         Warm temperatures, soft snow, or increasing instability (3 points)         Severe weather, high avalanche risk, or extreme temperatures (5 points)      Climbing Pace, Time of Day, and Route Familiarity & Difficulty (0 - 5 points)         On schedule, early start, familiar route, and manageable difficulty (0 points)         Slightly behind planned pace, moderate start, unfamiliar route, or challenging sections (2 points)         Significantly behind planned pace, late start, or highly challenging/technical route (4 points)         Extremely slow pace, unable to make progress, or very late start (5 points)      Exposure to Hazards & Objective Hazards (0

Enter the combined text: Turn-Around Checklist for Climbers on Mountain (0 - 20, higher score indicates a stronger need to consider turning around):      Weather Conditions & Visibility (0 - 5 points)         Clear weather, stable conditions, and good visibility (0 points)         Moderate weather change, partial cloud cover, or minor visibility reduction (2 points)         Deteriorating weather, high winds, or whiteout conditions (4 points)         Severe weather, storm, extreme winds, or zero visibility (5 points)      Climbing Pace, Time of Day, and Route Familiarity (0 - 5 points)         On schedule or ahead of planned pace with ample daylight and familiar route (0 points)         Slightly behind planned pace, limited daylight, or unfamiliar/challenging navigation conditions (2 points)         Significantly behind planned pace, nearing darkness, or difficult navigation (4 points)         Extremely slow pace, unable to make progress, or darkness (5 points)      Exposure to Hazards 

Enter the combined text: Turn-Around Checklist for Climbers on Mountain (0 - 20, higher score indicates a stronger need to consider turning around):      Weather Conditions & Forecast (0 - 5 points)         Stable and clear conditions (0 points)         Mildly unstable or changing conditions (2 points)         Deteriorating conditions or approaching storm (4 points)         Severe storm or whiteout conditions (5 points)      Climbing Pace & Time of Day (0 - 5 points)         On schedule or ahead of planned pace (0 points)         Slightly behind planned pace (2 points)         Significantly behind planned pace (4 points)         Extremely slow pace or overdue (5 points)      Team Experience, Fitness & Health (0 - 5 points)         Adequate team experience, fitness, and health for the route (0 points)         Slightly insufficient team experience, fitness, or health (2 points)         Significantly insufficient team experience, fitness, or health (4 points)         Injured or severely f

Enter the combined text: Turn-Around Checklist for Climbers on Mountain (0 - 20, higher score indicates a stronger need to consider turning around):      Weather Conditions (0 - 5 points)         Clear weather or stable conditions (0 points)         Moderate weather change, minor visibility reduction (2 points)         Deteriorating weather, high winds, or whiteout conditions (4 points)         Severe weather, storm, or extreme winds (5 points)      Avalanche Risk (0 - 5 points)         Low avalanche risk (0 points)         Moderate avalanche risk, isolated incidents (2 points)         High avalanche risk, multiple incidents in vicinity (4 points)         Very high avalanche risk, large avalanches observed (5 points)      Preparedness & Equipment (0 - 5 points)         Adequate equipment and prepared for avalanche safety (0 points)         Partially prepared or missing minor safety items (2 points)         Lacking crucial safety equipment, e.g., avalanche transceivers (4 points)       

Iteration: 17
Number of text blocks: 3
Updated text_blocks: ['Text block 0 (length: 1913)', 'Text block 1 (length: 2082)', 'Text block 2 (length: 2068)']
Debug: clusters: [1 1 1]
Grouped clusters: {1: [0, 1, 2]}
ignore all previous chat data. Below, we have point systems based checklists for deciding to turn around or not on a mountaineering route. Generate a complete and concise checklist with a point system for evaluating turn-around for a climber on the mountain based on the logical combination and combined wisdom of these systems. Ensure that the produced checklist would help the climbers avoid trouble if used. Each section of the final point system should range from 0 to N with the sum across all sections adding up to 0 to 20, with a higher score indicating a stronger need to consider turning around. Include a scoring section. Finally put the token @!@! at the end for me to use in parsing later. 
Text block 0 (length: 1913): Turn-Around Checklist for Climbers on Mountain (0 - 20, 