In [None]:
# Import all libraries

import numpy as np
from sklearn.cluster import DBSCAN
import pandas as pd
from utilities import hydrogenBondCheckGivenIndices, calculateAtomTypes, BAConvoys
from CMC import CMC
from hyperparameters import Hyperparameters
import time

In [None]:
# Input files for the brute-force run that takes all atoms as input.

# NumPy array file that is loaded below.
filePath = r"..\data\dataFile.npy"

# File holding the information of the atoms.
fileLocation = r"data\allAtomsWithRings.csv"

data = np.load(filePath)

In [None]:
# Hyperparameter grid for the sweep: number of leading entries of the data
# array to use, and the k / m values handed to the convoy algorithm.
dataSize = [50, 500]
Ks = [0, 1]
Ms = [25]


def _combine_ranges(group, max_gap, max_gap_ratio):
    """Merge the time ranges of one index triple that are separated by small gaps.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows with 'Start', 'End', 'Hydrogen', 'Nitrogen' and 'Oxygen' columns
        that all belong to the same (Hydrogen, Nitrogen, Oxygen) triple.
    max_gap : number
        Largest gap between two consecutive ranges that may still be bridged.
    max_gap_ratio : number
        A combined range is kept only when (sum of bridged gaps) / (length of
        the combined range) does not exceed this value.

    Returns
    -------
    list of tuple
        One ``(start, end, (hydrogen, nitrogen, oxygen))`` entry per kept range.
    """
    group = group.sort_values(by='Start')
    first = group.iloc[0]
    triple = (first['Hydrogen'], first['Nitrogen'], first['Oxygen'])

    kept = []
    combined_start = first['Start']
    combined_end = first['End']
    total_gaps = 0

    for i in range(1, len(group)):
        row = group.iloc[i]
        gap = row['Start'] - combined_end - 1
        if gap <= max_gap:
            # Extend the current range. max() keeps the end from moving
            # backwards when the next range lies inside the current one.
            combined_end = max(combined_end, row['End'])
            # An overlap yields a negative gap; it must not reduce the total.
            total_gaps += max(gap, 0)
        else:
            # Gap too large: close the current range if it passes the ratio
            # check, then start a new one at this row.
            if total_gaps / (combined_end - combined_start + 1) <= max_gap_ratio:
                kept.append((combined_start, combined_end, triple))
            combined_start = row['Start']
            combined_end = row['End']
            total_gaps = 0

    # Close the range that was still open when the rows ran out.
    if total_gaps / (combined_end - combined_start + 1) <= max_gap_ratio:
        kept.append((combined_start, combined_end, triple))

    return kept


# Load the array once; every sweep iteration only slices it.
# NOTE(review): assumes the helpers called below do not modify `data` in
# place -- confirm, otherwise reload per iteration.
fullData = np.load(filePath)

for varyk in Ks:
    for varyM in Ms:
        for size in dataSize:
            hyperParameters = Hyperparameters(m=varyM, k=varyk, epsilon=3.5, t1=2, t2=0.25, threshold=4)
            print(f"K {varyk + 1}, M {varyM}, S {size}")

            # Restrict to the first `size` entries and swap the first two
            # axes for the convoy algorithm.
            data = fullData[0:size]
            orData = np.transpose(data, (1, 0, 2)).tolist()

            # Everything from here to the final print is timed.
            startTime = time.perf_counter()

            # Clustering using DBSCAN
            clustering_clf = DBSCAN(eps=hyperParameters.epsilon)

            # Min elements for convoy = m
            # Min consecutive timesteps = k
            clf = CMC(clustering_clf, k=hyperParameters.k, m=hyperParameters.m)

            # Convoy calculation, ordered by the time each convoy starts
            convoys = clf.fit_predict(orData)
            sorted_list = sorted(convoys, key=lambda x: x.start_time)

            atomTypes = calculateAtomTypes(fileLocation)

            # For every convoy keep the hydrogen-bond check result next to
            # the convoy itself.
            finalConvoys = {
                index: [
                    hydrogenBondCheckGivenIndices(
                        convoy.start_time, convoy.end_time, convoy.indices, data, atomTypes
                    ),
                    convoy,
                ]
                for index, convoy in enumerate(sorted_list)
            }

            for indices, _ in finalConvoys.values():
                df_ranges = BAConvoys(indices, hyperParameters.k)
                if df_ranges.empty:
                    continue

                # Splitting the 'Indices' tuple into separate columns
                df_ranges[['Hydrogen', 'Nitrogen', 'Oxygen']] = pd.DataFrame(
                    df_ranges['Indices'].tolist(), index=df_ranges.index
                )

                # Merge the ranges of each (Hydrogen, Nitrogen, Oxygen) triple
                combined_ranges = []
                for _, group in df_ranges.groupby(['Hydrogen', 'Nitrogen', 'Oxygen']):
                    combined_ranges.extend(
                        _combine_ranges(group, hyperParameters.t1, hyperParameters.t2)
                    )

                # Convert the combined ranges to a DataFrame
                df_combined_ranges = pd.DataFrame(combined_ranges, columns=['Start', 'End', 'Indices'])
                print(df_combined_ranges)

            totalTime = time.perf_counter() - startTime
            print("Total time", totalTime)

