In [None]:
import pandas as pd
import numpy as np
import kmapper as km
import sklearn.cluster
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
from openpyxl import Workbook
import persim
import ripser

In [None]:
# Accumulator: one DataFrame per Mapper node, across every hotel processed below
all_clusters = []

# Fresh openpyxl workbook and a handle to its active worksheet.
# NOTE(review): neither `wb` nor `ws` is referenced again in the visible part of
# this cell (the export goes through DataFrame.to_excel) — confirm whether
# another cell still needs them.
wb = Workbook()
ws = wb.active

# For each hotel: load its CSV, plot the persistence diagrams of the selected
# features, build and export a Mapper graph, draw a MONTO_TOTAL histogram per
# Mapper node, and collect the rows of every node into `all_clusters`.
for i in range(2, 7):  # hotel ids 2..6; each one needs 'empresa_<i>_df.csv'
    name = 'empresa_' + str(i) + '_df.csv'
    dataFrame = pd.read_csv(name)

    path = "hotel_" + str(i) + "_periodicidad.html"  # HTML output of the Mapper graph
    nombre = "Mapper Hotel " + str(i)  # title shown in the Mapper visualization

    # Select relevant columns
    data = dataFrame[['CANTIDAD_VENTAS_ALI','CANTIDAD_VENTAS_BEB', 'MONTO_TOTAL','DIA_SEMANA','CANTIDAD_REGISTOS_VTAS','DIA_FESTIVO','CANTIDAD_VENTAS']]
    z = data['MONTO_TOTAL']  # values used to colour the graph and for the histograms

    # Persistence diagrams of the selected feature columns
    ripsernonperiod = ripser.ripser(data)['dgms']
    persim.plot_diagrams(ripsernonperiod, show=True)

    # Lens (projection) of the data, cover of the lens and per-cube clusterer
    mapper = km.KeplerMapper(verbose=0)
    projected_data = mapper.fit_transform(data.to_numpy())
    covering = km.Cover(n_cubes=8, perc_overlap=0.1)
    clusterer = sklearn.cluster.DBSCAN(min_samples=20, metric='cosine')

    # Create the mapper graph
    G = mapper.map(projected_data, data, clusterer=clusterer, cover=covering)

    # Visualize the mapper graph
    mapper.visualize(G,
                     title=nombre,
                     color_values=z,
                     color_function_name='MONTO_TOTAL',
                     node_color_function=np.array(['average','std','sum','max','min']),
                     path_html=path)

    # One figure per hotel with the MONTO_TOTAL histograms of all nodes overlaid.
    # The title is set once instead of on every node iteration.
    plt.figure(figsize=(6, 4))
    plt.title('hotel ' + str(i))

    for node, cluster in G['nodes'].items():
        # `cluster` holds the row positions of the node's members in the array
        # given to mapper.map, so index positionally (.iloc) rather than by
        # label; the two only coincide while the frame keeps a default RangeIndex.
        plt.hist(z.iloc[cluster])

        # Collect the full original rows of this node, tagged with node id and hotel.
        # .assign builds a new frame, so `dataFrame` itself is never modified.
        clust = dataFrame.iloc[cluster].assign(Cluster=node, Hotel=str(i))
        all_clusters.append(clust)

    plt.show()

# Concatenate all clusters into a single DataFrame after all hotels are processed.
# pd.concat raises a bare "No objects to concatenate" ValueError on an empty list,
# which is what happens when no Mapper graph produced any node; raise the same
# exception type with a message that points at the likely cause instead.
if not all_clusters:
    raise ValueError(
        "No Mapper nodes were produced for any hotel, so there is nothing to export; "
        "check the input CSV files and the cover/DBSCAN settings."
    )
combined_clusters = pd.concat(all_clusters, ignore_index=True, sort=False)

# Save the combined DataFrame to an Excel file
combined_clusters.to_excel('combined_clusters.xlsx', index=False)
