In [None]:
import pandas as pd
import os
os.environ['IPYTHON_SUPPRESS_DEPRECATED_CONFIGS'] = '1'
from google.colab import drive

# Mount Google Drive at /content/drive (forcing a remount) so that the
# shared-drive paths used below can be read and written.
drive.mount('/content/drive', force_remount=True)

# Define the route to the CSV file and the destination folders
route = '/content/drive/Shared drives/Capstone/Dataset_cleaned_merged/df_final_version.csv'
destination_general = '/content/drive/Shared drives/Capstone/Dataset_cleaned_merged/General_Ranking/'
destination_cluster = '/content/drive/Shared drives/Capstone/Dataset_cleaned_merged/Cluster_Rankings/'
destination_aisles = '/content/drive/Shared drives/Capstone/Dataset_cleaned_merged/Aisle_Rankings/'

# Make sure every output folder exists before anything is written:
# DataFrame.to_csv does not create missing parent directories, so a missing
# folder would otherwise abort the run at the first save. exist_ok=True makes
# this a no-op when the folders are already there.
for _output_dir in (destination_general, destination_cluster, destination_aisles):
    os.makedirs(_output_dir, exist_ok=True)

# Load the dataset
df = pd.read_csv(route)

# Create the top 10 ranking of the most sold products.
# Every row of df counts once towards its (id_producto, nombre_producto) pair.
product_counts = (
    df.groupby(['id_producto', 'nombre_producto'])
    .size()
    .reset_index(name='counts')
)

# Order by count (highest first). id_producto is an explicit tie-breaker so
# that products with equal counts always come out in the same order; with a
# single-key sort the order of tied rows (and therefore which of them make
# the top-10 cutoff) is not guaranteed.
top10_products = product_counts.sort_values(
    by=['counts', 'id_producto'], ascending=[False, True]
).head(10)

# Only the product identifier and name are exported; the count is dropped.
top10_products = top10_products[['id_producto', 'nombre_producto']]

# Save the top 10 ranking to a CSV
top10_products.to_csv(f"{destination_general}ranking_general.csv", index=False)

# Create a ranking by aisle: count the rows of df per (aisle, product) pair,
# then order the result by aisle id and, inside each aisle, by descending count.
aisle_rankings = df.groupby(['id_pasillo', 'pasillo', 'id_producto', 'nombre_producto']).size().reset_index(name='counts')
aisle_rankings = aisle_rankings.sort_values(by=['id_pasillo', 'counts'], ascending=[True, False])

# Distinct aisle ids in ranking order (retained in case later cells use it).
aisle_ids = aisle_rankings['id_pasillo'].unique()

# Save the ranking by aisle to CSVs, one file per aisle.
# Iterating the groupby splits the frame once instead of re-applying a boolean
# mask over the whole frame for every aisle. sort=False yields the aisles in
# their order of first appearance, and the rows of each group keep the ranked
# order established above.
aisle_columns = ['id_producto', 'nombre_producto', 'id_pasillo', 'pasillo']

for aisle_id, aisle_df in aisle_rankings.groupby('id_pasillo', sort=False):
    # The count column is used for ordering only and is not exported.
    aisle_df = aisle_df[aisle_columns]
    aisle_df.to_csv(f"{destination_aisles}ranking_aisle_{aisle_id}.csv", index=False)

# Create a ranking by cluster: count the rows of df per (cluster, product)
# pair, then order the result by cluster and, inside each cluster, by
# descending count.
cluster_rankings = df.groupby(['cluster', 'id_producto', 'nombre_producto']).size().reset_index(name='counts')
cluster_rankings = cluster_rankings.sort_values(by=['cluster', 'counts'], ascending=[True, False])

# Distinct cluster labels in ranking order (retained in case later cells use it).
cluster_numbers = cluster_rankings['cluster'].unique()

# Save the ranking by cluster to CSVs, one file per cluster.
# Iterating the groupby splits the frame once instead of re-applying a boolean
# mask over the whole frame for every cluster. sort=False yields the clusters
# in their order of first appearance, and the rows of each group keep the
# ranked order established above, so head(10) is the cluster's top 10.
for cluster_number, cluster_df in cluster_rankings.groupby('cluster', sort=False):
    cluster_df = cluster_df[['nombre_producto']].head(10)  # Select top 10 and only the product names
    cluster_df.to_csv(f"{destination_cluster}ranking_cluster_{cluster_number}.csv", index=False)

print("All CSV files saved successfully.")


Mounted at /content/drive
All CSV files saved successfully.
