In [59]:
import os
import pandas as pd
import openpyxl
from datetime import datetime

# Folder that is scanned for the sales-mix workbooks (a relative path).
sales_mix_directory = 'sales_mix'

# Accumulator filled by the processing loop:
#   date label -> {product name -> total quantity sold}
date_product_data = dict()

In [73]:
# Read every .xlsx workbook in `sales_mix_directory` and accumulate, per date
# label taken from the file name, the quantity sold of each product that
# survives the name filters below.

# Start from an empty accumulator so that re-running this cell does not add the
# same workbooks' quantities on top of totals left over from a previous run.
date_product_data = {}

# Exact product/category names to exclude (compared against the whole name,
# case-sensitively). Built once before the loop instead of once per data row;
# this also guarantees the name exists for the later `drop(index=names_to_remove)`
# cell even when no data row is processed.
names_to_remove = [
    "Total", "Smoothie", "Classic", "Hi Protein", "Spirit", "Superfood Plus",
    "Superfood", "Refresh", "Combo", "Regular Combo", "Snack Combo",
    "Ingredients (Smoothie)", "NO BOOSTER", "No Froyo", "No Yogurt","Pirates Nectar - R","Pirates Nectar - S","Hawaiian Sunset - R","Hawaiian Sunset - S","Acai Avalanche - R",
    "Secret/Feature", "Specialty", "Shot", "Condiments (Booster Ball)","Lemon Ginger Tumeric & Coconut S","Lemon Ginger Tumeric and Coconut","Ocean Mist - S","Ocean Mist - R","Gingerbread - R","Wildberry Rush - R","Mangosicle - R","Mangosicle - S ","Oatrageous Mocha - S","Oatrageous Mocha - R",
    "Booster Ball", "Booster Blends", "Grilled Fresh", "Lunch", "Breakfast","Macadamia Nut - Booster Ball","Candy Cane - Booster Ball","Birthday Cake - Booster Ball","Apple, Lemon & Ginger Shot - S",
    "Merchandise", "Retail", "Fresh Juice", "Condiments (Fresh Juice)","Egg White & Chorizo Wrap","Egg White & Cheese Wrap","Chocolate Peanut - Protein & Co","Caramel Cashew - Protein & Co","Mango Teazer - TB","Monster - Booster Ball",
    "Instructions", "Condiments (Inst.)", "Grilled Cheese", "Canadian Maple - Booster Ball","Go Mango - R", "Wildberry Rush - S","Chipotle Steak Panini", "Raspberry Rapture - R","Gingerbread - S","Artisan Grilled Cheese",
    "No Raspberry", "Special Prep", "whole wheat", "Split In 2 Cups", "Don't Make", "Almighty Acai Blend", "Condiments (Retail/Merchandise)", "Chipotle"]

# Substrings that disqualify a product, compared case-insensitively.
# NOTE(review): 'No' is lower-cased too, so ANY name containing "no" is
# skipped, not just names starting with the word "No" - confirm this is intended.
skip_substrings = [pattern.lower() for pattern in ['allergy', 'combo', '$', 'add', 'No']]

for file_name in os.listdir(sales_mix_directory):
    if not file_name.endswith('.xlsx'):
        continue
    file_path = os.path.join(sales_mix_directory, file_name)

    # data_only=True returns cached cell values instead of formula text.
    wb = openpyxl.load_workbook(file_path, data_only=True)
    ws = wb.active

    # Find the header row: the first row containing both 'Name' and
    # 'Quantity Sold', in whatever columns they happen to be.
    header_row_idx = None
    for rowIndex, row in enumerate(ws.iter_rows(values_only=True), start=1):
        if 'Name' in row and 'Quantity Sold' in row:
            header_row_idx = rowIndex
            # Take positions from the full row tuple. Positions computed on a
            # copy with the empty (None) cells removed would be shifted
            # whenever a blank cell precedes a header, and the data rows below
            # are indexed by their full-width position.
            name_idx = row.index('Name')
            quantity_sold_idx = row.index('Quantity Sold')
            break

    if header_row_idx is None:
        raise ValueError(f"'Name' and 'Quantity Sold' columns not found in the file: {file_name}")

    # The date label is the 3rd and 4th space-separated tokens of the file
    # name, with any ' - Copy' marker removed.
    date_part = ' '.join(file_name.split(' ')[2:4]).replace(' - Copy', '')

    # Files that map to the same date label share one product dictionary, so
    # their quantities are summed.
    day_totals = date_product_data.setdefault(date_part, {})

    for row in ws.iter_rows(min_row=header_row_idx + 1, values_only=True):
        product_name = row[name_idx]
        if product_name is None:
            # Empty name cell (blank/spacer row): nothing to record, and
            # calling .lower() on None would raise.
            continue
        quantity_sold = row[quantity_sold_idx] or 0  # Use 0 if None

        name_lower = product_name.lower()
        if any(pattern in name_lower for pattern in skip_substrings):
            continue
        if product_name in names_to_remove:
            continue

        # Add the quantity sold to the running total for this date/product.
        day_totals[product_name] = day_totals.get(product_name, 0) + quantity_sold


  warn("Workbook contains no default style, apply openpyxl's default")


In [74]:
# Build a DataFrame from the nested dictionary. Each outer key (date label)
# becomes a column and each inner key (product name) becomes a row label;
# no transpose happens at this step.
product_sales_df = pd.DataFrame.from_dict(
    data=date_product_data,
    orient='columns',
)

In [75]:
# Produce the date-indexed sales table: one row per date label, one column per
# product, with any row labelled by a name in `names_to_remove` dropped first
# (names that are not present are ignored).
#
# The table is rebuilt from `date_product_data` rather than derived from the
# current `product_sales_df`, so re-running this cell always yields dates x
# products. Transposing the existing frame in place would flip the table back
# to products x dates on every second run.
product_sales_df = (
    pd.DataFrame.from_dict(date_product_data, orient='columns')
    .drop(index=names_to_remove, errors='ignore')
    .transpose()
)

# product_sales_df.index = pd.to_datetime(product_sales_df.index + ', 2024', format='%b %d, %Y')


In [76]:
# product_sales_df.index = pd.to_datetime(product_sales_df.index + ', 2024', format='%b %d, %Y')


In [81]:
def add_correct_year(date_str, december_year=2023, other_year=2024):
    """Return `date_str` as a timestamp with its year overwritten.

    The value is parsed with `pd.to_datetime`; whatever year the parse
    produced is discarded and replaced according to the month.

    Parameters
    ----------
    date_str : str or datetime-like
        A single date value accepted by `pd.to_datetime`.
    december_year : int, default 2023
        Year assigned to dates whose month is December.
    other_year : int, default 2024
        Year assigned to dates in every other month.

    Returns
    -------
    The parsed timestamp with only its year changed.

    Notes
    -----
    NOTE(review): the defaults presumably reflect a data set that runs from
    December 2023 into 2024 - confirm before reusing on other periods.
    """
    date = pd.to_datetime(date_str)
    target_year = december_year if date.month == 12 else other_year
    return date.replace(year=target_year)

# Parse the index labels into timestamps, then give every timestamp its
# intended year via add_correct_year.
product_sales_df.index = pd.to_datetime(product_sales_df.index).map(add_correct_year)

# Put the rows in chronological order.
product_sales_df = product_sales_df.sort_index()



In [87]:

# Normalise the quantities in one pass:
#   1. missing values (NaN) become 0,
#   2. negative quantities are raised to 0,
#   3. the values are cast to integers.
product_sales_df = (
    product_sales_df
    .fillna(0)
    .clip(lower=0)
    .astype(int)
)

In [88]:
# Save the date-indexed sales table as CSV; the index column is written under
# the header 'Date'. (The variable name says "excel", but the file is a CSV.)
output_excel_path = 'sales_mix_clean_open/combined_sales_data.csv'

# to_csv does not create missing parent folders, so make sure the output
# directory exists before writing.
os.makedirs(os.path.dirname(output_excel_path), exist_ok=True)
product_sales_df.to_csv(output_excel_path, index_label='Date')

In [89]:
# Serialise the sales table to JSON, keyed by column (product) first, with the
# date index rendered as ISO 8601 strings.
json_result = product_sales_df.to_json(orient='columns', date_format='iso')

# Write the JSON text to disk. The output folder is created first because
# open() does not create missing parent directories, and the encoding is
# stated explicitly so the file does not depend on the platform default.
json_output_path = 'sales_mix_clean_open/combined_sales_data.json'
os.makedirs(os.path.dirname(json_output_path), exist_ok=True)
with open(json_output_path, 'w', encoding='utf-8') as json_file:
    json_file.write(json_result)

In [90]:
# Sanity check: show the dtype of the index, a separator line, and the dtype
# of every column (taken from the first rows of the table).
print(
    product_sales_df.index.dtype,
    "_________________________",
    product_sales_df.head().dtypes,
    sep="\n",
)

datetime64[ns]
_________________________
Nuttin' Better - R                int64
Banana's A Whey - R               int64
Ripped Berry - R                  int64
Strawberry Storm - R              int64
High Impact Acai - R              int64
Mind Over Matcha - R              int64
Pomegranate Punch - R             int64
Tropical Tornado - R              int64
Strawberry Sunshine - R           int64
Strawberry Sunshine - S           int64
Mango Hurricane - R               int64
Tropical Tornado - S              int64
Very Berry - S                    int64
Mango Hurricane - S               int64
Very Berry - R                    int64
Funky Monkey - R                  int64
Pineapple Freeze - R              int64
Berry Cream Sensation - S         int64
Canadian Colada - R               int64
Funky Monkey - S                  int64
Pineapple Freeze - S              int64
Berry Cream Sensation - R         int64
Tropi-Kale - R                    int64
Coco Crush - R                    int64