In [1]:
import re

import numpy as np
import pandas as pd

# Read the product cost workbook (Book1.xlsx) into a dataframe
cost_of_products = pd.read_excel('data_files/Book1.xlsx')

In [2]:
# Keep an untouched copy of the raw import so the working frame can be reset
# without reading the Excel file again.
reserve = cost_of_products.copy()

In [13]:
# Reset the working frame from the raw backup held in `reserve`.
cost_of_products = reserve.copy()

In [15]:
# rename the columns ('COGS' and 'Characteristic' already have their final names)
cost_of_products = cost_of_products.rename(columns={
    'Material/Model': 'Model',
    'Description': 'Model_Description',
    'Description2': 'Option_Description',
    'Value': 'Option',
    'Product Hierarchy': 'BU',
})

# keep only the first 5 characters of the 'BU' column
cost_of_products['BU'] = cost_of_products['BU'].str.slice(stop=5)

# keep only the desired columns; .copy() makes the subset an independent frame,
# so the column assignments below cannot trigger SettingWithCopyWarning
cost_of_products = cost_of_products[
    ['Model', 'Model_Description', 'Characteristic', 'Option', 'Option_Description', 'COGS', 'BU']
].copy()

# String version of 'Characteristic' with everything up to (and including) the
# first underscore removed; values without an underscore are kept as they are.
# Note: astype('str') turns missing values into the text 'nan'.
cost_of_products['Characteristic_'] = (
    cost_of_products['Characteristic']
    .astype('str')
    .str.split('_', n=1)
    .str[-1]
)

# String versions of the option code and its description
cost_of_products['Option_'] = cost_of_products['Option'].astype('str')
cost_of_products['Option_Description_'] = cost_of_products['Option_Description'].astype('str')

In [16]:
# Work on a copy so the dtype conversion below leaves cost_of_products untouched
cost_of_products_opt = cost_of_products.copy()
# Reduce the memory usage of the dataframe and improve performance
def check_unique_values(df, max_unique=50):
    """Convert low-cardinality columns of ``df`` to the ``category`` dtype.

    The frame is modified in place and a short report of the converted
    columns is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected and possibly converted.
    max_unique : int, default 50
        A column is converted when its number of distinct non-null values
        is strictly below this threshold.

    Returns
    -------
    None
    """
    # nunique() ignores missing values, so NaN does not count as a category
    changed_columns = [col for col in df.columns if df[col].nunique() < max_unique]
    for col in changed_columns:
        df[col] = df[col].astype('category')

    if changed_columns:
        print("The following columns were changed to categorical data type: ")
        for col in changed_columns:
            print(col)
    else:
        print("No columns were changed to categorical data type.")
        
# Converts qualifying columns of cost_of_products_opt in place and prints which ones changed
check_unique_values(cost_of_products_opt)

The following columns were changed to categorical data type: 
BU


In [114]:
# Export the prepared cost table. ExcelWriter.save() was deprecated in pandas 1.5
# and removed in 2.0; the context manager writes and closes the workbook on exit,
# also when to_excel raises.
with pd.ExcelWriter('data_files/cogs_prep.xlsx', engine='xlsxwriter') as writer:
    cost_of_products_opt.to_excel(writer, sheet_name='cogs')

In [25]:
# Progress interval (in rows) read as a global by the function below.
# Note: the costing cell further down rebinds this same name to 5000.
index_step = 10000

def create_product_models_from_dataframe(df, progress_step=None):
    """Build ``ProductModel`` objects from a price table.

    Rows are processed in order:

    * a row whose ``Characteristic`` is missing always starts a new model;
    * a row whose ``Characteristic_`` text has exactly 2 characters adds a
      characteristic to the first model with the same ``Model`` code;
    * a row whose ``Characteristic_`` text has more than 2 characters adds an
      option to that model;
    * shorter ``Characteristic_`` values are ignored.

    When no model with the row's code exists yet, one is created from the
    row itself before the characteristic/option is added.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``Model``, ``Model_Description``, ``Characteristic``,
        ``Characteristic_``, ``Option``, ``Option_Description`` and ``COGS``.
        The index labels must support ``%`` (they drive the progress output).
    progress_step : int, optional
        Print a progress line whenever the index label is a multiple of this
        value. Defaults to the module-level ``index_step``.

    Returns
    -------
    list
        The created ``ProductModel`` objects in order of creation.

    Notes
    -----
    ``ProductModel`` must be defined before this function is called.
    """
    if progress_step is None:
        progress_step = index_step

    product_models = []
    # First model created for each code. A dict lookup replaces the former
    # linear scan over product_models for every row (quadratic on large tables)
    # while still resolving to the earliest model with that code.
    first_model_by_code = {}
    row_count = len(df)

    def _new_model(row):
        """Create a model from ``row``, record it, and return it."""
        model = ProductModel(row['Model'], row['Model_Description'], row['COGS'])
        product_models.append(model)
        first_model_by_code.setdefault(row['Model'], model)
        return model

    for index, row in df.iterrows():
        if index % progress_step == 0:
            print(f'Row {index} from {row_count}')

        # Rows without a Characteristic describe the base model itself
        if pd.isna(row['Characteristic']):
            _new_model(row)
            continue

        code_length = len(row['Characteristic_'])
        if code_length < 2:
            continue

        model = first_model_by_code.get(row['Model'])
        if model is None:
            # NOTE(review): the fallback model takes this row's COGS as its base
            # price and the same COGS is added again below - confirm intended.
            model = _new_model(row)

        if code_length == 2:
            model.add_characteristic(row['Option'], row['Option_Description'], row['COGS'])
        else:
            model.add_option(row['Option'], row['Option_Description'], row['COGS'])

    return product_models

In [18]:
# Identify 'easy' positions: order codes that equal the 'Model' of a base-model price row

# Read Orders Data
orders_data = pd.read_excel('data_files/orders_data.xlsx')

codes = orders_data.loc[:, ['ms_code', 'order_intake_quantity']]
# keep unique codes only (first occurrence wins) and align the key name with the price table
codes_unique = (
    codes
    .drop_duplicates(subset='ms_code')
    .rename(columns={'ms_code': 'Model'})
)

# Base-model rows are the ones without a 'Characteristic' value
prices = cost_of_products_opt.loc[cost_of_products_opt['Characteristic'].isna()].copy()

# Codes that match a base-model row directly can be priced without parsing
easy_positions = pd.merge(codes_unique[['Model']], prices[['Model', 'COGS']], on='Model', how='inner')

# Save the result. ExcelWriter.save() was removed in pandas 2.0; the context
# manager writes and closes the workbook on exit.
with pd.ExcelWriter('data_files/result.xlsx', engine='xlsxwriter') as writer:
    easy_positions.to_excel(writer, sheet_name='result')

In [19]:
# True for every unique order code that was NOT priced as an easy position
mask = ~codes_unique['Model'].isin(easy_positions['Model'])
# Keep those codes and renumber the rows from zero
complex_position = codes_unique.loc[mask].reset_index(drop=True)

In [247]:
# NOTE(review): `df` is not created in any visible cell of this notebook, so this
# cell fails on a fresh kernel - presumably it holds the extract_groups() output
# for the complex positions; confirm and add the cell that builds it.
# ExcelWriter.save() was removed in pandas 2.0; the context manager writes and
# closes the workbook on exit.
with pd.ExcelWriter('data_files/df_group1.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='result')

In [22]:
# prepare price source

# Create a boolean mask indicating which positions are in the df complex_positions dataframe
mask_1 = cost_of_products_opt['Model'].isin(df['group1'])
# Invert the boolean mask to keep only the positions not in the second dataframe
#mask_1 = ~mask_1
# Select the rows corresponding to the positions in the second dataframe
price_prepared = cost_of_products_opt.loc[mask_1]
price_prepared.reset_index(inplace=True, drop=True)

In [262]:
# Export the filtered price rows. ExcelWriter.save() was removed in pandas 2.0;
# the context manager writes and closes the workbook on exit.
with pd.ExcelWriter('data_files/price_prepared.xlsx', engine='xlsxwriter') as writer:
    price_prepared.to_excel(writer, sheet_name='result')

In [88]:
# Regex for one code group: either a separator ('-' or '/') followed by a run of
# non-separator characters, or the run of non-separator characters at the very
# start of the code.
pattern = r'([-/][^-/]+|^[^-/]+)'


def extract_groups(code):
    """Split a model code into its ordered groups.

    The code is converted to ``str`` and cut into groups with ``pattern``.
    Every group after the first that contains a dash and is longer than
    3 characters is broken up further: the dash stays attached to the
    character that follows it and each remaining character becomes a group
    of its own (``'-DMS5G'`` -> ``'-D', 'M', 'S', '5', 'G'``). All other
    groups are kept whole.

    Parameters
    ----------
    code : object
        The code to parse; anything accepted by ``str()``.

    Returns
    -------
    dict
        ``{'group1': ..., 'group2': ..., ...}`` in code order; empty when the
        code yields no groups.
    """
    code = str(code)
    raw_groups = re.findall(pattern, code)

    # Build the result in a single pass. The previous version expanded the
    # groups into characters and then re-joined each '-' with its neighbour
    # while looping over a precomputed index range; the list shrank during
    # that loop, so codes with three or more long dash groups raised IndexError.
    groups = raw_groups[:1]
    for group in raw_groups[1:]:
        if '-' in group and len(group) > 3:
            groups.append(group[:2])   # dash plus the first character
            groups.extend(group[2:])   # each remaining character on its own
        else:
            groups.append(group)

    return {'group{}'.format(position): group
            for position, group in enumerate(groups, start=1)}

In [97]:
#model_cost = pd.read_excel('data_files/EJX130A.xlsx')
#model_cost = price_prepared.copy()

# Define the ProductModel class
class ProductModel:
    """A base model with a price plus the characteristics and options recorded for it."""

    def __init__(self, model_code, model_description, price, characteristics=None, options=None):
        """Store the model data; missing or empty lists are replaced by fresh empty lists."""
        self.model_code = model_code
        self.model_description = model_description
        self.price = price
        self.characteristics = characteristics if characteristics else []
        self.options = options if options else []

    def add_characteristic(self, name, description, price):
        """Append a characteristic record (name, description, price)."""
        self.characteristics.append(dict(name=name, description=description, price=price))

    def add_option(self, name, description, price):
        """Append an option record (name, description, price)."""
        self.options.append(dict(name=name, description=description, price=price))

    def define_cost(self, ms_code):
        """Price ``ms_code`` against this model.

        The code is parsed with ``extract_groups``. When its first group is
        not this model's code the method returns ``0``. Otherwise it returns
        ``(total, leftovers)``: ``total`` is the model price plus the price of
        every matched entry, rounded to 2 decimals, and ``leftovers`` lists the
        code groups that were not matched.

        Matching is positional: characteristics are walked first, then options,
        each in stored order, and an entry matches only when its name (as a
        string) equals the group currently at the front of the unmatched list.

        ``self.total_cost`` is reset on every call and holds the unrounded total.
        """
        parsed = extract_groups(ms_code)
        self.total_cost = 0
        remaining = list(parsed.values())

        # Guard clause: this price model does not belong to the code
        if remaining[0] != self.model_code:
            return 0

        self.total_cost += self.price
        remaining.remove(self.model_code)

        for entry in self.characteristics + self.options:
            label = str(entry['name'])
            if remaining and label == remaining[0]:
                self.total_cost += entry['price']
                del remaining[0]

        return round(self.total_cost, 2), list(remaining)

    def __str__(self):
        """Return a three-line, human-readable summary of the model."""
        return f'Model code: {self.model_code}, Description: {self.model_description}, Price: {self.price}\n Characteristics: {self.characteristics}\n Options: {self.options}'

In [None]:
# Build the ProductModel objects from the filtered price rows
price_list = create_product_models_from_dataframe(price_prepared)

In [None]:
# Progress interval for this loop (rebinds the global also read by
# create_product_models_from_dataframe)
index_step = 5000

costs = {}        # ms_code -> total cost, or 'check' when pricing raised an error
mis_values = {}   # ms_code -> list of unmatched code groups, or 'check'
ok = ''           # last code that was priced successfully (shown in the progress line)
total_codes = len(complex_position.Model)

for index, model in enumerate(complex_position.Model):
    if index % index_step == 0:
        print(f'Row {index} from {total_codes}, {ok}')
    for product in price_list:
        try:
            # Evaluate once; define_cost returns 0 when the product does not
            # belong to this code and a (cost, leftovers) tuple when it does.
            result = product.define_cost(model)
        except Exception:
            # Best effort: mark the code for manual review instead of aborting.
            # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
            costs[model] = 'check'
            mis_values[model] = 'check'
            continue
        if result:
            costs[model], mis_values[model] = result
            ok = model

In [117]:
# Turn both result dicts into frames; the dict keys end up in an 'index' column

cost_df = pd.DataFrame.from_dict(costs, orient='index', columns=['Cost']).reset_index()

missing_df = pd.DataFrame.from_dict(mis_values, orient='index').reset_index()

In [127]:
# Attach the leftover code groups to each cost; 'index' holds the dict keys
# (the ms_code values) in both frames
compl_positions = pd.merge(cost_df, missing_df, on='index', how='left')

In [128]:
# Export the priced complex positions. ExcelWriter.save() was removed in
# pandas 2.0; the context manager writes and closes the workbook on exit.
with pd.ExcelWriter('data_files/compl_positions.xlsx', engine='xlsxwriter') as writer:
    compl_positions.to_excel(writer, sheet_name='result')

In [132]:
# Align the column names with easy_positions ('Model', 'COGS').
# NOTE(review): missing_df is built without explicit column names, so its first
# data column is presumably labelled with the integer 0, not the string '0' -
# in that case the '0' -> 'Check' entry is a silent no-op. A later cell reads
# column 0 by its integer label, so fix both places together.
compl_positions = compl_positions.rename(columns={'index': 'Model', 'Cost': 'COGS', '0': 'Check'})

In [134]:
# Stack complex and easy positions row-wise; the original row labels are kept,
# so labels can repeat in the combined frame
cogs_price_positions = pd.concat([compl_positions, easy_positions], axis=0)

In [153]:
import numpy as np

def check_value(value):
    """Return 0 when ``value`` is missing, otherwise 1.

    "Missing" means anything ``pd.isna`` reports (NaN, None, NaT, pd.NA) or
    the literal string ``'None'``.

    The former ``value == np.NaN`` test was dropped: NaN never compares equal
    to anything (``pd.isna`` already covers it), and the ``np.NaN`` alias was
    removed in NumPy 2.0, where evaluating it raises AttributeError.
    """
    if pd.isna(value) or value == 'None':
        return 0
    return 1
    
# Flag each row: 1 when the integer-labelled column 0 holds a value, 0 when it is
# missing (presumably column 0 is the first unmatched code group - confirm)
cogs_price_positions['Chk'] = cogs_price_positions[0].apply(check_value)

In [155]:
# Export the combined result. ExcelWriter.save() was removed in pandas 2.0;
# the context manager writes and closes the workbook on exit.
with pd.ExcelWriter('data_files/priced_positions.xlsx', engine='xlsxwriter') as writer:
    cogs_price_positions.to_excel(writer, sheet_name='result')