# Load

In [2]:
import pandas as pd
from data_retrieval import connect_db, fetch_trx_data, fetch_cat_data, fetch_cat1_data, fetch_item_descriptions, fetch_private_label_data, fetch_sustainability_data, fetch_segment_data
from data_processing import multi_aggregate_data, apply_custom_calculations
from data_analysis import create_baskets_365_qty, flatten_baskets
from modelling import create_cooccurrence_matrix_with_recommendations_15, create_pair_frequency_matrix,add_freq
from modeling_2 import replace_low_values,get_top_5_cat3_items, get_top_5_cat1_items, create_cat3_to_top_item_map, replace_recommendations
from modeling_3 import filter_mfg_name, replace_item_cde_with_cat3_set,map_and_add_recommendations_top3, map_and_add_recommendations_cat1_top3, transform_recommendations, filter_fs_segment, filter_print_segment, filter_pkg_segment, reorder_private, reorder_alliance, reorder_sustainable, add_descriptions
from modeling_4 import remove_duplicate_and_self_references, shift_recommendations_left, copy_rows_with_0_to_2_recommendation, copy_rows_with_0_or_1_recommendation, add_recommendations2, minimum_three_recommendations, are_values_unique, remove_empty_related_items

In [3]:
# Sittun's code review edit suggestions

#import modelling as m1
#import modeling_2 as m2

In [4]:
#m2.replace_low_values()

In [5]:
import warnings

# Suppress all warnings
warnings.filterwarnings('ignore')

In [6]:
# Load items 
df = pd.read_excel('data/AB List_dc_removed.xlsx', usecols=['Item Number'])
print(f">> Loaded {len(df)} e-commerce items with their item_cde")

>> Loaded 4291 e-commerce items with their item_cde


In [7]:
# Rename the columns
df.rename(columns={'Item Number': 'item_cde'}, 
          inplace=True)
print(f">> Renamed the columns to {df.columns.tolist()}")

>> Renamed the columns to ['item_cde']


In [8]:
# connecting to db
conn = connect_db()
print(">> Connected to database")

>> Connected to database


In [9]:
# getting transaction data from db
trx_df = fetch_trx_data(conn, df)
print(f">> Fetched the transaction data containing {len(trx_df)} rows")

>> Fetched the transaction data containing 1333380 rows


## If Manufacturer filteration is needed (when scope not given)

In [10]:
#trx_df_new = trx_df[trx_df['mfg_name'].str.contains('3M', na=False)]


In [11]:
# OPTIONAL: Apply if mfg filter needed
#mfg_df=filter_mfg_name(trx_df, '')

## Filter segment as needed

In [12]:
#Get only the non Print df entries
trx_df_seg = filter_fs_segment(trx_df)
print(f">> Size of data for non print  data is Length: {len(trx_df)}")

>> Size of data for non print  data is Length: 1333380


In [13]:
# aggregate data at bill-to
aggregated_df = multi_aggregate_data(trx_df_seg)
print(f">> Aggregated data at bill-to level.Length: {len(aggregated_df)} rows")

>> Aggregated data at bill-to level.Length: 8244 rows


In [14]:
# custom column calculation for avg time interval
custom_calculated_df = apply_custom_calculations(aggregated_df)
print(f">> Calculated custom columns.Length: {len(custom_calculated_df)} rows")

>> Calculated custom columns.Length: 8244 rows


In [15]:
# make baskets for all possible starting date
baskets_df_365_qty = create_baskets_365_qty(custom_calculated_df)
print(f">> Made baskets with size: {len(baskets_df_365_qty)} baskets")

>> Made baskets with size: 8244 baskets


In [16]:
# flatten the baskets
flattened_basket_list_365 = flatten_baskets(baskets_df_365_qty)
print(f">> Flattened baskets. Length: {len(flattened_basket_list_365)}")

>> Flattened baskets. Length: 160239


In [17]:
# make item level recommendations
co_occurrence_matrix, recommendation_df = create_cooccurrence_matrix_with_recommendations_15(flattened_basket_list_365)
print(f">> Made item level recommendations for: {len(recommendation_df)} items")

100%|██████████| 160239/160239 [00:09<00:00, 16519.46it/s]
100%|██████████| 1435/1435 [00:03<00:00, 386.14it/s] 


>> Made item level recommendations for: 1435 items


In [18]:
# Get the pair frequency matrix
pair_freq_matrix = create_pair_frequency_matrix(flattened_basket_list_365)
print(f">> Created pair wise frequency matrix of Length: {len(pair_freq_matrix)}")

>> Created pair wise frequency matrix of Length: 1452


In [19]:
#add freq
recommendation_df=add_freq(recommendation_df,pair_freq_matrix)
print(f">>Added freq to the df {len(recommendation_df)}")

>>Added freq to the df 1435


In [20]:
recommendation_df

Unnamed: 0,item_cde,Recommendation 1,Recommendation 2,Recommendation 3,Recommendation 4,Recommendation 5,Recommendation 6,Recommendation 7,Recommendation 8,Recommendation 9,...,frequency 6,frequency 7,frequency 8,frequency 9,frequency 10,frequency 11,frequency 12,frequency 13,frequency 14,frequency 15
0,10012415,10012427,10357365,10098323,10012724,10012416,10597676,10943252,10546160,10928677,...,154.0,199.0,153.0,108.0,99.0,184.0,150.0,116.0,155.0,361.0
1,10012416,10012724,10357365,20012592,10012427,11153332,11153329,10012415,10012428,11153335,...,94.0,455.0,366.0,72.0,192.0,101.0,161.0,98.0,105.0,125.0
2,10012418,10357365,20037330,10050648,20077538,10084953,10771735,10801559,10058085,10282034,...,276.0,272.0,274.0,262.0,262.0,424.0,262.0,264.0,262.0,259.0
3,10012422,10098323,10781406,10049812,10928677,10282034,10805288,10532757,10012427,10546160,...,255.0,196.0,412.0,293.0,262.0,404.0,258.0,187.0,509.0,189.0
4,10012427,10012416,10012415,10012724,10357365,10098323,10928677,10049812,10597676,20012592,...,303.0,303.0,166.0,153.0,127.0,97.0,118.0,100.0,106.0,181.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1430,20103405,10396065,10042792,10710453,10050919,10049812,10298224,10941576,10546164,10042222,...,471.0,715.0,261.0,228.0,254.0,205.0,200.0,200.0,199.0,200.0
1431,20103861,20037330,20077538,10049681,20064073,20064071,10474720,10447705,20064070,10059527,...,12.0,12.0,12.0,4.0,7.0,3.0,2.0,2.0,3.0,3.0
1432,20112154,10574148,20088626,10098323,20088556,20090322,10805851,11010266,11010325,10530193,...,10.0,10.0,6.0,10.0,20.0,7.0,9.0,7.0,9.0,6.0
1433,20121463,20037330,10805191,10781409,11010274,10805205,10814854,20064077,10265242,10907916,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [21]:
def replace_low_values_new(df,threshold=100):
    # Create the list of frequency columns
    frequency_columns = [f'frequency {i}' for i in range(1, 16)]
    
    # Replace values < threshold with 'replace_with'
    df[frequency_columns] = df[frequency_columns].applymap(lambda x: 'CAT' if x < threshold else x)
    
    return df

In [22]:
#Replace recommendations with low basket count with the top cat 3 item
recommendation_df=replace_low_values_new(recommendation_df,100)
print(f">>Replaced items of baskets with low thresholds: {len(recommendation_df)} rows")

>>Replaced items of baskets with low thresholds: 1435 rows


In [23]:
# Get the cat3 top 5 items
top_5_items_cat3=get_top_5_cat3_items(trx_df_seg)
print(f">> Got cat3 top 5 items: {len(top_5_items_cat3)}")

>> Got cat3 top 5 items: 118


In [24]:
# Get the cat1 top 5 items
top_5_items_cat1=get_top_5_cat1_items(trx_df_seg)
print(f">> Got cat1 top 5 items: {len(top_5_items_cat1)}")

>> Got cat1 top 5 items: 12


In [25]:
# getting cat3 data from db
conn = connect_db()

cat3_df = fetch_cat_data(conn, df)
print(f">> Fetched the cat3 data containing {len(cat3_df)} rows")

%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 

In [80]:
cat3_df[cat3_df['item_cde']=='10953539']

Unnamed: 0,item_cde,Category
2211,10953539,Janitorial Supplies.Mops / Squeegees.Dust Mops


In [26]:
conn = connect_db()
print(">> Connected to database")

>> Connected to database


In [27]:
# getting cat1 data from db
cat1_df = fetch_cat1_data(conn, df)
print(f">> Fetched the cat1 data containing {len(cat1_df)} rows")

%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 

In [28]:
# Map item to top item in cat3
item_to_cat3_top_map= create_cat3_to_top_item_map(df, cat3_df, top_5_items_cat3)
print(f">> Map items from item to top items in cat3 {len(item_to_cat3_top_map)} rows")

>> Map items from item to top items in cat3 1677 rows


In [29]:
# connecting to db
conn = connect_db()
print(">> Connected to database")

>> Connected to database


In [30]:
# getting segment data from db
seg_df = fetch_segment_data(conn, df)
print(f">> Fetched the segment data containing {len(seg_df)} rows")

>> Fetched the segment data containing 4291 rows


In [31]:
#filter Packaging segment
seg_pkg_df=seg_df[seg_df['segment'].str.contains('Facility', na=False)]
print(f">> Items in segment data containing {len(seg_pkg_df)} rows")

>> Items in segment data containing 1683 rows


In [32]:
# Replace items with low freq with top items

#new_recommendation_df=replace_recommendations(recommendation_df,item_to_cat3_top_map)
#print(f">> Replacde items with low freq with top items with {len(new_recommendation_df)} rows")

# Cat 3 operations

In [33]:
# Make cat3 level baskets for co-occurance
cat3_basket_365=replace_item_cde_with_cat3_set(flattened_basket_list_365, cat3_df)
print(f">> Made cat3 level baskets for co-occurance size: {len(cat3_basket_365)}")

>> Made cat3 level baskets for co-occurance size: 160239


In [34]:
# Create co-occurance df for cat3 level
cat3_co_occurrence_matrix, cat3_recommendation_df=create_cooccurrence_matrix_with_recommendations_15(cat3_basket_365, top_n=5)
print(f">> Create co-occurance df for cat3 level df Length: {len(cat3_recommendation_df)}")

  0%|          | 0/160239 [00:00<?, ?it/s]

100%|██████████| 160239/160239 [00:02<00:00, 55686.68it/s]
100%|██████████| 117/117 [00:00<00:00, 1172.06it/s]

>> Create co-occurance df for cat3 level df Length: 117





In [81]:
cat3_recommendation_df

Unnamed: 0,Recommendation 1,Recommendation 2,Recommendation 3,Recommendation 4,Recommendation 5
Can Liners.High Density.MISSING,"Towels, Tissues.Towels.Hard Wound Roll","Towels, Tissues.Toilet Tissue.Standard Roll",Skincare.Soaps / Hand Cleaners.MISSING,Can Liners.Low Density.MISSING,"Towels, Tissues.Toilet Tissue.Jumbo Roll"
Can Liners.Low Density Repro.MISSING,"Towels, Tissues.Towels.Hard Wound Roll","Towels, Tissues.Toilet Tissue.Standard Roll",Can Liners.Low Density.MISSING,"Towels, Tissues.Toilet Tissue.Jumbo Roll",Can Liners.High Density.MISSING
Can Liners.Low Density.MISSING,"Towels, Tissues.Towels.Hard Wound Roll",Can Liners.High Density.MISSING,"Towels, Tissues.Toilet Tissue.Standard Roll","Towels, Tissues.Toilet Tissue.Jumbo Roll","Towels, Tissues.Facial Tissue.MISSING"
Can Liners.Paper.MISSING,Skincare.Sanitizers / Sanitizing Wipes.MISSING,Personal Protection and Safety.Gloves.Vinyl,Food Service.Cups / Lids.Paper,Can Liners.High Density.MISSING,Food Service.Cups / Lids.Lids
Can Liners.Specialty / Polypropylene.MISSING,Can Liners.High Density.MISSING,Can Liners.Low Density.MISSING,"Towels, Tissues.Towels.Hard Wound Roll","Towels, Tissues.Toilet Tissue.Standard Roll",Skincare.Soaps / Hand Cleaners.MISSING
...,...,...,...,...,...
"Towels, Tissues.Towels.Multi Fold","Towels, Tissues.Towels.Hard Wound Roll","Towels, Tissues.Toilet Tissue.Jumbo Roll","Towels, Tissues.Toilet Tissue.Standard Roll",Can Liners.High Density.MISSING,Food Service.Cups / Lids.Plastic
"Towels, Tissues.Towels.Single Fold","Towels, Tissues.Toilet Tissue.Standard Roll",Can Liners.High Density.MISSING,"Towels, Tissues.Towels.Hard Wound Roll","Towels, Tissues.Toilet Tissue.Jumbo Roll","Towels, Tissues.Towels.Multi Fold"
Wipers.Chemical Wipers.MISSING,"Towels, Tissues.Towels.Hard Wound Roll","Towels, Tissues.Toilet Tissue.Jumbo Roll",Can Liners.High Density.MISSING,Can Liners.Low Density.MISSING,Chemicals.Cleaners.MISSING
Wipers.Fabric.MISSING,Can Liners.High Density.MISSING,Chemicals.Cleaners.MISSING,Janitorial Supplies.Mops / Squeegees.Wet Mops,Janitorial Supplies.Mops / Squeegees.Dust Mops,Skincare.Soaps / Hand Cleaners.MISSING


In [82]:
cat3_recommendation_df.loc["Janitorial Supplies.Mops / Squeegees.Dust Mops"]


Recommendation 1                       Chemicals.Cleaners.MISSING
Recommendation 2                  Can Liners.High Density.MISSING
Recommendation 3           Skincare.Soaps / Hand Cleaners.MISSING
Recommendation 4    Janitorial Supplies.Mops / Squeegees.Wet Mops
Recommendation 5      Chemicals.Warewashing / Dishwashing.MISSING
Name: Janitorial Supplies.Mops / Squeegees.Dust Mops, dtype: object

In [36]:
def map_and_add_recommendations_new(df, cat3_df, new_recommendation_df, cat3_recommendation_df, top_5_items_cat3):
    # Create a map from item_cde to Category
    item_to_cat3 = pd.Series(cat3_df['Category'].values, index=cat3_df['item_cde']).to_dict()

    # Create a map from each category to its top item code
    category_to_top5_items = top_5_items_cat3.set_index('cat')['item_cde'].apply(lambda x: x[:3]).to_dict()
    print(category_to_top5_items)
    # Initialize a list to hold new rows to add
    rows_to_add = []

    # Iterate over each item in df
    for item in df['item_cde']:
        item = str(item)
        category = item_to_cat3.get(item)
        
        # Check if the item is in new_recommendation_df
        if item not in new_recommendation_df['item_cde'].values:
            # Get recommendations for the category
            if category in cat3_recommendation_df.index:
                recommendations = cat3_recommendation_df.loc[category].to_dict()
                new_row = {'item_cde': item}

                # Replace category recommendations with item codes from the map
                for i in range(0, 5):
                    recommendation_col = f'Recommendation {i+1}'
                    recommendation_col1 = f'Recommendation {i*3+1}'
                    recommendation_col2 = f'Recommendation {i*3+2}'
                    recommendation_col3 = f'Recommendation {i*3+3}'
                    cat_recommendation = recommendations.get(recommendation_col)
                    
                    if cat_recommendation in category_to_top5_items:
                        top_items = category_to_top5_items[cat_recommendation]
                        
                        # Safely assign values, ensuring there are enough items in the list
                        if len(top_items) > 0:
                            new_row[recommendation_col1] = top_items[0]
                        if len(top_items) > 1:
                            new_row[recommendation_col2] = top_items[1]
                        if len(top_items) > 2:
                            new_row[recommendation_col3] = top_items[2]
                    else:
                        new_row[recommendation_col] = ''


                rows_to_add.append(new_row)

    # Create a DataFrame for the new rows
    if rows_to_add:
        rows_to_add_df = pd.DataFrame(rows_to_add)
    else:
        rows_to_add_df = pd.DataFrame(columns=new_recommendation_df.columns)

    # Concatenate the new rows to new_recommendation_df
    updated_recommendation_df = pd.concat([new_recommendation_df, rows_to_add_df], ignore_index=True)

    return updated_recommendation_df

In [37]:
# Map cat3 and add cat3 level recos
new_cat3_recommendation_df = map_and_add_recommendations_new(seg_pkg_df, cat3_df, recommendation_df, cat3_recommendation_df,top_5_items_cat3)
print(f">> Product+cat3 level df of Length: {len(new_cat3_recommendation_df)} rows")

{'Can Liners.High Density.MISSING': ['11023420', '10961235', '10805274'], 'Can Liners.Low Density Repro.MISSING': ['10779728', '10805844', '10805836'], 'Can Liners.Low Density.MISSING': ['10287922', '10805851', '11119005'], 'Can Liners.Paper.MISSING': ['10329848', '10875994', '10733145'], 'Can Liners.Specialty / Polypropylene.MISSING': ['10454701', '10099567', '10287778'], 'Chemicals.Air Care / Odor Control (Deodorizers, Neutralizers).MISSING': ['10801177', '10307810', '10817129'], 'Chemicals.Bleach / Ammonia.MISSING': ['10624692'], 'Chemicals.Cleaners.MISSING': ['10943393', '10477987', '11079423'], 'Chemicals.Dispensers.MISSING': ['20074842', '20012363', '10847858'], 'Chemicals.Hard Floor Care.MISSING': ['10631239', '10731351', '10089883'], 'Chemicals.Ice Treatments.MISSING': ['10420423', '10080880', '10247011'], 'Chemicals.Laundry Products.MISSING': ['10968242', '10907476', '11144105'], 'Chemicals.Pest Control.MISSING': ['11100655'], 'Chemicals.Polishes / Waxes.MISSING': ['10820474']

In [38]:
new_cat3_recommendation_df

Unnamed: 0,item_cde,Recommendation 1,Recommendation 2,Recommendation 3,Recommendation 4,Recommendation 5,Recommendation 6,Recommendation 7,Recommendation 8,Recommendation 9,...,frequency 6,frequency 7,frequency 8,frequency 9,frequency 10,frequency 11,frequency 12,frequency 13,frequency 14,frequency 15
0,10012415,10012427,10357365,10098323,10012724,10012416,10597676,10943252,10546160,10928677,...,154.0,199.0,153.0,108.0,CAT,184.0,150.0,116.0,155.0,361.0
1,10012416,10012724,10357365,20012592,10012427,11153332,11153329,10012415,10012428,11153335,...,CAT,455.0,366.0,CAT,192.0,101.0,161.0,CAT,105.0,125.0
2,10012418,10357365,20037330,10050648,20077538,10084953,10771735,10801559,10058085,10282034,...,276.0,272.0,274.0,262.0,262.0,424.0,262.0,264.0,262.0,259.0
3,10012422,10098323,10781406,10049812,10928677,10282034,10805288,10532757,10012427,10546160,...,255.0,196.0,412.0,293.0,262.0,404.0,258.0,187.0,509.0,189.0
4,10012427,10012416,10012415,10012724,10357365,10098323,10928677,10049812,10597676,20012592,...,303.0,303.0,166.0,153.0,127.0,CAT,118.0,100.0,106.0,181.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,20129035,10825035,,,10796740,,,10169745,10049812,10574148,...,,,,,,,,,,
1671,20129190,10825035,,,10796740,,,10169745,10049812,10574148,...,,,,,,,,,,
1672,10492547,10287922,10805851,11119005,10943393,10477987,11079423,20062382,20064066,20064065,...,,,,,,,,,,
1673,20036754,11071078,10853740,20021543,20062382,20064066,20064065,20012592,11108727,11153332,...,,,,,,,,,,


# Cat 1 Operations

In [39]:
# Make cat1 level baskets for co-occurance
cat1_basket_365=replace_item_cde_with_cat3_set(flattened_basket_list_365, cat1_df)
print(f">> Made cat1 level baskets for co-occurance size: {len(cat1_basket_365)}")

>> Made cat1 level baskets for co-occurance size: 160239


In [40]:
# Create co-occurance df for cat1 level
cat1_co_occurrence_matrix, cat1_recommendation_df=create_cooccurrence_matrix_with_recommendations_15(cat1_basket_365, top_n=5)
print(f">> Create co-occurance df for cat3 level df Length: {len(cat1_recommendation_df)}")

100%|██████████| 160239/160239 [00:00<00:00, 194327.08it/s]
100%|██████████| 12/12 [00:00<?, ?it/s]

>> Create co-occurance df for cat3 level df Length: 12





In [41]:
cat1_recommendation_df

Unnamed: 0,Recommendation 1,Recommendation 2,Recommendation 3,Recommendation 4,Recommendation 5
Can Liners,"Towels, Tissues",Skincare,Chemicals,Janitorial Supplies,Wipers
Chemicals,Can Liners,Janitorial Supplies,Skincare,"Towels, Tissues",Personal Protection and Safety
Food Service,"Towels, Tissues",Personal Protection and Safety,Can Liners,Skincare,Janitorial Supplies
Janitorial Power Equipment,Chemicals,"Towels, Tissues",Janitorial Supplies,Skincare,Personal Protection and Safety
Janitorial Supplies,Can Liners,"Towels, Tissues",Personal Hygiene,Skincare,Chemicals
Personal Hygiene,Janitorial Supplies,Wipers,"Towels, Tissues",Can Liners,Skincare
Personal Protection and Safety,Can Liners,"Towels, Tissues",Skincare,Chemicals,Janitorial Supplies
Receptacles and Material Handling,Janitorial Supplies,Can Liners,Chemicals,"Towels, Tissues",Skincare
"Retail, Office and School Supplies",Can Liners,"Towels, Tissues",Skincare,Food Service,
Skincare,Can Liners,"Towels, Tissues",Janitorial Supplies,Chemicals,Personal Protection and Safety


In [42]:
def map_and_add_recommendations_cat1_new(df, cat3_df, new_recommendation_df, cat3_recommendation_df, top_5_items_cat3):
    # Create a map from item_cde to Category
    item_to_cat3 = pd.Series(cat3_df['Category'].values, index=cat3_df['item_cde']).to_dict()

    # Create a map from each category to its top item code
    category_to_top5_items = top_5_items_cat3.set_index('cat1')['item_cde'].apply(lambda x: x[:3]).to_dict()
    print(category_to_top5_items)
    # Initialize a list to hold new rows to add
    rows_to_add = []

    # Iterate over each item in df
    for item in df['item_cde']:
        item = str(item)
        category = item_to_cat3.get(item)
        
        # Check if the item is in new_recommendation_df
        if item not in new_recommendation_df['item_cde'].values:
            # Get recommendations for the category
            if category in cat3_recommendation_df.index:
                recommendations = cat3_recommendation_df.loc[category].to_dict()
                new_row = {'item_cde': item}

                # Replace category recommendations with item codes from the map
                for i in range(0, 5):
                    recommendation_col = f'Recommendation {i+1}'
                    recommendation_col1 = f'Recommendation {i*3+1}'
                    recommendation_col2 = f'Recommendation {i*3+2}'
                    recommendation_col3 = f'Recommendation {i*3+3}'
                    cat_recommendation = recommendations.get(recommendation_col)
                    
                    if cat_recommendation in category_to_top5_items:
                        top_items = category_to_top5_items[cat_recommendation]
                        
                        # Safely assign values, ensuring there are enough items in the list
                        if len(top_items) > 0:
                            new_row[recommendation_col1] = top_items[0]
                        if len(top_items) > 1:
                            new_row[recommendation_col2] = top_items[1]
                        if len(top_items) > 2:
                            new_row[recommendation_col3] = top_items[2]
                    else:
                        new_row[recommendation_col] = ''


                rows_to_add.append(new_row)

    # Create a DataFrame for the new rows
    if rows_to_add:
        rows_to_add_df = pd.DataFrame(rows_to_add)
    else:
        rows_to_add_df = pd.DataFrame(columns=new_recommendation_df.columns)

    # Concatenate the new rows to new_recommendation_df
    updated_recommendation_df = pd.concat([new_recommendation_df, rows_to_add_df], ignore_index=True)

    return updated_recommendation_df

In [43]:
# Map cat1 and add cat3 level recos
new_cat1_recommendation_df = map_and_add_recommendations_cat1_new(seg_pkg_df, cat1_df, new_cat3_recommendation_df, cat1_recommendation_df, top_5_items_cat1)
print(f">> Product+cat1+cat3 level df of Length: {len(new_cat1_recommendation_df)} rows")

{'Can Liners': ['11023420', '10779728', '10961235'], 'Chemicals': ['10943393', '10477987', '10762937'], 'Food Service': ['11071078', '10853740', '10219365'], 'Janitorial Power Equipment': ['11131087', '11124327', '10910819'], 'Janitorial Supplies': ['10771731', '10796740', '10771735'], 'Personal Hygiene': ['10941576', '10048312', '10836144'], 'Personal Protection and Safety': ['20064078', '20064079', '20062382'], 'Receptacles and Material Handling': ['10803501', '11117823', '10365174'], 'Retail, Office and School Supplies': ['10660540', '10459246', '10710015'], 'Skincare': ['10943252', '10568358', '11153989'], 'Towels, Tissues': ['10169745', '10421097', '10049812'], 'Wipers': ['10473204', '10528401', '10751809']}
>> Product+cat1+cat3 level df of Length: 1683 rows


# Reorder: priority sustainable, private, alliance

In [44]:
def reorder_alliance_new(df, pvt_a_label_df):
    """
    Reorders the recommendations for each item_cde based on private_label_sw
    and adds columns indicating whether each recommendation is private branded.
    Parameters:
        df: A DataFrame containing 'item_cde' and 'Recommendation 1' to 'Recommendation 15'.
        private_label_df: A DataFrame containing 'item_cde' and 'private_label_sw'.
    Returns:
        reordered_df: A DataFrame with reordered recommendations and private branding info.
    """
    pvt_a_label_df['Item Number'] = pvt_a_label_df['Item Number'].astype(str)

    alliance_brand_dict = {item: 1 if item in pvt_a_label_df['Item Number'].values else 0 for item in df['item_cde']}
    
    print(alliance_brand_dict)
    
    reordered_recommendations = []

    for _, row in df.iterrows():
        item_cde = row['item_cde']
        recommendations = [(row[f'Recommendation {i}'][0] if isinstance(row[f'Recommendation {i}'], list) else row[f'Recommendation {i}'],
        alliance_brand_dict.get(row[f'Recommendation {i}'][0] if isinstance(row[f'Recommendation {i}'], list) else row[f'Recommendation {i}']))
        for i in range(1, 16)]

        
        # Sort recommendations based on private_label_sw ('Y' should come first)
        recommendations.sort(key=lambda x: x[1] != 1)
        
        reordered_row = {'item_cde': item_cde}
        for i, (rec, private_label) in enumerate(recommendations):
            reordered_row[f'Recommendation {i+1}'] = rec
            reordered_row[f'Recommendation {i+1}_alliance'] = private_label
        
        reordered_recommendations.append(reordered_row)
    
    reordered_df = pd.DataFrame(reordered_recommendations)
    return reordered_df

In [45]:
alliance_df=pd.read_excel("data/Alliance + PB Scope - AB.xlsx")

# Re-order private items to top
reorder_private_df1 = reorder_alliance_new(new_cat1_recommendation_df, alliance_df)
print(f">> Re-ordered the private label data containing {len(reorder_private_df1)} rows")

{'10012415': 0, '10012416': 0, '10012418': 0, '10012422': 0, '10012427': 0, '10012428': 0, '10012628': 0, '10012665': 0, '10012712': 0, '10012724': 0, '10030346': 1, '10031312': 1, '10031380': 1, '10031381': 1, '10032870': 1, '10032876': 1, '10032881': 1, '10032974': 1, '10041829': 1, '10041874': 1, '10041936': 1, '10041937': 1, '10042222': 1, '10042223': 1, '10042505': 1, '10042582': 0, '10042666': 0, '10042733': 1, '10042792': 1, '10042795': 1, '10042826': 1, '10042855': 1, '10042856': 1, '10042857': 1, '10042907': 0, '10043018': 0, '10043023': 0, '10043046': 0, '10043110': 1, '10043111': 1, '10043114': 1, '10043208': 1, '10043210': 1, '10043214': 1, '10043271': 0, '10043489': 1, '10047925': 0, '10048312': 0, '10049455': 1, '10049547': 1, '10049548': 1, '10049556': 1, '10049564': 1, '10049575': 0, '10049648': 1, '10049653': 1, '10049674': 1, '10049675': 1, '10049681': 1, '10049683': 1, '10049693': 1, '10049694': 1, '10049812': 1, '10050648': 0, '10050677': 1, '10050907': 1, '10050908

In [46]:
reorder_private_df1

Unnamed: 0,item_cde,Recommendation 1,Recommendation 1_alliance,Recommendation 2,Recommendation 2_alliance,Recommendation 3,Recommendation 3_alliance,Recommendation 4,Recommendation 4_alliance,Recommendation 5,...,Recommendation 11,Recommendation 11_alliance,Recommendation 12,Recommendation 12_alliance,Recommendation 13,Recommendation 13_alliance,Recommendation 14,Recommendation 14_alliance,Recommendation 15,Recommendation 15_alliance
0,10012415,10098323,1,10943252,1.0,10546160,1.0,10928677,1.0,10097190,...,10357365,0.0,10012724,0.0,10012416,0.0,10597676,0.0,10659222,0.0
1,10012416,10098323,1,10097190,1.0,10943252,1.0,10546160,1.0,10012724,...,10012415,0.0,10012428,0.0,11153335,0.0,20001899,0.0,10597676,0.0
2,10012418,20037330,1,20077538,1.0,10771735,1.0,10941576,1.0,20062456,...,10058085,0.0,10282034,0.0,10801628,0.0,10187330,0.0,10659222,0.0
3,10012422,10098323,1,10781406,1.0,10049812,1.0,10928677,1.0,10805288,...,10532757,0.0,10012427,0.0,11119999,0.0,10624692,0.0,10474067,0.0
4,10012427,10098323,1,10928677,1.0,10049812,1.0,10943252,1.0,10097190,...,10012724,0.0,10357365,0.0,10597676,0.0,20012592,0.0,10659222,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1678,20031557,10943393,1,10169745,1.0,10421097,1.0,10049812,1.0,10771731,...,10762937,0.0,10796740,0.0,20064078,0.0,20064079,0.0,20062382,0.0
1679,20034349,11023420,1,10779728,1.0,10961235,1.0,10169745,1.0,10421097,...,10943393,1.0,10048312,0.0,10836144,0.0,10477987,0.0,10762937,0.0
1680,20062885,10943393,1,10169745,1.0,10421097,1.0,10049812,1.0,10771731,...,10762937,0.0,10796740,0.0,20064078,0.0,20064079,0.0,20062382,0.0
1681,20078473,11023420,1,10779728,1.0,10961235,1.0,10169745,1.0,10421097,...,10771731,1.0,10771735,1.0,10477987,0.0,10762937,0.0,10796740,0.0


In [47]:
conn = connect_db()
print(">> Connected to database")

>> Connected to database


In [48]:
# Get private label information
private_l_df = fetch_private_label_data(conn, df)
print(f">> Fetched the private label data containing {len(private_l_df)} rows")

>> Fetched the private label data containing 4291 rows


In [49]:
# Re-order private items to top
reorder_private_df2 = reorder_private(reorder_private_df1, private_l_df)
print(f">> Re-ordered the private label data containing {len(reorder_private_df2)} rows")

>> Re-ordered the private label data containing 1683 rows


In [50]:
reorder_private_df2

Unnamed: 0,item_cde,Recommendation 1,Recommendation 1_private,Recommendation 2,Recommendation 2_private,Recommendation 3,Recommendation 3_private,Recommendation 4,Recommendation 4_private,Recommendation 5,...,Recommendation 11,Recommendation 11_private,Recommendation 12,Recommendation 12_private,Recommendation 13,Recommendation 13_private,Recommendation 14,Recommendation 14_private,Recommendation 15,Recommendation 15_private
0,10012415,10098323,N,10943252,N,10546160,N,10928677,N,10097190,...,10357365,N,10012724,N,10012416,N,10597676,N,10659222,N
1,10012416,10098323,N,10097190,N,10943252,N,10546160,N,10012724,...,10012415,N,10012428,N,11153335,N,20001899,N,10597676,N
2,10012418,20037330,Y,20077538,Y,10941576,Y,20062456,Y,10771735,...,10058085,N,10282034,N,10801628,N,10187330,N,10659222,N
3,10012422,10805288,Y,20037330,Y,10098323,N,10781406,N,10049812,...,10532757,N,10012427,N,11119999,N,10624692,N,10474067,N
4,10012427,10098323,N,10928677,N,10049812,N,10943252,N,10097190,...,10012724,N,10357365,N,10597676,N,20012592,N,10659222,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1678,20031557,10943393,N,10169745,N,10421097,N,10049812,N,10771731,...,10762937,N,10796740,N,20064078,N,20064079,N,20062382,N
1679,20034349,10961235,Y,10941576,Y,11023420,N,10779728,N,10169745,...,10943393,N,10048312,N,10836144,N,10477987,N,10762937,N
1680,20062885,10943393,N,10169745,N,10421097,N,10049812,N,10771731,...,10762937,N,10796740,N,20064078,N,20064079,N,20062382,N
1681,20078473,10961235,Y,11023420,N,10779728,N,10169745,N,10421097,...,10771731,N,10771735,N,10477987,N,10762937,N,10796740,N


In [51]:
conn = connect_db()
print(">> Connected to database")

>> Connected to database


In [52]:
# Get sustainable label information
private_l_df = fetch_sustainability_data(conn, df)
print(f">> Fetched the private label data containing {len(private_l_df)} rows")

>> Fetched the private label data containing 1836 rows


In [53]:
private_l_df['sustainable'][0]

'true'

In [54]:
def reorder_sustainable_new(df, sustainable_df):
    """
    Reorders the recommendations for each item_cde based on private_label_sw
    and adds columns indicating whether each recommendation is private branded.
    Parameters:
        df: A DataFrame containing 'item_cde' and 'Recommendation 1' to 'Recommendation 15'.
        private_label_df: A DataFrame containing 'item_cde' and 'private_label_sw'.
    Returns:
        reordered_df: A DataFrame with reordered recommendations and private branding info.
    """
    private_label_dict = dict(zip(sustainable_df['item_cde'], sustainable_df['sustainable']))
    
    reordered_recommendations = []

    for _, row in df.iterrows():
        item_cde = row['item_cde']
        recommendations = [(row[f'Recommendation {i}'], private_label_dict.get(row[f'Recommendation {i}'], 'f')) 
                           for i in range(1, 16)]
        
        # Sort recommendations based on private_label_sw ('Y' should come first)
        recommendations.sort(key=lambda x: x[1] != 'true')
        
        reordered_row = {'item_cde': item_cde}
        for i, (rec, private_label) in enumerate(recommendations):
            reordered_row[f'Recommendation {i+1}'] = rec
            reordered_row[f'Recommendation {i+1}_private'] = private_label
        
        reordered_recommendations.append(reordered_row)
    
    reordered_df = pd.DataFrame(reordered_recommendations)
    return reordered_df

In [55]:
# Re-order private items to top
reorder_private_df3 = reorder_sustainable_new(reorder_private_df2, private_l_df)
print(f">> Re-ordered the private label data containing {len(reorder_private_df2)} rows")

>> Re-ordered the private label data containing 1683 rows


In [56]:
reorder_private_df3

Unnamed: 0,item_cde,Recommendation 1,Recommendation 1_private,Recommendation 2,Recommendation 2_private,Recommendation 3,Recommendation 3_private,Recommendation 4,Recommendation 4_private,Recommendation 5,...,Recommendation 11,Recommendation 11_private,Recommendation 12,Recommendation 12_private,Recommendation 13,Recommendation 13_private,Recommendation 14,Recommendation 14_private,Recommendation 15,Recommendation 15_private
0,10012415,10098323,true,10943252,true,10546160,true,10097190,true,10049812,...,10357365,f,10012724,f,10012416,f,10597676,f,10659222,f
1,10012416,10098323,true,10097190,true,10943252,true,10546160,true,10012724,...,10012415,f,10012428,f,11153335,f,20001899,f,10597676,f
2,10012418,20037330,true,20077538,true,20062456,true,10395151,true,10941576,...,10058085,f,10282034,f,10801628,f,10187330,f,10659222,f
3,10012422,10805288,true,20037330,true,10098323,true,10049812,true,10546160,...,10532757,f,10012427,f,11119999,f,10624692,f,10474067,f
4,10012427,10098323,true,10049812,true,10943252,true,10097190,true,10546160,...,10012724,f,10357365,f,10597676,f,20012592,f,10659222,f
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1678,20031557,10169745,true,10421097,true,10049812,true,10943252,true,10568358,...,10762937,f,10796740,f,20064078,f,20064079,f,20062382,f
1679,20034349,10169745,true,10421097,true,10049812,true,10943252,true,10568358,...,10779728,f,10943393,f,10048312,f,10836144,f,10762937,f
1680,20062885,10169745,true,10421097,true,10049812,true,10943252,true,10568358,...,10762937,f,10796740,f,20064078,f,20064079,f,20062382,f
1681,20078473,10169745,true,10421097,true,10049812,true,10943252,true,10568358,...,10943393,f,10771731,f,10771735,f,10762937,f,10796740,f


In [57]:
#Select columns
reorder_df=reorder_private_df3[['item_cde','Recommendation 1','Recommendation 2','Recommendation 3','Recommendation 4','Recommendation 5', 'Recommendation 6','Recommendation 7','Recommendation 8','Recommendation 9','Recommendation 10']]

In [58]:
# Remove duplicate recommendations and self-references
reorder_df_cleaned = remove_duplicate_and_self_references(reorder_df)
print(f">>Removed duplicate recommendations and self-references data containing {len(reorder_df_cleaned)} rows")

>>Removed duplicate recommendations and self-references data containing 1683 rows


In [59]:
def shift_recommendations_left_new(reorder_with_desc_cleaned):
    """
    Push non-empty recommendations left when there are empty slots before them.
    
    Parameters:
        reorder_with_desc_cleaned: A DataFrame containing columns for item codes and their recommendations.
    
    Returns:
        A DataFrame with non-empty recommendations shifted left.
    """
    recommendation_cols = [
        'Recommendation 1', 'Recommendation 2', 
        'Recommendation 3', 'Recommendation 4', 'Recommendation 5',
        'Recommendation 6', 'Recommendation 7', 
        'Recommendation 8', 'Recommendation 9', 'Recommendation 10'
    ]

    def shift_left(row):
        # Extract recommendations into a list
        recommendations = [row[col] for col in recommendation_cols]
        
        # Filter out empty recommendations
        recommendations = [rec for rec in recommendations if pd.notnull(rec)]
        
        # Extend the list to maintain the length of the recommendation columns
        while len(recommendations) < len(recommendation_cols):
            recommendations.append(None)
        
        return pd.Series(recommendations, index=recommendation_cols)
    
    # Apply the function to each row
    reorder_with_desc_cleaned[recommendation_cols] = reorder_with_desc_cleaned.apply(shift_left, axis=1)

    return reorder_with_desc_cleaned

In [60]:
#Shift non empty recommendations to left
reorder_shift_left=shift_recommendations_left_new(reorder_df_cleaned)
print(f">>Shift non empty recommendations data containing {len(reorder_shift_left)} rows")

>>Shift non empty recommendations data containing 1683 rows


# Atleast 8 recommendation

In [61]:
#Check # of rows with less trhan 2 recommendation
rows_with_0_or_1_recommendation = copy_rows_with_0_or_1_recommendation(reorder_shift_left)
print(f">>Items with less than 2 recommendations are Length: {len(rows_with_0_or_1_recommendation)} rows")

>>Items with less than 2 recommendations are Length: 13 rows


In [62]:
rows_with_0_to_2_recommendation = copy_rows_with_0_to_2_recommendation(reorder_shift_left)
print(f">>Items with less than 3 recommendations are Length: {len(rows_with_0_to_2_recommendation)} rows")

>>Items with less than 3 recommendations are Length: 1683 rows


In [63]:
#Make atleast 2 recommendations
#updated_reorder_df = add_recommendations2(reorder_shift_left, cat3_df, top_5_items_cat3, cat1_df, top_5_items_cat1)
#print(f">>Atleast two recommendations added for data with {len(updated_reorder_df)} rows")

In [64]:
def minimum_three_recommendations_new(reorder_df, cat3_df, top_5_items_cat3, cat1_df, top_5_items_cat1):
    # Create dictionaries to map item codes to categories
    item_to_category_dict_cat3 = dict(zip(cat3_df['item_cde'], cat3_df['Category']))
    item_to_category_dict_cat1 = dict(zip(cat1_df['item_cde'], cat1_df['Category']))

    # Define the recommendation columns
    recommendation_cols = ['Recommendation 1', 'Recommendation 2', 'Recommendation 3', 'Recommendation 4', 'Recommendation 5',
    'Recommendation 6', 'Recommendation 7', 'Recommendation 8', 'Recommendation 9', 'Recommendation 10']

    # Function to find rows with fewer than 3 recommendations
    def copy_rows_with_fewer_than_3_recommendations(df):
        def count_non_none(row):
            return sum(pd.notna(row[col]) for col in recommendation_cols)
        return df[df.apply(count_non_none, axis=1) < 8]

    # Filter rows that need more recommendations
    rows_with_fewer_than_3_recommendations = copy_rows_with_fewer_than_3_recommendations(reorder_df)

    for idx, row in rows_with_fewer_than_3_recommendations.iterrows():
        current_item = row['item_cde']
        category_cat3 = item_to_category_dict_cat3.get(current_item)
        category_cat1 = item_to_category_dict_cat1.get(current_item)
        
        if category_cat3:
            # Get top items for the category from cat3
            top_items_cat3 = top_5_items_cat3[top_5_items_cat3['cat'] == category_cat3]['item_cde'].values
            # Get top items for the category from cat1
            top_items_cat1 = top_5_items_cat1[top_5_items_cat1['cat1'] == category_cat1]['item_cde'].values if category_cat1 else []
            
            # Flatten the lists if they are not already
            if top_items_cat3 and isinstance(top_items_cat3[0], list):
                top_items_cat3 = [item for sublist in top_items_cat3 for item in sublist]

            if top_items_cat1 and isinstance(top_items_cat1[0], list):
                top_items_cat1 = [item for sublist in top_items_cat1 for item in sublist]

            # Concatenate the top items lists
            combined_top_items = list(top_items_cat3) + list(top_items_cat1)
            
            # Ensure items are unique and not the current item
            unique_top_items = []
            seen_items = set()
            for item in combined_top_items:
                if item != current_item and item not in seen_items:
                    unique_top_items.append(item)
                    seen_items.add(item)
            
            # Fill recommendations to ensure at least 3
            filled_recommendations = []
            for col in recommendation_cols:
                if pd.notna(row[col]) and row[col] not in filled_recommendations:
                    filled_recommendations.append(row[col])
            
            # Add additional items to fill up to 3 recommendations
            for item in unique_top_items:
                if len(filled_recommendations) >= 8:
                    break
                if item not in filled_recommendations:
                    filled_recommendations.append(item)
            
            # Update the recommendations in the DataFrame
            for i, recommendation in enumerate(filled_recommendations[:8]):
                reorder_df.at[idx, recommendation_cols[i]] = recommendation

    return reorder_df

In [65]:
updated_reorder_df = minimum_three_recommendations_new(reorder_df, cat3_df, top_5_items_cat3, cat1_df, top_5_items_cat1)
print(f">>Atleast three recommendations added for data with {len(updated_reorder_df)} rows")

>>Atleast three recommendations added for data with 1683 rows


In [66]:
updated_reorder_df

Unnamed: 0,item_cde,Recommendation 1,Recommendation 2,Recommendation 3,Recommendation 4,Recommendation 5,Recommendation 6,Recommendation 7,Recommendation 8,Recommendation 9,Recommendation 10
0,10012415,10098323,10943252,10546160,10097190,10049812,10942870,10396065,10928677,10778416,10012427
1,10012416,10098323,10097190,10943252,10546160,10012724,10357365,20012592,10012427,11153332,11153329
2,10012418,20037330,20077538,20062456,10395151,10941576,10771735,10357365,10050648,10084953,10801559
3,10012422,10805288,20037330,10098323,10049812,10546160,10396065,10781406,10928677,20003479,10282034
4,10012427,10098323,10049812,10943252,10097190,10546160,10396065,10042733,10928677,10012416,10012415
...,...,...,...,...,...,...,...,...,...,...,...
1678,20031557,10169745,10421097,10049812,10943252,10568358,11153989,10477987,10943393,10771731,10771735
1679,20034349,10169745,10421097,10049812,10943252,10568358,11153989,10477987,10961235,10941576,11023420
1680,20062885,10169745,10421097,10049812,10943252,10568358,11153989,10477987,10943393,10771731,10771735
1681,20078473,10169745,10421097,10049812,10943252,10568358,11153989,10477987,10961235,11023420,10779728


# Check if updated df has any less than 2 reco rows

In [67]:
rows_with_0_to_2_recommendation = copy_rows_with_0_to_2_recommendation(updated_reorder_df)
print(f">>Length of data with less than 2 recommendations {len(rows_with_0_to_2_recommendation)} rows")

>>Length of data with less than 2 recommendations 1683 rows


In [68]:
# Add unique check column to ensure every recommendation is unique in a given row
updated_reorder_df['unique_check'] = updated_reorder_df.apply(are_values_unique, axis=1)
print(f">>Unique check column added for data with {len(updated_reorder_df)} rows")

>>Unique check column added for data with 1683 rows


In [69]:
#updated_reorder_df.to_csv('unique_updated_reorder_df_min2_recos.csv')

In [70]:
def transform_recommendations_new(df):
    # Initialize an empty list to store the new rows
    rows = []
    
    # Iterate over each row in the original DataFrame
    for _, row in df.iterrows():
        item_cde = row['item_cde']
        # Iterate through each recommendation
        for i in range(1, 11):
            recommendation = row[f'Recommendation {i}']
            # Append the new row to the list
            rows.append({'Primary Item Number': item_cde, 'Related Item Number': recommendation})
    
    # Convert the list to a new DataFrame
    transformed_df = pd.DataFrame(rows)
    return transformed_df

In [71]:
# Transform the recommendations row-wise
transformed_df = transform_recommendations_new(updated_reorder_df)
print(f">> Transform the recommendations, new Length {len(transformed_df)} rows, which is 10 times {len(new_cat1_recommendation_df)} (Prev length)")

>> Transform the recommendations, new Length 16830 rows, which is 10 times 1683 (Prev length)


In [72]:
# Remove empty items from related items column
nonempty_transformed_df=remove_empty_related_items(transformed_df)
print(f">>Data after removing empty related items have {len(nonempty_transformed_df)} rows")

>>Data after removing empty related items have 16640 rows


In [73]:
#nonempty_transformed_df.to_csv('nonprint_minimum2_recommendations.csv', index=False)

In [74]:
nonempty_transformed_df.to_csv('results_mar25/fs_marketbasket_dc.csv',index=False)

PermissionError: [Errno 13] Permission denied: 'results_mar25/fs_marketbasket_dc.csv'

In [None]:
# Add item descriptions
updated_reorder_df_with_desc=add_descriptions(item_desc_df,updated_reorder_df)
print(f">>Added item descriptions to data containing {len(updated_reorder_df_with_desc)} rows")

>>Added item descriptions to data containing 96 rows
