In [1]:
import pandas as pd

def merge_related_items(df1, df2):
    """
    Interleave two related-item tables by Primary Item Number.

    For every Primary Item Number in ``df1`` (in order of first appearance), the
    output contains all of that item's rows from ``df1`` followed immediately by
    all of its rows from ``df2`` (if any).

    Notes:
        * Primary Item Numbers that appear only in ``df2`` are NOT included.
        * Rows of ``df1`` whose Primary Item Number is missing (NaN) are skipped,
          because groupby drops NaN keys and such rows cannot be matched.

    Parameters:
        df1 (pd.DataFrame): First table; must contain a 'Primary Item Number' column.
        df2 (pd.DataFrame): Second table; must contain a 'Primary Item Number' column.

    Returns:
        pd.DataFrame: Rows of df1 and df2 interleaved per primary item, with a
        fresh 0..n-1 index. Empty (with the union of both inputs' columns) when
        df1 has no usable rows.

    Raises:
        KeyError: If either input lacks the 'Primary Item Number' column.
    """
    key = "Primary Item Number"

    # sort=False: group order is irrelevant here, we only look groups up by key.
    groups1 = df1.groupby(key, sort=False)
    groups2 = df2.groupby(key, sort=False)

    pieces = []

    # dropna(): unique() would otherwise yield NaN, for which no group exists
    # (groupby drops NaN keys) and get_group would raise KeyError.
    for primary_item in df1[key].dropna().unique():
        pieces.append(groups1.get_group(primary_item))  # df1's rows first
        if primary_item in groups2.groups:
            pieces.append(groups2.get_group(primary_item))  # then df2's rows

    if not pieces:
        # pd.concat([]) raises ValueError; return an empty frame that still
        # carries the columns of both inputs.
        return pd.concat([df1.iloc[0:0], df2.iloc[0:0]], ignore_index=True)

    return pd.concat(pieces, ignore_index=True)

# Example usage (the function takes DataFrames, not file paths):
# merged_df = merge_related_items(pd.read_csv("file1.csv"), pd.read_csv("file2.csv"))
# merged_df.to_csv("merged_output.csv", index=False)


In [2]:
# pkg segment: place each primary item's rows from the sustainable file right after
# its rows from the market-basket file, then persist the result for later cells.
df1=pd.read_csv('results_mar25/pkg_marketbasket_dc.csv')
df2=pd.read_csv('results_mar25/pkg_sustainable.csv')
pkg_df=merge_related_items(df1,df2)
# index=False keeps the RangeIndex out of the file, so re-reading it does not
# produce a stray "Unnamed: 0" column.
pkg_df.to_csv('results_mar25/pkg_df.csv', index=False)

In [3]:
# fs segment: same interleaving as the pkg cell, written to fs_df.csv.
# NOTE: the result is stored in `pkg_df` even though it holds fs data; the name is
# kept unchanged because other cells may rely on it.
df1=pd.read_csv('results_mar25/fs_marketbasket_dc.csv')
df2=pd.read_csv('results_mar25/fs_sustainable.csv')
pkg_df=merge_related_items(df1,df2)
# index=False keeps the RangeIndex out of the file, so re-reading it does not
# produce a stray "Unnamed: 0" column.
pkg_df.to_csv('results_mar25/fs_df.csv', index=False)

In [4]:
# print segment: same interleaving as the pkg cell, written to print_df.csv.
# NOTE: the result is stored in `pkg_df` even though it holds print data; the name is
# kept unchanged because other cells may rely on it.
df1=pd.read_csv('results_mar25/print_marketbasket_dc.csv')
df2=pd.read_csv('results_mar25/print_sustainable.csv')
pkg_df=merge_related_items(df1,df2)
# index=False keeps the RangeIndex out of the file, so re-reading it does not
# produce a stray "Unnamed: 0" column.
pkg_df.to_csv('results_mar25/print_df.csv', index=False)

In [5]:
import pandas as pd

def combine_matches(new_df, merged_df, max_related=15):
    """
    Combine related-item matches from new_df and merged_df.

    Behaviour:
    1. Matches from new_df are placed before merged_df's matches, and are used
       **only if the Primary Item Number is already present in merged_df**.
    2. Duplicate related items within a Primary Item Number are removed; the
       first occurrence wins, so the new_df-first ordering is preserved.
    3. At most ``max_related`` related items are kept per Primary Item Number.
    4. Rows with a missing Primary or Related Item Number are discarded.
    5. The result is sorted by Primary Item Number; the order of related items
       within each primary item is the order of first appearance.

    Parameters:
        new_df (pd.DataFrame): DataFrame with columns 'Item Number',
                               'consumable_match_1_item_cde', 'consumable_match_2_item_cde'.
        merged_df (pd.DataFrame): DataFrame with columns 'Primary Item Number',
                                  'Related Item Number' (extra columns are ignored).
        max_related (int): Maximum number of related items kept per primary item.
                           Defaults to 15. Must be non-negative.

    Returns:
        pd.DataFrame: Columns 'Primary Item Number' and 'Related Item Number',
        with a fresh 0..n-1 index.

    Raises:
        ValueError: If max_related is negative.
        KeyError: If a required column is missing from either input.
    """
    if max_related < 0:
        raise ValueError("max_related must be non-negative")

    primary_col = "Primary Item Number"
    related_col = "Related Item Number"
    match_cols = ["consumable_match_1_item_cde", "consumable_match_2_item_cde"]

    # Reshape new_df from wide (one column per match) to merged_df's long format.
    # All match_1 rows come before all match_2 rows, which fixes their priority.
    new_matches = (
        new_df.melt(
            id_vars=["Item Number"],
            value_vars=match_cols,
            var_name="Match Type",
            value_name=related_col,
        )
        .drop(columns=["Match Type"])
        .rename(columns={"Item Number": primary_col})
        .dropna()
    )

    # Keep only primary items that already exist in merged_df.
    new_matches = new_matches[new_matches[primary_col].isin(merged_df[primary_col].unique())]

    # A single missing match turns the melted column into floats, which would upcast
    # merged_df's integer item codes to floats (e.g. 10012415.0) after concat.
    # Cast back when merged_df uses integers and the values are whole numbers.
    casts = {}
    for col in (primary_col, related_col):
        target_dtype = merged_df[col].dtype
        values = new_matches[col]
        if (
            pd.api.types.is_integer_dtype(target_dtype)
            and pd.api.types.is_float_dtype(values.dtype)
            and (values % 1 == 0).all()
        ):
            casts[col] = target_dtype
    if casts:
        new_matches = new_matches.astype(casts)

    # new_matches first so that, on duplicates, the new_df occurrence is the one kept.
    combined = pd.concat(
        [new_matches, merged_df[[primary_col, related_col]]],
        ignore_index=True,
    ).dropna(subset=[primary_col, related_col])

    # Order-preserving de-duplication of (primary, related) pairs.
    deduped = combined.drop_duplicates(subset=[primary_col, related_col], keep="first")

    # cumcount numbers rows 0,1,2,... within each primary item in current order.
    within_limit = deduped.groupby(primary_col).cumcount() < max_related

    # mergesort is stable, so the within-item order survives the sort by primary item.
    final_df = (
        deduped.loc[within_limit, [primary_col, related_col]]
        .sort_values(primary_col, kind="mergesort")
        .reset_index(drop=True)
    )

    return final_df

# Example usage:
# final_df = combine_matches(new_df, merged_df)
# final_df.to_csv("final_output.csv", index=False)


In [7]:
# Load the dispenser/consumable matches once; the cells for the other segments reuse
# `consumable_df`, so this cell must run before them.
consumable_df=pd.read_excel('results_mar25/Dispenser-Consumable Matches after Feedback.xlsx')
# print segment: put the consumable matches ahead of the merged recommendations.
# NOTE: the result is stored in `pkg_df` even though it holds print data; the name is
# kept unchanged because other cells may rely on it.
df_seg=pd.read_csv('results_mar25/print_df.csv')
pkg_df=combine_matches(consumable_df,df_seg)
# index=False keeps the RangeIndex out of the file, so re-reading it does not
# produce a stray "Unnamed: 0" column.
pkg_df.to_csv('print_refresh_results.csv', index=False)

In [8]:
# pkg segment: put the consumable matches ahead of the merged recommendations.
# Relies on `consumable_df` loaded in an earlier cell.
df_seg=pd.read_csv('results_mar25/pkg_df.csv')
pkg_df=combine_matches(consumable_df,df_seg)
# index=False keeps the RangeIndex out of the file, so re-reading it does not
# produce a stray "Unnamed: 0" column.
pkg_df.to_csv('pkg_refresh_results.csv', index=False)

In [9]:
# fs segment: put the consumable matches ahead of the merged recommendations.
# Relies on `consumable_df` loaded in an earlier cell.
# NOTE: the result is stored in `pkg_df` even though it holds fs data; the name is
# kept unchanged because other cells may rely on it.
df_seg=pd.read_csv('results_mar25/fs_df.csv')
pkg_df=combine_matches(consumable_df,df_seg)
# index=False keeps the RangeIndex out of the file, so re-reading it does not
# produce a stray "Unnamed: 0" column.
pkg_df.to_csv('fs_refresh_results.csv', index=False)

In [None]:
# NOTE(review): `new_cat1_recommendation_df` is not assigned in any cell visible above;
# this cell presumably depends on kernel state created elsewhere — confirm it is
# defined before this point so the notebook survives Restart & Run All.
new_cat1_recommendation_df

Unnamed: 0,item_cde,Recommendation 1,Recommendation 2,Recommendation 3,Recommendation 4,Recommendation 5,Recommendation 6,Recommendation 7,Recommendation 8,Recommendation 9,...,frequency 6,frequency 7,frequency 8,frequency 9,frequency 10,frequency 11,frequency 12,frequency 13,frequency 14,frequency 15
0,10012415,10012427,10357365,10098323,10012724,10012416,10597676,10943252,10546160,10928677,...,154.0,199.0,153.0,108.0,CAT,184.0,150.0,116.0,155.0,361.0
1,10012416,10012724,10357365,20012592,10012427,11153332,11153329,10012415,10012428,11153335,...,CAT,455.0,366.0,CAT,192.0,101.0,161.0,CAT,105.0,125.0
2,10012418,10357365,10050648,20037330,20077538,10084953,10771735,10801559,10058085,10282034,...,276.0,272.0,274.0,262.0,262.0,425.0,262.0,264.0,262.0,259.0
3,10012422,10098323,10781406,10049812,10928677,10282034,10805288,10532757,10012427,10546160,...,255.0,196.0,412.0,293.0,262.0,404.0,258.0,187.0,509.0,189.0
4,10012427,10012416,10012415,10012724,10357365,10098323,10928677,10049812,10597676,20012592,...,303.0,303.0,166.0,153.0,127.0,CAT,118.0,100.0,106.0,181.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1675,20034349,11023420,10779728,10961235,10169745,10421097,10049812,10941576,10048312,10836144,...,,,,,,,,,,
1676,20062885,10943393,10477987,10762937,10169745,10421097,10049812,10771731,10796740,10771735,...,,,,,,,,,,
1677,20078473,11023420,10779728,10961235,10169745,10421097,10049812,10943252,10568358,11153989,...,,,,,,,,,,
1678,20082513,10943393,10477987,10762937,10169745,10421097,10049812,10771731,10796740,10771735,...,,,,,,,,,,
