In [18]:
import pandas as pd


In [19]:
# Load the dataset from a SAS transport (.XPT) file into a DataFrame.
# read_sas is called without an `encoding`, which is why the text columns
# appear as byte strings (b'...') in the preview of `dataset`.
file_path = 'ingredient Information.XPT'  # Replace with your file path
dataset = pd.read_sas(file_path)

In [20]:
# Preview the first rows to check that the file loaded and to inspect the raw columns.
dataset.head()

Unnamed: 0,DSDPID,DSDSUPP,DSDIID,DSDINGR,DSDOPER,DSDQTY,DSDUNIT,DSDCAT,DSDBLFLG,DSDINGID
0,1.0,b'MYADEC HIGH POTENCY MULTIVITAMIN-MULTIMINERA...,35.0,b'BIOTIN',b'=',30.0,4.0,1.0,2.0,b'10000042'
1,1.0,b'MYADEC HIGH POTENCY MULTIVITAMIN-MULTIMINERA...,45.0,b'BORON',b'=',150.0,4.0,2.0,2.0,b'10000052'
2,1.0,b'MYADEC HIGH POTENCY MULTIVITAMIN-MULTIMINERA...,63.0,b'CALCIUM',b'=',162.0,1.0,2.0,2.0,b'10000070'
3,1.0,b'MYADEC HIGH POTENCY MULTIVITAMIN-MULTIMINERA...,84.0,b'CHROMIUM',b'=',25.0,4.0,2.0,2.0,b'10000093'
4,1.0,b'MYADEC HIGH POTENCY MULTIVITAMIN-MULTIMINERA...,94.0,b'COPPER',b'=',2.0,1.0,2.0,2.0,b'10000103'


In [21]:
# Convert byte strings to regular strings
def safe_decode(x):
    """Return ``x`` decoded to ``str`` if it is ``bytes``; otherwise return it unchanged.

    UTF-8 is tried first. If the bytes are not valid UTF-8, they are decoded
    as Latin-1 instead, which accepts any byte sequence.
    """
    # Non-bytes values (numbers, NaN, str, None, ...) pass straight through.
    if not isinstance(x, bytes):
        return x

    try:
        return x.decode('utf-8')
    except UnicodeDecodeError:
        return x.decode('latin-1')



# Convert byte strings to regular strings.
# Only object-dtype columns can hold bytes, so just those are decoded and the
# numeric columns are carried over untouched. Series.map is used per column
# because DataFrame.applymap is deprecated in recent pandas releases.
# `assign` returns a new frame, so the raw `dataset` is left unmodified.
dataset_cleaned = dataset.assign(**{
    column: dataset[column].map(safe_decode)
    for column in dataset.select_dtypes(include='object').columns
})


In [22]:
# Aggregating data by supplements: build one {ingredient name: quantity} dict
# per supplement name. The groups are iterated directly instead of using
# groupby(...).apply on the whole frame, which avoids pandas' deprecated
# "apply operated on the grouping columns" behaviour and also yields a valid
# (empty) two-column frame when the dataset has no rows.
# NOTE(review): if a supplement lists the same ingredient more than once, the
# last quantity wins (dict semantics). DSDQTY is stored without DSDUNIT, so
# quantities are presumably not comparable across units — confirm.
ingredient_profiles = {
    supplement: dict(zip(rows['DSDINGR'], rows['DSDQTY']))
    for supplement, rows in dataset_cleaned.groupby('DSDSUPP')
}

# One row per supplement, with column names chosen for clarity
supplement_profiles = pd.DataFrame({
    'Supplement': list(ingredient_profiles),
    'IngredientProfile': list(ingredient_profiles.values()),
})

In [23]:
# Recommendation function
def recommend_supplements(deficiency, supplement_profiles, top_n=5):
    """Return the supplements that contain the most of a given ingredient.

    Args:
        deficiency: Ingredient name to look for. Surrounding whitespace is
            ignored and the name is upper-cased before matching, so the match
            is case-insensitive against upper-case profile keys but otherwise
            exact (no partial or synonym matching).
        supplement_profiles: DataFrame with a 'Supplement' column (names) and
            an 'IngredientProfile' column holding {ingredient: quantity} dicts.
        top_n: Maximum number of results to return.

    Returns:
        A list of (supplement name, quantity) tuples sorted by quantity in
        descending order, at most ``top_n`` long. The list is empty when no
        supplement contains the ingredient. Entries whose quantity is missing
        (NaN/None) are placed after all entries with a known quantity.

    Note:
        Quantities are compared as plain numbers; nothing here accounts for
        measurement units.
    """
    # Normalize the deficiency input to match dataset format
    ingredient = deficiency.strip().upper()

    # Collect (name, quantity) for every supplement containing the ingredient.
    # Iterating the two columns directly avoids DataFrame.apply(axis=1), which
    # returns the (empty) frame itself when nothing matches and made the old
    # code return the column labels instead of an empty result.
    matches = [
        (supplement, profile[ingredient])
        for supplement, profile in zip(
            supplement_profiles['Supplement'],
            supplement_profiles['IngredientProfile'],
        )
        if ingredient in profile
    ]

    def _by_quantity(match):
        # NaN does not order consistently against numbers, so missing
        # quantities get a key that ranks below every real quantity.
        quantity = match[1]
        return (0, 0.0) if pd.isna(quantity) else (1, quantity)

    # Sort by quantity of the deficient ingredient and return top N results
    matches.sort(key=_by_quantity, reverse=True)
    return matches[:top_n]

In [27]:
# Example: top supplements for vitamin D. The query can be lowercase because
# recommend_supplements upper-cases it before matching.
recommendations = recommend_supplements("vitamin d", supplement_profiles)
print(recommendations)

[('VITAMIN D 60000 IU', 60000.0), ('VITAMIN D 50000 IU', 50000.0), ('VITAMIN D 25000 IU', 25000.0), ('DEFAULT DIABETIC MULTIVITAMIN / MULTIMINERAL PACK', 20000.0), ("NATURE'S BOUNTY YOUR LIFE PRESCRIPTIVE FORMULAS MULTIVITAMIN / MULTIMINERAL DIABETES SUPPORT PACKS BALANCED MULTIVITAMIN / MULTIMINERAL ALPHA LIPOIC ACID CINNAMON & CHROMIUM PICOLINATE SELENIUM", 20000.0)]


In [28]:
# Example: top supplements for biotin (default top_n=5).
recommendations = recommend_supplements("biotin", supplement_profiles)
print(recommendations)

[('100 NATURALS BIOTIN 10,000 MCG ADVANCED FORMULA MAXIMUM STRENGTH', 10000.0), ('BIO NUTRITION HEALTHY HAIR WITH BIOTIN 10,000 PLUS ONCE DAILY', 10000.0), ('BIOTIN 10,000 MCG', 10000.0), ('BIOTIN 10,000 MCG UP&UP', 10000.0), ('BIOTIN PLUS KERATIN BIOTIN 10000 MCG KERATIN 100 MG UP&UP', 10000.0)]


In [30]:
# Example: omega-3. Matching is an exact lookup of the upper-cased name in each
# ingredient profile, so only products listing an ingredient named exactly
# 'OMEGA-3' are returned.
recommendations = recommend_supplements("omega-3", supplement_profiles)
print(recommendations)

[('BERKLEY JENSEN FISH OIL 1200 MG PER SOFTGEL', 360.0)]
