In [56]:
import numpy as np
import pandas as pd

In [57]:
# Input file names; both are resolved relative to the current working directory.
FOOD_CSV_PATH = 'preprocessed_food.csv'
DISEASE_CSV_PATH = 'preprocessed_disease.csv'

# Load the two tables into DataFrames.
food_data = pd.read_csv(FOOD_CSV_PATH)
disease_data = pd.read_csv(DISEASE_CSV_PATH)

In [58]:
from sklearn.metrics.pairwise import cosine_similarity

## Cosine Similarity

In [89]:
selected_diseases = ['Hypertension', 'Diabetes(Type-1)', 'Skin Cancer', 'general']

# Number of foods listed at each end of the ranking.
TOP_N = 5

# Maps each disease name to the array of (1 - cosine similarity) values between
# that disease's row(s) and every food row; a smaller value means a closer match.
similarity_scores_dict = {}

# The food nutrient matrix does not depend on the disease, so build it once
# instead of on every loop iteration.
# NOTE(review): assumes column 0 of food_data is the food name and the remaining
# columns are numeric nutrients in the same order as the columns that follow
# column 0 of disease_data -- confirm against the preprocessing step.
nutrient_columns = food_data.columns[1:]
food_nutrition = food_data[nutrient_columns].values

for selected_disease in selected_diseases:
    # Every column after the first one, for the row(s) of the selected disease.
    disease_limits = disease_data[disease_data['disease_name'] == selected_disease].iloc[:, 1:].values
    if disease_limits.shape[0] == 0:
        # Fail with the offending name instead of passing an empty array on to
        # cosine_similarity, whose error would not say which disease is missing.
        raise ValueError(
            f"No row in disease_data has disease_name == {selected_disease!r}"
        )

    similarity_scores = cosine_similarity(disease_limits, food_nutrition)
    inverted_similarity_scores = 1 - similarity_scores
    # Row positions of the foods for the first disease row, ordered from the
    # smallest inverted score (closest match) to the largest.
    top_food_indices = np.argsort(inverted_similarity_scores)[0]

    # Store the similarity scores in the dictionary
    similarity_scores_dict[selected_disease] = inverted_similarity_scores

    print("Top Recommended Foods for", selected_disease)
    for i, idx in enumerate(top_food_indices[:TOP_N]):
        # argsort yields row positions, so look the name up positionally;
        # label-based .loc is only equivalent on a default RangeIndex.
        food_name = food_data['name'].iloc[idx]
        print(f"{i+1}. {food_name}")
    print()

    print("\nNot Recommended Foods for", selected_disease)
    # Last TOP_N positions, reversed so the worst match is listed first.
    for i, idx in enumerate(top_food_indices[-TOP_N:][::-1]):
        food_name = food_data['name'].iloc[idx]
        print(f"{i+1}. {food_name}")
    print('=' * 50)



Top Recommended Foods for Hypertension
1. Restaurant, without meat or noodles, vegetable chow mein, Chinese
2. Soup, ready to serve, traditional, beef barley, PROGRESSO
3. Soup, ready to serve, beef barley
4. Soup, ready-to-serve, canned, beef and vegetables
5. Soup, ready-to-serve, canned, chunky vegetable


Not Recommended Foods for Hypertension
1. Seasoning mix, coriander & annatto, sazon, dry
2. Salt, table
3. Leavening agents, baking soda
4. Fish oil, fully hydrogenated, menhaden
5. Oil, fully hydrogenated, soy, industrial
Top Recommended Foods for Diabetes(Type-1)
1. Restaurant, without meat or noodles, vegetable chow mein, Chinese
2. Squash, without salt, drained, boiled, cooked, crookneck and straightneck, summer
3. Squash, with salt, drained, boiled, cooked, crookneck and straightneck, summer
4. Soup, ready-to-serve, canned, chicken and vegetable
5. Squash, includes skin, zucchini, summer


Not Recommended Foods for Diabetes(Type-1)
1. Seasoning mix, coriander & annatto, sazon

In [90]:
# Inspect the raw per-disease score arrays before converting them to a DataFrame
similarity_scores_dict

{'Hypertension': array([[0.83033188, 0.92873466, 0.00226299, ..., 0.16279868, 0.04943411,
         0.04943411]]),
 'Diabetes(Type-1)': array([[0.86963811, 0.92891784, 0.00248732, ..., 0.15985101, 0.04879213,
         0.04879213]]),
 'Skin Cancer': array([[0.90135264, 0.93812756, 0.00318844, ..., 0.16082725, 0.0471918 ,
         0.0471918 ]]),
 'general': array([[0.8303457 , 0.92413273, 0.00233985, ..., 0.16149148, 0.05102753,
         0.05102753]])}

In [91]:
# Collapse each disease's 2-D score array to 1-D so it can serve as a DataFrame column.
flattened_dict = {}
for disease_name, score_matrix in similarity_scores_dict.items():
    flattened_dict[disease_name] = score_matrix.flatten()

# Build the table: one column per disease, keyed by the disease name.
similarity_scores_df = pd.DataFrame(data=flattened_dict)

In [76]:
# Preview the score table (one column per selected disease)
similarity_scores_df

Unnamed: 0,Hypertension,Diabetes(Type-1),Skin Cancer,general
0,0.830332,0.869638,0.901353,0.830346
1,0.928735,0.928918,0.938128,0.924133
2,0.002263,0.002487,0.003188,0.002340
3,0.799281,0.838257,0.867219,0.800134
4,0.108112,0.121677,0.124822,0.107994
...,...,...,...,...
8181,0.050708,0.050044,0.048484,0.052267
8182,0.117612,0.117246,0.115550,0.119226
8183,0.162799,0.159851,0.160827,0.161491
8184,0.049434,0.048792,0.047192,0.051028


### Export similarity scores

In [94]:
import pickle

# Serialize the score DataFrame to 'similarity_scores.pkl' (path is relative to
# the current working directory); the file is closed when the with-block exits.
with open('similarity_scores.pkl', mode='wb') as scores_file:
    pickle.dump(similarity_scores_df, scores_file)