<a href="https://colab.research.google.com/github/darkginka/Diet-Recommanded-System/blob/main/DRS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Project Start**

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset CSVs loaded below are read
# from paths underneath this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

In [None]:
####### dataset #######
# Locations of the two CSV files on the mounted Google Drive.
food_csv_path = "/content/drive/MyDrive/dataset/food_nutrition.csv"
disease_csv_path = "/content/drive/MyDrive/dataset/disease_nutrition.csv"

# Food table: read with pandas' default encoding.
food_nutrition = pd.read_csv(food_csv_path)
# Disease table: decoded with the 'unicode_escape' codec.
disease_nutrition = pd.read_csv(disease_csv_path, encoding='unicode_escape')

In [None]:
# Show the name of every column in the food table, one per line.
for column_name in food_nutrition.columns.tolist():
    print(column_name)

In [None]:
# Preview the first rows of the disease table (last expression is rendered as the cell output).
disease_nutrition.head()

**Part-1: Look up a disease and its deficient nutrients**

In [None]:
####### Methods #######
def get_disease(disease_name):
    """Resolve a user-supplied disease name to the name stored in the dataset.

    An exact match against ``disease_nutrition["disease"]`` is tried first.
    If that fails and ``disease_name`` is a string, a second pass compares
    the names with surrounding whitespace removed and case ignored, so that
    typed input such as ``" anemia "`` still finds ``"Anemia"``.

    Parameters
    ----------
    disease_name : str
        Name to look up (typically typed by the user).

    Returns
    -------
    str or bool
        The disease name exactly as stored in the dataset (first match), or
        ``False`` when no row matches.
    """
    known_names = disease_nutrition["disease"]

    # Pass 1: exact match, same result as the original implementation.
    exact = known_names[known_names == disease_name]
    if not exact.empty:
        return exact.values[0]

    # Pass 2: tolerant match; only meaningful for non-empty text input.
    if not isinstance(disease_name, str):
        return False
    wanted = disease_name.strip().casefold()
    if not wanted:
        return False
    normalized = known_names.astype(str).str.strip().str.casefold()
    loose = known_names[normalized == wanted]
    if loose.empty:
        return False
    return loose.values[0]

def get_disease_name(disease_id):
    """Return the ``disease`` value of the first row whose ``disease_id`` equals ``disease_id``.

    Raises ``IndexError`` when no row has that id.
    """
    id_matches = disease_nutrition["disease_id"] == disease_id
    matching_names = disease_nutrition.loc[id_matches, "disease"]
    return matching_names.values[0]

def get_disease_id(disease):
    """Return the ``disease_id`` of the first row whose ``disease`` equals ``disease``.

    Raises ``IndexError`` when no row has that disease name.
    """
    name_matches = disease_nutrition["disease"] == disease
    matching_ids = disease_nutrition.loc[name_matches, "disease_id"]
    return matching_ids.values[0]

def get_disease_ie(disease):
    """Return the ``ineficient_nutritions`` value of the first row whose ``disease`` equals ``disease``.

    Raises ``IndexError`` when no row has that disease name.
    """
    name_matches = disease_nutrition["disease"] == disease
    matching_nutrients = disease_nutrition.loc[name_matches, "ineficient_nutritions"]
    return matching_nutrients.values[0]

In [None]:
####### get-set Data #######
# Ask the user for a disease, then show its row and the list of nutrients
# stored in its 'ineficient_nutritions' column.
users_disease = input("Enter Disease Name: ")
disease_name = get_disease(users_disease)

# get_disease returns the literal False when nothing matches, so test identity
# rather than equality (a name string compared with == False is misleading).
if disease_name is False:
    print("Disease Not Found")
else:
    disease_id = get_disease_id(disease_name)

    # Select the row by name instead of by `disease_id - 101`, which only works
    # when ids start at 101 and follow the row order with no gaps.
    print(disease_nutrition[disease_nutrition.disease == disease_name].iloc[0])

    disease_ie = get_disease_ie(disease_name)

    # Column names of the food table as a plain list.
    col_list = food_nutrition.columns.values.tolist()

    # Split the space-separated nutrient string into a list.
    dis_list = disease_ie.split(" ")
    print(dis_list)

    # Drop empty tokens produced by repeated spaces. Build a new list rather
    # than calling remove() while iterating, which skips the element that
    # follows each removal and leaves some empty strings behind.
    dis_list = [ele for ele in dis_list if ele != ""]
    print(dis_list)

**Part-2: Explore and cluster foods by vitamin content**

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Correlation heatmap of the food table.
# Only numeric columns are correlated: the table also holds text columns
# (e.g. 'Description'), and DataFrame.corr() raises on non-numeric data in
# pandas >= 2.0 instead of silently skipping it.
food_numeric = food_nutrition.select_dtypes(include="number")
ax = sns.heatmap(food_numeric.corr(), annot=True)
plt.show()

In [None]:
# Distribution of each vitamin column, one histogram per column.
vitamin_columns = [
    'Vitamin_A',
    'Vitamin_B12',
    'Vitamin_B6',
    'Vitamin_C',
    'Vitamin_E',
    'Vitamin_K',
]
food_vitamins = food_nutrition[vitamin_columns]
food_vitamins.hist(bins=50, figsize=(10, 10))
plt.show()

**Preprocessing data**

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Feature columns of the food table that are scaled and clustered below.
columns_to_cluster = [
    'Vitamin_A',
    'Vitamin_B12',
    'Vitamin_B6',
    'Vitamin_C',
    'Vitamin_E',
    'Vitamin_K',
]

In [None]:
# MinMaxScaler rescales each feature to a given range (default arguments are used here).
mms = MinMaxScaler()

# Fit the scaler on the selected vitamin columns and transform them in one step.
food_features = food_nutrition[columns_to_cluster]
food_scaled = mms.fit_transform(food_features)

# Scaled feature vector of the first food, as a quick sanity check.
print("Based on: ", food_scaled[0])

In [None]:
# Column labels given to the scaled feature matrix (same six vitamin names).
columns_to_cluster_scaled = [
    'Vitamin_A',
    'Vitamin_B12',
    'Vitamin_B6',
    'Vitamin_C',
    'Vitamin_E',
    'Vitamin_K',
]

In [None]:
# Wrap the scaled feature matrix in a DataFrame labelled with the vitamin column names.
df_food_scaled = pd.DataFrame(food_scaled, columns=columns_to_cluster_scaled)

In [None]:
# Correlation heatmap of the scaled vitamin features.
scaled_corr = df_food_scaled.corr()
ax = sns.heatmap(scaled_corr, annot=True)
plt.show()

**Training the model**

In [None]:
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

In [None]:
# Sweep K from 2 to 20, recording for each K the inertia (sum of squared
# distances, for the elbow plot) and the silhouette score.
n_clusters = range(2, 21)
ssd = []
sc = []
for n in n_clusters:
    km = KMeans(
        n_clusters=n,
        max_iter=300,
        n_init=10,
        init='k-means++',
        random_state=42,
    )
    # fit() returns the estimator itself, so the prediction can be chained.
    preds = km.fit(food_scaled).predict(food_scaled)
    centers = km.cluster_centers_
    ssd.append(km.inertia_)
    score = silhouette_score(food_scaled, preds, metric='euclidean')
    sc.append(score)
    print(f"Number of Clusters = {n}, Silhouette Score = {score}")

In [None]:
# Elbow plot: inertia against the number of clusters tried in the sweep.
elbow_fig, elbow_ax = plt.subplots()
elbow_ax.plot(n_clusters, ssd, marker='.', markersize=12)
elbow_ax.set_xlabel('Number of clusters')
elbow_ax.set_ylabel('Sum of squared distances')
elbow_ax.set_title('Elbow method for optimal K')
plt.show()

In [None]:
# Number of clusters used for the final model.
# NOTE(review): presumably chosen from the elbow/silhouette results above; confirm.
k=6

# n_init is pinned to 10 to match the K sweep above. Left unset, the value
# depends on the installed scikit-learn version (the default moved from 10 to
# 'auto'), so the final model could be fitted differently from the sweep.
model = KMeans(n_clusters=k, n_init=10, random_state=42).fit(food_scaled)
pred = model.predict(food_scaled)
print('10 first clusters: ', model.labels_[:10])

**Visualizing the clusters**

In [None]:
# Attach each food's cluster label to the scaled frame, then plot cluster sizes.
df_food_scaled['cluster'] = model.labels_

cluster_sizes_for_plot = df_food_scaled['cluster'].value_counts()
bar_ax = cluster_sizes_for_plot.plot(kind='bar')
bar_ax.set_xlabel('Cluster')
bar_ax.set_ylabel('Amount of food')
bar_ax.set_title('Amount of foods per cluster')
plt.show()

In [None]:
# Cluster sizes, largest first (value_counts sorts in descending order).
cluster_sizes = df_food_scaled['cluster'].value_counts()
display(cluster_sizes)

# One-row Series holding the smallest cluster and its size.
minor_cluster = cluster_sizes.tail(1)

# Take the scalar with .iloc[0]; calling int() on a 1-element array is
# deprecated in NumPy >= 1.25.
print("Amount of food in the smallest cluster: ", int(minor_cluster.iloc[0]))

In [None]:
# Place the scaled features (with their cluster label) beside the original
# food table and index the result by cluster.
df_food_joined = pd.concat([food_nutrition, df_food_scaled], axis=1)
df_food_joined = df_food_joined.set_index('cluster')

# For every cluster, show up to 10 food descriptions in random order.
for cluster in range(k):
    cluster_descriptions = df_food_joined.loc[cluster, ['Description']]
    shuffled_descriptions = cluster_descriptions.sample(frac=1)
    display(shuffled_descriptions.head(10))

**Applying PCA to visualize the clusters**

In [None]:
# Project the scaled vitamin features onto 3 principal components.
pca = PCA(n_components=3, random_state=42)
food_pca = pca.fit_transform(food_scaled)
# Cell output: total fraction of variance explained by the 3 components.
pca.explained_variance_ratio_.sum()

In [None]:
# Principal-component coordinates of every food plus its K-Means cluster label.
pca_component_names = ['C1', 'C2', 'C3']
df_pca = pd.DataFrame(food_pca, columns=pca_component_names).assign(cluster=model.labels_)
df_pca.head()

In [None]:
# Draw the same number of points from every cluster (the size of the smallest
# one) so that large clusters do not drown out small ones in the scatter plot.
# minor_cluster is a one-row Series; .iloc[0] extracts the count without the
# deprecated int(Series) conversion.
n_per_cluster = int(minor_cluster.iloc[0])

# Collect the per-cluster samples and concatenate once, instead of growing a
# DataFrame inside the loop starting from an empty frame.
cluster_samples = [
    df_pca[df_pca.cluster == c].sample(n=n_per_cluster, random_state=42)
    for c in df_pca.cluster.unique()
]
sampled_clusters_pca = (
    pd.concat(cluster_samples, axis=0) if cluster_samples else pd.DataFrame()
)
sampled_clusters_pca.cluster.value_counts()

In [None]:
# Scatter of the balanced sample on the first two principal components,
# coloured by cluster; the legend is placed outside the axes on the right.
scatter_ax = sns.scatterplot(
    x='C1',
    y='C2',
    hue='cluster',
    data=sampled_clusters_pca,
    legend="full",
    palette='Paired',
)
scatter_ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
scatter_ax.set_title('Clusters view using PCA')
plt.show()

**Predicting users' clusters**

In [None]:
# Assign a cluster to every row of food_scaled with the fitted K-Means model.
# NOTE(review): the input here is food_scaled, the same matrix the model was
# fitted on; no separate user data is visible in this notebook, so "users" is
# presumably a placeholder — confirm what should be predicted here.
user_pred = model.predict(food_scaled)
print('10 first users clusters: ', user_pred[:10])

In [None]:
# Scaled features labelled with the predicted cluster of each row.
user_cluster = pd.DataFrame(food_scaled, columns=columns_to_cluster_scaled).assign(cluster=user_pred)

# Bar chart of how many rows fall into each predicted cluster.
user_cluster_sizes = user_cluster['cluster'].value_counts()
user_bar_ax = user_cluster_sizes.plot(kind='bar', color='green')
user_bar_ax.set_xlabel('Cluster')
user_bar_ax.set_ylabel('Amount of food')
user_bar_ax.set_title('Amount of food in the users clusters')
plt.show()

In [None]:
# Food table joined column-wise with the predicted clusters, indexed by cluster.
df_user_food_joined = pd.concat([food_nutrition, user_cluster], axis=1)
df_user_food_joined = df_user_food_joined.set_index('cluster')

# For every predicted cluster, show up to 10 food descriptions in random order.
for cluster in user_cluster['cluster'].unique():
    user_cluster_descriptions = df_user_food_joined.loc[cluster, ['Description']]
    display(user_cluster_descriptions.sample(frac=1).head(10))

**Recommending Food**

In [None]:
# Turn the 'cluster' index back into a regular column. The guard keeps the
# cell safe to re-run: an unconditional in-place reset_index would add stray
# 'index' / 'level_0' columns on later runs and eventually raise.
if df_user_food_joined.index.name == 'cluster':
    df_user_food_joined = df_user_food_joined.reset_index()

# Share of rows per cluster, scaled so the shares add up to 20 — the number of
# foods to recommend.
cluster_pct = df_user_food_joined.cluster.value_counts(normalize=True)*20

# If rounding the shares yields fewer than 20 foods in total, add one to every
# cluster whose share would otherwise round down to zero.
if int(cluster_pct.round(0).sum()) < 20:
    tiny_share = cluster_pct < 0.5
    cluster_pct[tiny_share] = cluster_pct[tiny_share] + 1.0

display(cluster_pct)
print('Total food: ', int(cluster_pct.round(0).sum()))

In [None]:
# Turn the 'cluster' index back into a regular column so rows can be filtered
# with df_food_joined['cluster']. Guarded so that re-running the cell does not
# add stray 'index' / 'level_0' columns.
if df_food_joined.index.name == 'cluster':
    df_food_joined = df_food_joined.reset_index()
df_food_joined.head(3)

In [None]:
# Attach to every row the quota of its cluster; cluster_pct is indexed by
# cluster label, so Series.map does the lookup directly.
df_user_food_joined['cluster_pct'] = df_user_food_joined['cluster'].map(cluster_pct)

# Drop the vitamin feature columns. errors='ignore' lets the cell be re-run
# after the columns are already gone instead of raising KeyError.
# NOTE(review): the food table and the scaled frame share these column names,
# so dropping by label removes both the scaled and the original vitamin
# columns — confirm that this is intended.
df_user_food_joined = df_user_food_joined.drop(columns=columns_to_cluster_scaled, errors='ignore')
df_user_food_joined.head(3)

In [None]:
# Build the recommendation list: from each cluster, sample as many foods as
# its rounded quota in cluster_pct, then keep at most 20 rows overall.
# random_state makes the selection reproducible across runs; remove it if a
# different selection on every run is wanted.
foods_per_cluster = [
    df_food_joined[df_food_joined['cluster'] == ncluster].sample(
        n=int(round(pct, 0)), random_state=42
    )
    for ncluster, pct in cluster_pct.items()
]

# Concatenate once (instead of growing a frame inside the loop) and truncate
# to the first 20 rows, which is what the per-iteration negative slice did.
if foods_per_cluster:
    final_Food = pd.concat(foods_per_cluster, ignore_index=True).head(20)
else:
    final_Food = pd.DataFrame()
final_Food.head(3)

In [None]:
# Final output: the descriptions of the recommended foods.
final_Food[['Description']]