In [None]:
# Mount Google Drive at /content/drive; the project directory used by the
# analysis cell lives underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Importing necessary libraries
import os
import random

import numpy as np
import pandas as pd
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential

# Make sure Drive is mounted (a no-op message is printed if it already is).
drive.mount("/content/drive")

# Root folder that holds every input CSV used by this notebook.
base_dir = "/content/drive/My Drive/durga_project"

# Sample metadata: read once from <base_dir>/metadata.csv.
metadata_path = os.path.join(base_dir, "metadata.csv")
metadata = pd.read_csv(filepath_or_buffer=metadata_path)

# Boolean masks selecting the 'NORMAL' and 'DEVIANT' rows of the metadata table.
filter1 = metadata["group"] == "NORMAL"
filter2 = metadata["group"] == "DEVIANT"

# Sample ids of the NORMAL group. 'taxonomy' is prepended (as before) so the
# array keeps the same layout for any later code that reads it.
# .loc[mask, column] replaces the chained metadata[mask][column] lookup.
normal_sample_ids = metadata.loc[filter1, "sample-id"].to_numpy()
normal_sample_ids = np.insert(normal_sample_ids, 0, 'taxonomy')

# Load taxonomy data
taxonomy_path = os.path.join(base_dir, "taxonomy_400.csv")
taxonomy = pd.read_csv(taxonomy_path)

# Process taxonomy for NORMAL samples.
# set_index() without inplace=True returns a new frame, so nothing is written
# into a column slice of `taxonomy`.
taxonomy_normal = taxonomy[["taxonomy"] + list(normal_sample_ids[1:])].set_index("taxonomy")

# Save taxonomy range
taxonomy_range = list(taxonomy_normal.index)

# Calculate population-averaged relative abundances.
# The percent strings are parsed once for the whole table instead of once per
# taxon inside a Python loop; `taxonomy_normal` itself keeps the raw strings.
taxonomy_normal_values = taxonomy_normal.apply(
    lambda column: column.str.strip('%').astype(float)
)
normal_mean = taxonomy_normal_values.mean(axis=1)
# ddof=0 reproduces np.std (population standard deviation) used previously.
normal_std = taxonomy_normal_values.std(axis=1, ddof=0)

# One row per taxon: mean, std and the mean +/- one std band used as the
# "normal" range by the classification step.
normal_parameters = pd.DataFrame(
    {
        "mean": normal_mean,
        "std": normal_std,
        "max": normal_mean + normal_std,
        "min": normal_mean - normal_std,
    }
)
normal_parameters.index.name = "taxonomy"

# Same (taxon, mean, std, max, min) tuples the loop used to accumulate.
reference_data = list(normal_parameters.itertuples(index=True, name=None))

# Process taxonomy for DEVIANT samples.
# 'taxonomy' is prepended (as before) so the id array keeps its layout.
deviant_sample_ids = metadata.loc[filter2, "sample-id"].to_numpy()
deviant_sample_ids = np.insert(deviant_sample_ids, 0, 'taxonomy')

# Build an independent, float-typed frame indexed by taxon. The previous code
# assigned into a column slice of `taxonomy` (iloc assignment, inplace
# set_index, new column), which triggers pandas' SettingWithCopyWarning and
# leaves it ambiguous whether the write lands on a copy.
taxonomy_deviant = (
    taxonomy[["taxonomy"] + list(deviant_sample_ids[1:])]
    .set_index("taxonomy")
    .apply(lambda column: column.str.strip('%').astype(float))
)

# Categorize taxons into 'U', 'O', or 'N':
#   'U' - deviant mean below the normal "min" bound
#   'O' - deviant mean above the normal "max" bound
#   'N' - inside the band (also the result when a comparison involves NaN)
# Rows are compared positionally: `taxonomy_deviant` and `normal_parameters`
# are both built from the rows of `taxonomy` in file order.
deviant_mean = taxonomy_deviant.mean(axis=1).to_numpy()
lower_bound = normal_parameters["min"].to_numpy()
upper_bound = normal_parameters["max"].to_numpy()
class_list = np.select(
    [deviant_mean < lower_bound, deviant_mean > upper_bound],
    ["U", "O"],
    default="N",
).tolist()

taxonomy_deviant["set_class"] = class_list

# Nutrient impact matrices: resolve the four CSV paths, then read them in the
# same order (amino acids, D-amino acids, sugars, vitamins).
nim_aminoacids_path, nim_aminoacidsD_path, nim_sugars_path, nim_vitamins_path = (
    os.path.join(base_dir, file_name)
    for file_name in (
        "nim-aminoacids_400.csv",
        "nim-aminoacidsD_400.csv",
        "nim-sugars_400.csv",
        "nim-vitamins_400.csv",
    )
)
nim_aminoacids, nim_aminoacidsD, nim_sugars, nim_vitamins = map(
    pd.read_csv,
    (nim_aminoacids_path, nim_aminoacidsD_path, nim_sugars_path, nim_vitamins_path),
)

# Combine the four matrices column-wise, keyed on the 'taxonomy' column.
# join() defaults to a left join, so the amino-acid table decides the rows.
nim_total = (
    nim_aminoacids.set_index('taxonomy')
    .join(nim_aminoacidsD.set_index('taxonomy'))
    .join(nim_sugars.set_index('taxonomy'))
    .join(nim_vitamins.set_index('taxonomy'))
)

# Every column of the merged matrix is one nutrient.
nutrients_range = nim_total.columns.tolist()

# Collect the nutrient-impact row of each unbalanced taxon, split by class:
# 'U' rows go to dict_unbalanced_U, 'O' rows to dict_unbalanced_O, and 'N'
# taxa are skipped.
dict_unbalanced_U = {}
dict_unbalanced_O = {}
buckets = {"U": dict_unbalanced_U, "O": dict_unbalanced_O}

for taxon in taxonomy_range:
    bucket = buckets.get(taxonomy_deviant.loc[taxon, "set_class"])
    if bucket is not None:
        bucket[taxon] = nim_total.loc[taxon].values.tolist()

# Reward function
def reward(a, dict_unbalanced, list_indices, epsilon):
    """Return 1 when the selected nutrients jointly reach `epsilon` for taxon `a`.

    Args:
        a: Key into `dict_unbalanced`.
        dict_unbalanced: Mapping from taxon to its list of nutrient-impact values.
        list_indices: Positions in that list to combine.
        epsilon: Threshold the combined value must reach (inclusive).

    Returns:
        1 if ``1 - prod(1 - value[i] for i in list_indices) >= epsilon``, else 0.
        With no indices the product is 1, so the combined value is 0.
    """
    impacts = dict_unbalanced[a]
    complement = 1
    for position in list_indices:
        complement = complement * (1 - impacts[position])
    combined = 1 - complement
    if combined >= epsilon:
        return 1
    return 0

# Reward nutrient
def reward_nutrient(n, dict_unbalanced_O, dict_unbalanced_U, epsilon_O, epsilon_U):
    """Score a single nutrient index `n` against both unbalanced groups.

    Returns the number of taxa in `dict_unbalanced_U` for which `reward` is 1
    (threshold `epsilon_U`) minus the number of taxa in `dict_unbalanced_O`
    for which `reward` is 1 (threshold `epsilon_O`).
    """
    single_nutrient = [n]

    hits_U = 0
    for taxon in dict_unbalanced_U:
        hits_U += reward(taxon, dict_unbalanced_U, single_nutrient, epsilon_U)

    hits_O = 0
    for taxon in dict_unbalanced_O:
        hits_O += reward(taxon, dict_unbalanced_O, single_nutrient, epsilon_O)

    return hits_U - hits_O

# Randomized algorithm
# For each budget m, repeatedly draw random nutrient subsets of every size
# 1..m and remember the best score (reward over 'U' taxa minus reward over
# 'O' taxa) together with the subset size that produced it.
final_score_dict = {}
m_values = [5, 10, 15, 20, 25]
epsilon_O, epsilon_U = 0.9, 0.5

RANDOM_SEED = 42       # fixed seed so a re-run reproduces the same scores
N_ITERATIONS = 5000    # Reduced iterations for runtime efficiency

# A dedicated generator keeps the search reproducible without touching the
# state of the module-level `random` functions.
rng = random.Random(RANDOM_SEED)
n_nutrients = len(nutrients_range)

for m in m_values:
    temp_max_length, temp_max_score = 0, -10000
    # sample() raises ValueError when asked for more items than exist, so the
    # subset size is capped at the number of available nutrients.
    largest_subset = min(m, n_nutrients)
    for _ in range(N_ITERATIONS):
        for j in range(1, largest_subset + 1):
            list_indices = rng.sample(range(n_nutrients), j)
            sum_U = sum(reward(b, dict_unbalanced_U, list_indices, epsilon_U) for b in dict_unbalanced_U)
            sum_O = sum(reward(a, dict_unbalanced_O, list_indices, epsilon_O) for a in dict_unbalanced_O)
            tempscore = sum_U - sum_O
            # Strict '>' keeps the earliest (smallest) subset size on ties,
            # matching the previous "best of this round, then compare" logic.
            if tempscore > temp_max_score:
                temp_max_score = tempscore
                temp_max_length = j
    final_score_dict[m] = [temp_max_length, temp_max_score]

# Prepare data for deep learning
# One row per entry of final_score_dict: features are
# [m, best subset length, best score]; the target is the best score.
# NOTE(review): the target (scores[1]) is also the last feature column, so the
# network is asked to reproduce one of its own inputs (target leakage). The
# feature layout is left unchanged because the interactive prediction function
# feeds the scaler/model the same three columns.
# NOTE(review): the dataset has one row per m value, so the train/validation/
# test splits below each contain only a handful of samples.
X = np.array([[m, *scores] for m, scores in final_score_dict.items()])
y = np.array([scores[1] for scores in final_score_dict.values()])

# Normalize features to [0, 1] column-wise.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split (20% held out, fixed random_state for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Deep learning model.
# An explicit Input layer replaces Dense(input_dim=...), which Keras flags as
# deprecated for Sequential models.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dense(1, activation='linear')
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])
history = model.fit(X_train, y_train, epochs=50, batch_size=8, validation_split=0.1, verbose=1)

# Evaluate and predict
loss, mae = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Mean Absolute Error: {mae:.2f}")

y_pred = model.predict(X_test)
print("Sample predictions:", y_pred[:5].flatten())
# Show every nutrient column of the merged impact matrix next to its position,
# so an index can be picked from the printed list.
nutrients_range = nim_total.columns.tolist()
print("Available Nutrients:")
for position, nutrient_name in enumerate(nutrients_range):
    print(f"{position}: {nutrient_name}")

# Cut-offs for labelling a predicted impact score; both bounds are exclusive.
GOOD_THRESHOLD = 10
BAD_THRESHOLD = -10

# Function to classify impact score
def evaluate_impact_score(score):
    """Map a numeric impact score to a text label.

    Returns "Good Impact" when `score` is strictly above GOOD_THRESHOLD,
    "Bad Impact" when it is strictly below BAD_THRESHOLD, and
    "Neutral Impact" otherwise (including scores equal to either threshold).
    """
    if score > GOOD_THRESHOLD:
        return "Good Impact"
    if score < BAD_THRESHOLD:
        return "Bad Impact"
    return "Neutral Impact"

def _parse_nutrient_indices(raw_text, n_nutrients):
    """Turn a comma-separated string into a validated list of nutrient indices.

    Blank entries (for example from a trailing comma) are ignored.

    Raises:
        ValueError: if nothing was entered, an entry is not an integer, or an
            index falls outside ``0 .. n_nutrients - 1``. Negative indices are
            rejected rather than silently wrapping around to the end of the list.
    """
    tokens = [token.strip() for token in raw_text.split(",") if token.strip()]
    if not tokens:
        raise ValueError("No nutrient indices were entered.")
    try:
        indices = [int(token) for token in tokens]
    except ValueError as exc:
        raise ValueError(
            f"Nutrient indices must be comma-separated integers, got: {raw_text!r}"
        ) from exc
    invalid = [index for index in indices if not 0 <= index < n_nutrients]
    if invalid:
        raise ValueError(
            f"Nutrient indices out of range 0..{n_nutrients - 1}: {invalid}"
        )
    return indices


# Function to take input, predict, and evaluate
def predict_and_evaluate_nutrient_score():
    """Interactively collect inputs, run the trained model and print a verdict.

    Prints the available nutrients, prompts for a comma-separated list of
    nutrient indices, a combination length and a maximum nutrient score, scales
    the resulting ``[m, length, max_score]`` row with the fitted ``scaler`` and
    prints the model's prediction together with its impact label.

    Only the *number* of selected nutrients (``m``) is passed to the model; the
    selection itself is echoed back but not otherwise used.

    Raises:
        ValueError: if the index list is empty, not numeric or out of range, or
            if the length / score prompts cannot be parsed as numbers.
    """
    # List all available nutrients
    nutrients_range = list(nim_total.columns)
    print("Available Nutrients:")
    for idx, nutrient in enumerate(nutrients_range):
        print(f"{idx}: {nutrient}")

    # Prompt user to select nutrients by their index
    raw_selection = input("Enter the indices of selected nutrients (comma-separated): ")
    selected_indices = _parse_nutrient_indices(raw_selection, len(nutrients_range))

    # Display the selected nutrients
    selected_nutrients = [nutrients_range[i] for i in selected_indices]
    print(f"Selected Nutrients: {selected_nutrients}")

    # Prompt user for additional input values
    m = len(selected_indices)  # Automatically set m to the number of selected nutrients
    nutrient_combination_length = int(input("Enter the length of nutrient combination: "))
    max_score = float(input("Enter the maximum nutrient score: "))

    # Prepare the input feature in the same column order used for training
    input_feature = np.array([[m, nutrient_combination_length, max_score]])

    # Normalize the input feature using the scaler
    input_feature_scaled = scaler.transform(input_feature)

    # Make prediction; the model returns one row with a single output value
    prediction = model.predict(input_feature_scaled)
    impact_score = float(prediction[0][0])

    # Evaluate the impact score
    evaluation = evaluate_impact_score(impact_score)

    print(f"Predicted Nutrient Impact Score: {impact_score:.2f}")
    print(f"Impact Evaluation: {evaluation}")

# Call the function to get input, predict, and evaluate.
# NOTE: the function waits on input() prompts, so this line blocks until a
# user answers them; it will stall an unattended "Run All".
predict_and_evaluate_nutrient_score()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  taxonomy_deviant.iloc[:, 1:] = taxonomy_deviant.iloc[:, 1:].apply(lambda x: x.str.strip('%').astype(float))
  taxonomy_deviant["set_class"] = class_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  taxonomy_deviant["set_class"] = class_list


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 0.0047 - mae: 0.0423 - val_loss: 0.0014 - val_mae: 0.0381
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227ms/step - loss: 0.0044 - mae: 0.0476 - val_loss: 0.0013 - val_mae: 0.0363
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - loss: 6.2481e-04 - mae: 0.0201 - val_loss: 7.9276e-04 - val_mae: 0.0282
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - loss: 5.5603e-04 - mae: 0.0195 - val_loss: 4.8620e-04 - val_mae: 0.0220
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - loss: 0.0013 - mae: 0.0247 - val_loss: 4.8763e-04 - val_mae: 0.0221
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step - loss: 0.0022 - mae: 0.0315 - val_loss: 6.6014e-04 - val_mae: 0.0257
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - loss: 