In [1]:
import pandas as pd

# Specify the file path
file_path = "C:/Users/maria/OneDrive/Documentos/CS_Clases/Desicion_Modeling/NutriScore_Project/Ahmad_Mayorga-Llano_Khalid.csv"

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(file_path)

In [2]:
# Specify the columns you want to extract
columns_to_extract = [
    "product_name",
    "energy-kj_100g",
    "sugars_100g",
    "saturated-fat_100g",
    "salt_100g",
    "proteins_100g",
    "fiber_100g",
    "fruits-vegetables-nuts-estimate-from-ingredients_100g",
]


# Extract the specified columns
selected_columns = df[columns_to_extract]
selected_columns

Unnamed: 0,product_name,energy-kj_100g,sugars_100g,saturated-fat_100g,salt_100g,proteins_100g,fiber_100g,fruits-vegetables-nuts-estimate-from-ingredients_100g
0,Multigrano arandanos y sabor fresa,1940.0,29.4,4.41,0.411,5.88,5.88,15.000000
1,Les gourmandises chocolat noir et fruits - Fra...,1958.0,60.0,13.00,0.020,3.30,4.60,23.000000
2,Tortilla Chips - Salted,1979.0,0.8,2.00,0.900,6.60,4.00,0.000000
3,Stevia chocolate,1741.1,14.4,20.20,0.300,11.30,10.20,0.000000
4,Il Pandoro,1695.0,18.0,12.00,0.650,8.40,1.60,0.000000
...,...,...,...,...,...,...,...,...
305,Génoises Orange,1610.0,52.0,6.00,0.150,3.00,2.00,11.000000
306,Chocolat au lait du Pays Alpin,2210.0,59.0,18.00,0.000,6.10,1.80,0.390625
307,Soßenkuchen,1507.0,29.0,0.20,0.050,7.90,2.90,0.000000
308,Soufflé à la cacahuète,2039.0,1.8,7.40,1.430,10.50,0.00,19.000000


In [3]:
#Initialize variables
lambda_thresholds_percentage = [0.5,0.6, 0.7] 
weights = [1,1,1,1,2,2,2]
preference_thresholds_pi = [
    [3350, 34, 10, 2, 2.4, 3, 40 ], # pi_1
    [2345, 23.8, 7, 1.4,7.26, 4.46, 53.3 ], # pi_2
    [1340, 13.6, 4, 0.8, 12.13, 5.93, 66.6], # pi_3
    [335, 3.4, 1, 0.2, 17, 7.4, 80] # pi_4
]

In [4]:
#Get the sum of weights of the item if it surpases the pi threshold
def item_assessment(weights, item_criterias, preference_thresholds_pi):
    
    
    #Adjust weights to be percentage-wise
    adjusted_weights = [weight / sum(weights) for weight in weights]
    #print(adjusted_weights)

    if len(weights) != len(preference_thresholds_pi) or len(item_criterias) != len(preference_thresholds_pi):
        #print(len(weights), len(criteria), len(preference_thresholds_pi))
        raise ValueError("Input lists must have the same length.")

    assessments = [adjusted_weight if item_value >= threshold else 0 for adjusted_weight, item_value, threshold in zip(adjusted_weights, item_criterias, preference_thresholds_pi)]
    #print("Pi", preference_thresholds_pi)
    #print("ITEM CRITERIA", item_criterias)
    #print("Weights", adjusted_weights)
    #print("Assessments", assessments)
    total_assessment = sum(assessments)
    return total_assessment


In [5]:
def item_assessment(weights, item_criterias, preference_thresholds_pi, positive_characteristics=None, negative_characteristics=None):
    # Default positive characteristics
    if positive_characteristics is None:
        positive_characteristics = ["proteins_100g", "fiber_100g", "fruits-vegetables-nuts-estimate-from-ingredients_100g"]

    # Default negative characteristics
    if negative_characteristics is None:
        negative_characteristics = ["energy-kj_100g", "sugars_100g", "saturated-fat_100g", "salt_100g"]

    # Adjust weights to be percentage-wise
    adjusted_weights = [weight / sum(weights) for weight in weights]

    # Check if input lists have the same length
    if len(weights) != len(preference_thresholds_pi) or len(item_criterias) != len(preference_thresholds_pi):
        raise ValueError("Input lists must have the same length.")

    # Assess positive characteristics (unchanged threshold comparison)
    positive_assessments = [
        adjusted_weight if feature in positive_characteristics and value >= threshold else 0
        for adjusted_weight, feature, value, threshold in zip(adjusted_weights, item_criterias.index, item_criterias, preference_thresholds_pi)
    ]

    # Assess negative characteristics (modified threshold comparison)
    negative_assessments = [
        adjusted_weight if feature in negative_characteristics and value <= threshold else 0
        for adjusted_weight, feature, value, threshold in zip(adjusted_weights, item_criterias.index, item_criterias, preference_thresholds_pi)
    ]

    # Combine positive and negative assessments
    combined_assessments = [pos + neg for pos, neg in zip(positive_assessments, negative_assessments)]

    total_assessment = sum(combined_assessments)
    
    #print("Positive Assessments:", positive_assessments, "Negative Assessments:", negative_assessments, "Combined Assessments:", combined_assessments)

    return total_assessment


In [6]:
#Testing variables
# Specify the columns you want to extract
columns_to_test = [
    "energy-kj_100g",
    "sugars_100g",
    "saturated-fat_100g",
    "salt_100g",
    "proteins_100g",
    "fiber_100g",
    "fruits-vegetables-nuts-estimate-from-ingredients_100g",
]


# Extract the specified columns
testing_columns = df[columns_to_test]
testing_columns

testing_weights = [0.5, 0.5, 0.5, 0.5, 1.0, 1.0, 1.0]
testing_item = testing_columns.iloc[0].copy()
testing_pi = [2345, 23.8, 7, 1.4, 8, 2, 15]

testing_item

energy-kj_100g                                           1940.000
sugars_100g                                                29.400
saturated-fat_100g                                          4.410
salt_100g                                                   0.411
proteins_100g                                               5.880
fiber_100g                                                  5.880
fruits-vegetables-nuts-estimate-from-ingredients_100g      15.000
Name: 0, dtype: float64

In [7]:
item_assessment(testing_weights, testing_item, testing_pi)

0.7

In [8]:
def matrix_assessment(weights, df, preference_thresholds_pi):
    # Extract items characteristics
    columns_criterias = [
        "energy-kj_100g",
        "sugars_100g",
        "saturated-fat_100g",
        "salt_100g",
        "proteins_100g",
        "fiber_100g",
        "fruits-vegetables-nuts-estimate-from-ingredients_100g",
    ]

    criterias = df[columns_criterias]

    # Iterate over each list of preference thresholds
    for i, thresholds in enumerate(preference_thresholds_pi, start=1):
        # List to store the results
        assessment_results = []

        # Iterate over each row in the DataFrame
        for index, row in criterias.iterrows():
            # Apply item_assessment to the current row
            result = item_assessment(weights, row, thresholds)

            # Append the result to the list
            assessment_results.append(result)

        # Add a new column to the DataFrame for each set of thresholds
        df[f'assessment_pi_{i}'] = assessment_results

    return df

# Call the function with the DataFrame and list of lists
result_df = matrix_assessment(weights, selected_columns, preference_thresholds_pi)

# Display the resulting DataFrame
result_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'assessment_pi_{i}'] = assessment_results
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'assessment_pi_{i}'] = assessment_results
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'assessment_pi_{i}'] = assessment_results
A value is trying to be set on a copy of a slice from a DataFrame.


Unnamed: 0,product_name,energy-kj_100g,sugars_100g,saturated-fat_100g,salt_100g,proteins_100g,fiber_100g,fruits-vegetables-nuts-estimate-from-ingredients_100g,assessment_pi_1,assessment_pi_2,assessment_pi_3,assessment_pi_4
0,Multigrano arandanos y sabor fresa,1940.0,29.4,4.41,0.411,5.88,5.88,15.000000,0.8,0.5,0.1,0.0
1,Les gourmandises chocolat noir et fruits - Fra...,1958.0,60.0,13.00,0.020,3.30,4.60,23.000000,0.6,0.4,0.1,0.1
2,Tortilla Chips - Salted,1979.0,0.8,2.00,0.900,6.60,4.00,0.000000,0.8,0.4,0.2,0.1
3,Stevia chocolate,1741.1,14.4,20.20,0.300,11.30,10.20,0.000000,0.7,0.7,0.3,0.2
4,Il Pandoro,1695.0,18.0,12.00,0.650,8.40,1.60,0.000000,0.5,0.5,0.1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
305,Génoises Orange,1610.0,52.0,6.00,0.150,3.00,2.00,11.000000,0.5,0.3,0.1,0.1
306,Chocolat au lait du Pays Alpin,2210.0,59.0,18.00,0.000,6.10,1.80,0.390625,0.4,0.2,0.1,0.1
307,Soßenkuchen,1507.0,29.0,0.20,0.050,7.90,2.90,0.000000,0.6,0.5,0.2,0.2
308,Soufflé à la cacahuète,2039.0,1.8,7.40,1.430,10.50,0.00,19.000000,0.6,0.4,0.1,0.1


In [9]:
def PessimisticmajoritySorting(weights, lambda_thresholds, selected_columns, preference_thresholds_pi):
    #gano gano pierdo paro
    
    assessed_items = matrix_assessment(weights, selected_columns, preference_thresholds_pi)
    # Iterate over each lambda threshold
    for lambda_threshold in lambda_thresholds_percentage:
        # Create a list to store the classes
        class_list = []

        # Iterate over each row in the DataFrame
        for index, row in assessed_items.iterrows():
            # Check the conditions and assign the corresponding class
            if row['assessment_pi_4'] > lambda_threshold:
                classification = 'A'
            elif row['assessment_pi_3'] > lambda_threshold:
                classification = 'B'
            elif row['assessment_pi_2'] > lambda_threshold:
                classification = 'C'
            elif row['assessment_pi_1'] > lambda_threshold:
                classification = 'D'
            else:
                classification = 'E'

            # Append the class to the list
            class_list.append(classification)

            # Add a new column with a name based on the current lambda threshold
            column_name = f'Pes_classification_{int(lambda_threshold * 100)}'
        
        assessed_items[column_name] = class_list
        
    return assessed_items

PessimisticmajoritySorting(weights, lambda_thresholds_percentage, selected_columns, preference_thresholds_pi)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'assessment_pi_{i}'] = assessment_results
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  assessed_items[column_name] = class_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  assessed_items[column_name] = class_list


Unnamed: 0,product_name,energy-kj_100g,sugars_100g,saturated-fat_100g,salt_100g,proteins_100g,fiber_100g,fruits-vegetables-nuts-estimate-from-ingredients_100g,assessment_pi_1,assessment_pi_2,assessment_pi_3,assessment_pi_4,Pes_classification_50,Pes_classification_60,Pes_classification_70
0,Multigrano arandanos y sabor fresa,1940.0,29.4,4.41,0.411,5.88,5.88,15.000000,0.8,0.5,0.1,0.0,D,D,D
1,Les gourmandises chocolat noir et fruits - Fra...,1958.0,60.0,13.00,0.020,3.30,4.60,23.000000,0.6,0.4,0.1,0.1,D,D,E
2,Tortilla Chips - Salted,1979.0,0.8,2.00,0.900,6.60,4.00,0.000000,0.8,0.4,0.2,0.1,D,D,D
3,Stevia chocolate,1741.1,14.4,20.20,0.300,11.30,10.20,0.000000,0.7,0.7,0.3,0.2,C,C,E
4,Il Pandoro,1695.0,18.0,12.00,0.650,8.40,1.60,0.000000,0.5,0.5,0.1,0.0,E,E,E
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,Génoises Orange,1610.0,52.0,6.00,0.150,3.00,2.00,11.000000,0.5,0.3,0.1,0.1,E,E,E
306,Chocolat au lait du Pays Alpin,2210.0,59.0,18.00,0.000,6.10,1.80,0.390625,0.4,0.2,0.1,0.1,E,E,E
307,Soßenkuchen,1507.0,29.0,0.20,0.050,7.90,2.90,0.000000,0.6,0.5,0.2,0.2,D,D,E
308,Soufflé à la cacahuète,2039.0,1.8,7.40,1.430,10.50,0.00,19.000000,0.6,0.4,0.1,0.1,D,D,E


In [10]:
def OptimisticmajoritySorting(weights, lambda_thresholds, selected_columns, preference_thresholds_pi):
    #pierdo pierdo gano paro
    assessed_items = matrix_assessment(weights, selected_columns, preference_thresholds_pi)

    # Iterate over each lambda threshold
    for lambda_threshold in lambda_thresholds_percentage:
        # Create a list to store the classes
        class_list = []

        # Iterate over each row in the DataFrame
        for index, row in assessed_items.iterrows():
            # Check the conditions and assign the corresponding class
            if row['assessment_pi_1'] < lambda_threshold:
                classification = 'E'
            elif row['assessment_pi_2'] < lambda_threshold:
                classification = 'D'
            elif row['assessment_pi_3'] < lambda_threshold:
                classification = 'C'
            elif row['assessment_pi_4'] < lambda_threshold:
                classification = 'B'
            else:
                classification = 'A'

            # Append the class to the list
            class_list.append(classification)

            # Add a new column with a name based on the current lambda threshold
            column_name = f'Opt_classification_{int(lambda_threshold * 100)}'
        
        assessed_items[column_name] = class_list
        
    return assessed_items


OptimisticmajoritySorting(weights, lambda_thresholds_percentage, selected_columns, preference_thresholds_pi)

Unnamed: 0,product_name,energy-kj_100g,sugars_100g,saturated-fat_100g,salt_100g,proteins_100g,fiber_100g,fruits-vegetables-nuts-estimate-from-ingredients_100g,assessment_pi_1,assessment_pi_2,assessment_pi_3,assessment_pi_4,Pes_classification_50,Pes_classification_60,Pes_classification_70,Opt_classification_50,Opt_classification_60,Opt_classification_70
0,Multigrano arandanos y sabor fresa,1940.0,29.4,4.41,0.411,5.88,5.88,15.000000,0.8,0.5,0.1,0.0,D,D,D,C,D,D
1,Les gourmandises chocolat noir et fruits - Fra...,1958.0,60.0,13.00,0.020,3.30,4.60,23.000000,0.6,0.4,0.1,0.1,D,D,E,D,D,E
2,Tortilla Chips - Salted,1979.0,0.8,2.00,0.900,6.60,4.00,0.000000,0.8,0.4,0.2,0.1,D,D,D,D,D,D
3,Stevia chocolate,1741.1,14.4,20.20,0.300,11.30,10.20,0.000000,0.7,0.7,0.3,0.2,C,C,E,C,C,C
4,Il Pandoro,1695.0,18.0,12.00,0.650,8.40,1.60,0.000000,0.5,0.5,0.1,0.0,E,E,E,C,E,E
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,Génoises Orange,1610.0,52.0,6.00,0.150,3.00,2.00,11.000000,0.5,0.3,0.1,0.1,E,E,E,D,E,E
306,Chocolat au lait du Pays Alpin,2210.0,59.0,18.00,0.000,6.10,1.80,0.390625,0.4,0.2,0.1,0.1,E,E,E,E,E,E
307,Soßenkuchen,1507.0,29.0,0.20,0.050,7.90,2.90,0.000000,0.6,0.5,0.2,0.2,D,D,E,C,D,E
308,Soufflé à la cacahuète,2039.0,1.8,7.40,1.430,10.50,0.00,19.000000,0.6,0.4,0.1,0.1,D,D,E,D,D,E
