## Common Ingredients in Different Nutrition Values

Each nutrition value here has 3 different grades (Low, Moderate, High):
- Fat
- Saturated fat
- Salt
- Sugar

Are there any common ingredients for each value in each grade?

In [None]:
# Code configurations
# MongoDB URI handed to MongoClient when the connection is created.
CONNECTION_STRING = "mongodb://192.168.56.102:27017"
# Name of the database selected on that client.
DATABASE_NAME = "nus"
# Name of the collection, inside DATABASE_NAME, that the queries run against.
COLLECTION_NAME = "openfood"

In [None]:
from pymongo import MongoClient
from bson.code import Code
import plotly, pymongo, string, unicodecsv, re, itertools
plotly.offline.init_notebook_mode()
from plotly.graph_objs import Bar

# Connect using the configured URI, then select the database and the
# collection by name. `openfood` is the handle every query cell below uses.
client = MongoClient(CONNECTION_STRING)
db = client[DATABASE_NAME]
openfood = db[COLLECTION_NAME]

Read in the translated ingredients for easy lookup.

In [None]:
import io

# Lookup table: foreign-language ingredient name -> English name, read from a
# tab-separated file with one "<foreign>\t<english>" pair per line.
translate = {}
# Decode the file instead of reading raw bytes. Under Python 2 a plain open()
# yields byte strings, and a byte-string key containing non-ASCII characters
# never matches a unicode lookup key, so accented names (e.g. "émulsifiant")
# could not be translated.
# NOTE(review): assumes the file is UTF-8 encoded -- confirm.
with io.open("ingredients_translated.txt", "r", encoding="utf-8") as f:
    for line in f:
        # Unpacking raises ValueError unless the line holds exactly one tab.
        foreign_lang, eng_lang = line.split('\t')
        # The value keeps its trailing newline; the cells that look values up
        # strip it themselves.
        translate[foreign_lang] = eng_lang

In [None]:
# Sanity check of the lookup table with one known key ("sel").
# Only a missing key is reported as "not found"; any other error (for example
# the table not having been loaded yet) is allowed to surface.
try:
    print(translate["sel"])
except KeyError:
    print("Translation not found!")

Extracting all the ingredients for all low fat food.

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# fat level is "low", and report how many ingredients were translated.
curr = openfood.find({"nutrient_levels.fat": "low"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('low_fat_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm and the data as it is, the result is as follows:
```
python Apriori-master\apriori.py -f low_fat_ingredients.csv -s 0.08 -c 0.05
item: ('antioxidant',) , 0.082
item: ('citric acid',) , 0.112
item: ('water', 'salt') , 0.114
item: ('acidifiant',) , 0.115
item: ('salt',) , 0.195
item: ('water',) , 0.218
item: ('sugar',) , 0.227

------------------------ RULES:
Rule: ('water',) ==> ('salt',) , 0.523
Rule: ('salt',) ==> ('water',) , 0.586
```

Since `water` is a common ingredient, we eliminate that in our data and run the algorithm again. The result is as follows:
```
python Apriori-master\apriori.py -f low_fat_ingredients_wo_water.csv -s 0.07 -c 0.05
item: ('acidifiant', 'citric acid') , 0.072
item: ('salt', 'sugar') , 0.076
item: ('antioxidant',) , 0.082
item: ('citric acid',) , 0.112
item: ('acidifiant',) , 0.115
item: ('salt',) , 0.195
item: ('sugar',) , 0.227

------------------------ RULES:
Rule: ('sugar',) ==> ('salt',) , 0.334
Rule: ('salt',) ==> ('sugar',) , 0.391
Rule: ('acidifiant',) ==> ('citric acid',) , 0.629
Rule: ('citric acid',) ==> ('acidifiant',) , 0.643
```

Extracting all the ingredients for all low saturated fat food.

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# saturated-fat level is "low", and report how many were translated.
curr = openfood.find({"nutrient_levels.saturated-fat": "low"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('low_sat_fat_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm and the data as it is, the result is as follows:
```
python Apriori-master\apriori.py -f low_sat_fat_ingredients.csv -s 0.1 -c 0.05
item: ('water', 'sugar') , 0.106
item: ('citric acid',) , 0.106
item: ('acidifiant',) , 0.117
item: ('salt', 'sugar') , 0.120
item: ('water', 'salt') , 0.168
item: ('sugar',) , 0.243
item: ('water',) , 0.269
item: ('salt',) , 0.280

------------------------ RULES:
Rule: ('water',) ==> ('sugar',) , 0.394
Rule: ('salt',) ==> ('sugar',) , 0.431
Rule: ('sugar',) ==> ('water',) , 0.435
Rule: ('sugar',) ==> ('salt',) , 0.495
Rule: ('salt',) ==> ('water',) , 0.601
Rule: ('water',) ==> ('salt',) , 0.625
```
Similarly, we eliminate `water` in our data and run the algorithm again. The result is as follows:
```
python Apriori-master\apriori.py -f low_sat_fat_ingredients_wo_water.csv -s 0.07 -c 0.05
item: ('Colza oil',) , 0.071
item: ('acidifiant', 'citric acid') , 0.072
item: ('antioxidant',) , 0.094
item: ('citric acid',) , 0.106
item: ('acidifiant',) , 0.117
item: ('salt', 'sugar') , 0.120
item: ('sugar',) , 0.243
item: ('salt',) , 0.280

------------------------ RULES:
Rule: ('salt',) ==> ('sugar',) , 0.431
Rule: ('sugar',) ==> ('salt',) , 0.495
Rule: ('acidifiant',) ==> ('citric acid',) , 0.612
Rule: ('citric acid',) ==> ('acidifiant',) , 0.675
```

Extracting all the ingredients for all low salt food.

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# salt level is "low", and report how many ingredients were translated.
curr = openfood.find({"nutrient_levels.salt": "low"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('low_salt_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm, we find similar ingredients in all low salt food:
```
python Apriori-master\apriori.py -f low_salt_ingredients.csv -s 0.06 -c 0.05
item: ('acidifiant', 'citric acid') , 0.060
item: ('glucose syrup',) , 0.060
item: ('citric acid', 'sugar') , 0.062
item: ('water', 'sugar') , 0.063
item: ('\xc3\xa9mulsifiant', 'sugar') , 0.064
item: ('p\xc3\xa2te de cacao',) , 0.066
item: ('cocoa butter',) , 0.088
item: ('acidifiant',) , 0.089
item: ('citric acid',) , 0.096
item: ('\xc3\xa9mulsifiant',) , 0.103
item: ('water',) , 0.111
item: ('sugar',) , 0.260

------------------------ RULES:
Rule: ('sugar',) ==> ('citric acid',) , 0.237
Rule: ('sugar',) ==> ('water',) , 0.243
Rule: ('sugar',) ==> ('\xc3\xa9mulsifiant',) , 0.247
Rule: ('water',) ==> ('sugar',) , 0.569
Rule: ('\xc3\xa9mulsifiant',) ==> ('sugar',) , 0.621
Rule: ('citric acid',) ==> ('acidifiant',) , 0.624
Rule: ('citric acid',) ==> ('sugar',) , 0.640
Rule: ('acidifiant',) ==> ('citric acid',) , 0.678
```
Remove water from the list:
```
python Apriori-master\apriori.py -f low_salt_ingredients_wo_water.csv -s 0.06 -c 0.05
item: ('acidifiant', 'citric acid') , 0.060
item: ('glucose syrup',) , 0.060
item: ('citric acid', 'sugar') , 0.062
item: ('\xc3\xa9mulsifiant', 'sugar') , 0.064
item: ('cocoa paste',) , 0.066
item: ('cocoa butter',) , 0.088
item: ('acidifiant',) , 0.089
item: ('citric acid',) , 0.096
item: ('\xc3\xa9mulsifiant',) , 0.103
item: ('sugar',) , 0.260

------------------------ RULES:
Rule: ('sugar',) ==> ('citric acid',) , 0.237
Rule: ('sugar',) ==> ('\xc3\xa9mulsifiant',) , 0.247
Rule: ('\xc3\xa9mulsifiant',) ==> ('sugar',) , 0.621
Rule: ('citric acid',) ==> ('acidifiant',) , 0.624
Rule: ('citric acid',) ==> ('sugar',) , 0.640
Rule: ('acidifiant',) ==> ('citric acid',) , 0.678
```

Extracting all the ingredients for all low sugar food.

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# sugars level is "low", and report how many ingredients were translated.
curr = openfood.find({"nutrient_levels.sugars": "low"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('low_sugar_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm, we find similar ingredients in all low sugar food:
```
python Apriori-master\apriori.py -f low_sugar_ingredients.csv -s 0.1 -c 0.05
item: ('antioxidant',) , 0.102
item: ('water', 'sugar') , 0.103
item: ('sunflower oil',) , 0.109
item: ('Tory', 'salt') , 0.112
item: ('Tory',) , 0.122
item: ('salt', 'sugar') , 0.134
item: ('sugar',) , 0.151
item: ('water', 'salt') , 0.274
item: ('water',) , 0.340
item: ('salt',) , 0.482

------------------------ RULES:
Rule: ('salt',) ==> ('Tory',) , 0.232
Rule: ('salt',) ==> ('sugar',) , 0.277
Rule: ('water',) ==> ('sugar',) , 0.304
Rule: ('salt',) ==> ('water',) , 0.568
Rule: ('sugar',) ==> ('water',) , 0.685
Rule: ('water',) ==> ('salt',) , 0.805
Rule: ('sugar',) ==> ('salt',) , 0.885
Rule: ('Tory',) ==> ('salt',) , 0.915
```
Remove water from the list:
```
python Apriori-master\apriori.py -f low_sugar_ingredients_wo_water.csv -s 0.1 -c 0.05
item: ('antioxidant',) , 0.102
item: ('salt', 'Preservative') , 0.105
item: ('sunflower oil',) , 0.109
item: ('Preservative',) , 0.117
item: ('salt', 'sugar') , 0.134
item: ('sugar',) , 0.151
item: ('salt',) , 0.482

------------------------ RULES:
Rule: ('salt',) ==> ('Preservative',) , 0.219
Rule: ('salt',) ==> ('sugar',) , 0.277
Rule: ('sugar',) ==> ('salt',) , 0.885
Rule: ('Preservative',) ==> ('salt',) , 0.904
```

What about the moderate level?

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# fat level is "moderate", and report how many ingredients were translated.
curr = openfood.find({"nutrient_levels.fat": "moderate"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('mod_fat_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm, we find similar ingredients in all moderate fat food:
```
python Apriori-master\apriori.py -f mod_fat_ingredients_wo_water.csv -c 0.1 -s 0.12
item: ('\xc3\xa9mulsifiant',) , 0.122
item: ('preservative',) , 0.124
item: ('Colza oil',) , 0.128
item: ('sunflower oil',) , 0.130
item: ('salt', 'sugar') , 0.217
item: ('sugar',) , 0.324
item: ('salt',) , 0.492

------------------------ RULES:
Rule: ('salt',) ==> ('sugar',) , 0.441
Rule: ('sugar',) ==> ('salt',) , 0.669
```

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# saturated-fat level is "moderate", and report how many were translated.
curr = openfood.find({"nutrient_levels.saturated-fat": "moderate"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('mod_sat_fat_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm, we find similar ingredients in all moderate saturated fat food:
```
python Apriori-master\apriori.py -f mod_sat_fat_ingredients_wo_water.csv -c 0.1 -s 0.12
item: ('sunflower oil', 'salt') , 0.123
item: ('preservative',) , 0.124
item: ('Colza oil',) , 0.129
item: ('sunflower oil',) , 0.173
item: ('salt', 'sugar') , 0.207
item: ('sugar',) , 0.328
item: ('salt',) , 0.487

------------------------ RULES:
Rule: ('salt',) ==> ('sunflower oil',) , 0.253
Rule: ('salt',) ==> ('sugar',) , 0.426
Rule: ('sugar',) ==> ('salt',) , 0.632
Rule: ('sunflower oil',) ==> ('salt',) , 0.710
```

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# salt level is "moderate", and report how many ingredients were translated.
curr = openfood.find({"nutrient_levels.salt": "moderate"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('mod_salt_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm, we find similar ingredients in all moderate salt food:
```
python Apriori-master\apriori.py -f mod_salt_ingredients_wo_water.csv -c 0.1 -s 0.11
item: ('farine de _bl\xc3\xa9_',) , 0.110
item: ('salt', 'Colza oil') , 0.118
item: ('\xc3\xa9mulsifiant',) , 0.128
item: ('Colza oil',) , 0.134
item: ('sunflower oil',) , 0.140
item: ('salt', 'sugar') , 0.274
item: ('sugar',) , 0.336
item: ('salt',) , 0.587

------------------------ RULES:
Rule: ('salt',) ==> ('Colza oil',) , 0.202
Rule: ('salt',) ==> ('sugar',) , 0.467
Rule: ('sugar',) ==> ('salt',) , 0.815
Rule: ('Colza oil',) ==> ('salt',) , 0.882
```

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# sugars level is "moderate", and report how many ingredients were translated.
curr = openfood.find({"nutrient_levels.sugars": "moderate"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('mod_sugar_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm, we find similar ingredients in all moderate sugar food:
```
python Apriori-master\apriori.py -f mod_sugar_ingredients_wo_water.csv -c 0.1 -s 0.09
item: ('antioxidant',) , 0.091
item: ('salt', 'yeast') , 0.093
item: ('Colza oil',) , 0.097
item: ('citric acid',) , 0.100
item: ('yeast',) , 0.106
item: ('sunflower oil',) , 0.106
item: ('salt', 'sugar') , 0.229
item: ('salt',) , 0.337
item: ('sugar',) , 0.368

------------------------ RULES:
Rule: ('salt',) ==> ('yeast',) , 0.275
Rule: ('sugar',) ==> ('salt',) , 0.622
Rule: ('salt',) ==> ('sugar',) , 0.679
Rule: ('yeast',) ==> ('salt',) , 0.876
```

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# fat level is "high", and report how many ingredients were translated.
curr = openfood.find({"nutrient_levels.fat": "high"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('high_fat_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm, we find similar ingredients in all high fat food:
```
python Apriori-master\apriori.py -f high_fat_ingredients_wo_water.csv -c 0.1 -s 0.11
item: ('cocoa butter', '\xc3\xa9mulsifiant') , 0.111
item: ('\xc3\xa9mulsifiant', 'sugar') , 0.114
item: ('cocoa butter', 'cocoa paste') , 0.120
item: ('water',) , 0.128
item: ('cocoa paste',) , 0.130
item: ('cocoa butter',) , 0.173
item: ('salt', 'sugar') , 0.174
item: ('\xc3\xa9mulsifiant',) , 0.177
item: ('sugar',) , 0.276
item: ('salt',) , 0.432

------------------------ RULES:
Rule: ('salt',) ==> ('sugar',) , 0.402
Rule: ('sugar',) ==> ('\xc3\xa9mulsifiant',) , 0.414
Rule: ('\xc3\xa9mulsifiant',) ==> ('cocoa butter',) , 0.626
Rule: ('sugar',) ==> ('salt',) , 0.631
Rule: ('cocoa butter',) ==> ('\xc3\xa9mulsifiant',) , 0.642
Rule: ('\xc3\xa9mulsifiant',) ==> ('sugar',) , 0.643
Rule: ('cocoa butter',) ==> ('cocoa paste',) , 0.694
Rule: ('cocoa paste',) ==> ('cocoa butter',) , 0.926
```

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# saturated-fat level is "high", and report how many were translated.
curr = openfood.find({"nutrient_levels.saturated-fat": "high"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('high_sat_fat_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm, we find similar ingredients in all high saturated fat food:
```
python Apriori-master\apriori.py -f high_sat_fat_ingredients_wo_water.csv -c 0.1 -s 0.14
item: ('\xc3\xa9mulsifiant', 'sugar') , 0.142
item: ('cocoa butter',) , 0.178
item: ('water',) , 0.182
item: ('salt', 'sugar') , 0.187
item: ('\xc3\xa9mulsifiant',) , 0.209
item: ('sugar',) , 0.317
item: ('salt',) , 0.445

------------------------ RULES:
Rule: ('salt',) ==> ('sugar',) , 0.419
Rule: ('sugar',) ==> ('\xc3\xa9mulsifiant',) , 0.448
Rule: ('sugar',) ==> ('salt',) , 0.589
Rule: ('\xc3\xa9mulsifiant',) ==> ('sugar',) , 0.679
```

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# salt level is "high", and report how many ingredients were translated.
curr = openfood.find({"nutrient_levels.salt": "high"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('high_salt_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm, we find similar ingredients in all high salt food:
```
python Apriori-master\apriori.py -f high_salt_ingredients_wo_water.csv -c 0.1 -s 0.17
item: ('dextrose',) , 0.170
item: ('antioxidant',) , 0.173
item: ('salt', 'preservative') , 0.192
item: ('salt', 'sugar') , 0.200
item: ('preservative',) , 0.205
item: ('sugar',) , 0.228
item: ('salt',) , 0.670

------------------------ RULES:
Rule: ('salt',) ==> ('preservative',) , 0.286
Rule: ('salt',) ==> ('sugar',) , 0.298
Rule: ('sugar',) ==> ('salt',) , 0.875
Rule: ('preservative',) ==> ('salt',) , 0.934
```

In [None]:
# Export one CSV row of (translated) ingredient names per product whose
# sugars level is "high", and report how many ingredients were translated.
curr = openfood.find({"nutrient_levels.sugars": "high"})
total_ingredient_count = 0
translated_ingredient_count = 0

# Raw string so \d and \D reach the regex engine untouched; compiled once
# instead of once per ingredient.
name_pattern = re.compile(r'(\d+ ?(.?g|.?l)?|\D+ ?(.?g|.?l)?)')

# The file is opened once for the whole export rather than once per product.
# NOTE: 'ab' appends, so re-running this cell adds the same rows again;
# delete the file first when regenerating it.
with open('high_sugar_ingredients.csv', 'ab') as f:
    writer = unicodecsv.writer(f)
    for food in curr:
        ingredients_list = food['ingredients']
        ingredients = []
        for ingredient in ingredients_list:
            total_ingredient_count += 1
            # findall yields one tuple per match (one entry per group);
            # flatten them, then keep pieces longer than 2 characters that
            # contain no digit.
            pieces = itertools.chain.from_iterable(name_pattern.findall(ingredient['text']))
            processed_ingredient_names = [x.strip() for x in pieces
                                          if len(x) > 2 and not re.search(r'\d', x)]
            if not processed_ingredient_names:
                continue
            try:
                # Table values carry the line ending of the translation file;
                # strip only that, so a value without one is not truncated.
                translated_ingredient = translate[processed_ingredient_names[0]].rstrip('\r\n')
            except KeyError:
                # No translation known: fall back to the original text.
                translated_ingredient = ingredient['text']

            if translated_ingredient != ingredient['text']:
                translated_ingredient_count += 1

            ingredients.append(translated_ingredient)

        writer.writerow(ingredients)

# Single-argument form prints the same text under Python 2 and Python 3.
print("Total ingredient count: %d" % total_ingredient_count)
print("Translated ingredient count: %d" % translated_ingredient_count)

Using Apriori algorithm, we find similar ingredients in all high sugar food:
```
python Apriori-master\apriori.py -f high_sugar_ingredients_wo_water.csv -c 0.1 -s 0.14
item: ('cocoa paste',) , 0.150
item: ('\xc3\xa9mulsifiant', 'sugar') , 0.167
item: ('cocoa butter',) , 0.184
item: ('salt', 'sugar') , 0.185
item: ('\xc3\xa9mulsifiant',) , 0.222
item: ('salt',) , 0.226
item: ('sugar',) , 0.484

------------------------ RULES:
Rule: ('sugar',) ==> ('\xc3\xa9mulsifiant',) , 0.345
Rule: ('sugar',) ==> ('salt',) , 0.382
Rule: ('\xc3\xa9mulsifiant',) ==> ('sugar',) , 0.750
Rule: ('salt',) ==> ('sugar',) , 0.819
```