# Water usage for food production - an analysis of the water usage per item and country

## Packages:

In [1]:
import pandas as pd
import numpy as np

## Data:

In [None]:
# Load the raw table (semicolon-separated, column labels taken from row index 1)
# and turn every literal 0 into NaN so that zeros count as missing values.
db = pd.read_csv(
    'watertofood_data_21crops_11countries_1960-2016.csv',
    sep=';',
    header=1,
).replace(0, np.nan)

In [None]:
# Row-wise average over the columns at positions 7..62, stored in a new 'mean'
# column. DataFrame.mean skips NaN cells by default.
# NOTE(review): the slice 7:63 stops before position 63; if the year columns
# really span 1960-2016 starting at position 7, the last year is left out -
# confirm against the CSV layout.
value_columns = db.iloc[:, 7:63]
db['mean'] = value_columns.mean(axis=1)
db.head()

In [7]:
# Build a table with the mean water footprint per product over all countries:
# one row per (Item Category, Item Name) pair, averaging the per-row 'mean'.
# The aggregation is given as the string 'mean' because recent pandas releases
# deprecate passing NumPy callables such as np.mean as aggfunc.
temp1 = pd.pivot_table(
    db,
    values='mean',
    index=['Item Category', 'Item Name'],
    aggfunc='mean',
).reset_index()
temp1

Unnamed: 0,Item Category,Item Name,mean
0,Cereals,Maize,1674.101605
1,Cereals,"Rice, paddy",1788.696999
2,Fruits,"Almonds, with shell",8151.645724
3,Fruits,Apples,438.32907
4,Fruits,Avocados,2009.206797
5,Fruits,Bananas,2227.226955
6,Fruits,Coconuts,3059.23257
7,Fruits,Dates,2561.357534
8,Fruits,Grapes,729.732055
9,Fruits,Lemons and limes,558.691132


In [8]:
# Find the product with the lowest mean water footprint in every category and
# save it in the table "subs" (substitutes):
#   1. order all products by 'mean', largest first
#   2. group by 'Item Category'
#   3. keep the last row of each group, i.e. the smallest 'mean' of that category
ranked = temp1.sort_values('mean', ascending=False)
subs = ranked.groupby('Item Category').tail(1).reset_index(drop=True)
subs

Unnamed: 0,Item Category,Item Name,mean
0,Lux-foods,Cocoa beans,24259.055437
1,Cereals,Maize,1674.101605
2,Fruits,Strawberries,429.585905
3,Vegetables,Tomatoes,97.765596


In [9]:
# All product names of temp1 as a plain Python list (shown to the user when an
# unknown product is entered).
products = temp1['Item Name'].to_list()

### Fragestellungen:
- Produktklassen: Was sind empfehlenswerte Alternativen für “Lieblingsprodukte”? (Produktklassen einteilen und Empfehlungen für Alternativen geben, z.B. Nüsse)

- Calculator: Wie lässt sich der water footprint für eine Liste an Produkten berechnen?

## Lösung der 1. Frage:
- Durchschnitt vs. letzter Wert als Referenz? Was haltet ihr vom letzten (aktuellsten) Wert?
- Identifiziere pro Kategorie das Produkt mit dem geringsten Wasser Verbrauch
- Abfrage: Was ist dein Lieblingsprodukt?
- Antwort: Für den Anbau deines Lieblingsprodukts werden ... Liter Wasser pro kg (?) verbraucht. Wenn du stattdessen ... ausprobierst, würdest du ... Liter Wasser pro kg einsparen.

In [10]:
# Ask for the user's favorite product, look up its mean water footprint and
# suggest the product with the lowest footprint from the same category.
Input = input("What is your favorite product? ")

# Re-prompt until the entry matches a value of temp1['Item Name'] exactly
# (the comparison is case-sensitive).
while not (temp1['Item Name'] == Input).any():
    print('The product "{}" does not exist in our database. Please choose a new one from the following list: {} \n'
          .format(Input, products))
    Input = input("What is your favorite product? ")

chosen = temp1.loc[temp1['Item Name'] == Input]  # row(s) of the favorite product
temp2 = chosen['mean'].iloc[0]  # water footprint of the product
temp3 = chosen['Item Category'].iloc[0]  # category of the product

best_in_category = subs.loc[subs['Item Category'] == temp3]  # lowest-footprint item of that category
temp4 = best_in_category['Item Name'].iloc[0]  # replacement product
temp5 = best_in_category['mean'].iloc[0]  # water footprint of the replacement product
temp6 = temp2 - temp5  # water saving

if temp6 == 0:
    # Both values come from the same table, so the difference is exactly 0
    # when the favorite product is itself the category minimum.
    print('\nYour favorite product is already the one with the lowest waterfootprint in the category "{}". '
          .format(temp3))
else:
    print('\nYour favorite product uses around {:.0f} liters of water in its production and belongs to the Item Category "{}". In this category, "{}" consumes the least. \nIf you were to replace your favorite product with this, you could save about {:.0f} liters of water per kg and you would only use {:.0f} liters of water per kg. '
          .format(temp2, temp3, temp4, temp6, temp5))  # variable values for the text

What is your favorite product? Potatoes

Your favorite product uses around 200 liters of water in its production and belongs to the Item Category "Vegetables". In this category, "Tomatoes" consumes the least. 
If you were to replace your favorite product with this, you could save about 102 liters of water per kg and you would only use 98 liters of water per kg. 


## Lösung der 2. Frage:
- Aufbauend auf davor
- Abfrage Schleife
    - Welches Produkt möchtest du für deinen Smoothie verwenden?
    - Antwort, falls dieses Produkt nicht in unserer Datenbank enthalten ist: "Wir haben für dieses Produkt keine Daten, daher würden wir einen durchschnittlichen Wasserverbrauch von ... Liter pro kg für die Berechnung verwenden. Ist das für dich in Ordnung oder möchtest du lieber ein anderes Produkt in deinen Smoothie aufnehmen?"
    - Ja/Nein Option
    - Wie viel möchtest du von... in deinen Smoothie in Kg geben?
    - Möchtest du weitere Zutaten in deinen Smoothie geben?
    - Ja/Nein Option
    - Berechnung
    - Ausgabe: Dein Smoothie würde aktuell ... Liter Wasser für die Produktion benötigen
    - Alternativprodukt aus 1. vorschlagen
    - Ausgabe: Wenn du statt ... lieber ... verwenden würdest, könntest du für deinen Smoothie ... Liter Wasser einsparen. Möchtest du dies ausprobieren?
    - Ja/Nein Option
- Ausgabe: Ok, für den Anbau deiner Smoothie-Zutaten würden ... Liter Wasser benötigt.

In [11]:
# Smoothie calculator: collect ingredients and quantities, add up their water
# footprint and point out the single swap that would save the most water.
temp2 = 0  # running water footprint of the smoothie (no ingredients yet)
temp10 = 'y'  # answer to "add another ingredient?"; the loop runs while it is 'y'
# One row per ingredient: its name, the water used for the entered quantity and
# the water saved by swapping it for the lowest-footprint item of its category.
Ingr = pd.DataFrame([], columns=['Intgredients', 'Wasserverbrauch', 'Änderung'])

while temp10.strip().lower() == 'y':
    Input = input('Which ingredient would you like to add to your smoothie? ')  # query of the ingredient
    # Re-prompt until the entry matches a value of temp1['Item Name'] exactly.
    while not (temp1['Item Name'] == Input).any():
        print('The product "{}" does not exist in our database. Please choose a new one from the following list: {} \n'
              .format(Input, products))  # print the list of the possible products
        Input = input('Which ingredient would you like to add to your smoothie? ')

    # Re-prompt until the quantity parses as a non-negative number, so that a
    # typo does not abort the cell and discard the ingredients entered so far.
    while True:
        try:
            Qty = float(input('How much of it would you like to add to your smoothie in kg?'))
        except ValueError:
            print('Please enter the quantity as a number, e.g. 0.5 \n')
            continue
        if Qty >= 0:
            break
        print('Please enter a quantity that is not negative. \n')

    chosen = temp1.loc[temp1['Item Name'] == Input]  # row(s) of the entered product
    temp3 = float(chosen['mean'].iloc[0]) * Qty  # water used for the product * quantity
    temp4 = chosen['Item Category'].iloc[0]  # category of the entered product
    best_in_category = subs.loc[subs['Item Category'] == temp4]
    temp5 = best_in_category['Item Name'].iloc[0]  # replacement product
    temp6 = best_in_category['mean'].iloc[0]  # water footprint of the replacement product
    temp7 = temp3 - float(temp6) * Qty  # water saving for the entered quantity
    temp2 += temp3  # add the ingredient to the footprint of the smoothie
    Ingr.loc[len(Ingr.index)] = [Input, temp3, temp7]  # save the data in the table "Ingr"
    temp10 = input("\nWould you like to add another ingredient to your smoothie?(Y/N) ")  # more ingredients?

# Sort once by possible saving; the last row is the ingredient whose swap saves
# the most water. The loop above always adds at least one row.
top_saving = Ingr.sort_values('Änderung', ascending=True).iloc[-1]
temp11 = top_saving['Intgredients']  # ingredient with the largest possible saving
temp12 = top_saving['Änderung']  # max water saving
temp13 = temp1.loc[temp1['Item Name'] == temp11, 'Item Category'].iloc[0]  # category of that ingredient
temp14 = subs.loc[subs['Item Category'] == temp13, 'Item Name'].iloc[0]  # replacement product with highest water saving

if temp12 <= 0:
    # No swap saves anything, so do not suggest replacing a product by itself.
    print('\n{:.0f} liters of water were used to produce the ingredients for your smoothie. Every ingredient is already the one with the lowest waterfootprint in its category. '
          .format(temp2))
else:
    print('\n{:.0f} liters of water were used to produce the ingredients for your smoothie. You could improve your smoothie the most if you swapped {} for {}. Then you could save {:.0f} liters of water. '
          .format(temp2,
                  temp11,
                  temp14,
                  temp12))

Which ingredient would you like to add to your smoothie? Potatoes
How much of it would you like to add to your smoothie in kg?3

Would you like to add another ingredient to your smoothie?(Y/N) d

600 liters of water were used to produce the ingredients for your smoothie. You could improve your smoothie the most if you swapped Potatoes for Tomatoes. Then you could save 307 liters of water. 
