In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Economist banana-index dataset (release 1.0) directly from GitHub.
# pd.read_csv fetches the CSV from this URL, so running the cell needs network access.
url = "https://github.com/TheEconomist/banana-index-data/releases/download/1.0/bananaindex.csv"
df = pd.read_csv(url)

In [None]:
# Display the first few rows. Leaving the DataFrame as the cell's last
# expression lets the notebook render it as a table instead of plain text.
df.head()

In [None]:
# Display the dataframe info. DataFrame.info() writes its report to stdout and
# returns None, so wrapping it in print() would append a stray "None" line.
df.info()

## Data Preparation

a. Set the index of the DataFrame to be the ‘entity’ column.

In [None]:
# Use the food name as the row label. The guard keeps the cell idempotent:
# once 'entity' is the index it is no longer a column, so calling set_index
# a second time (e.g. when re-running this cell) would raise KeyError.
if df.index.name != 'entity':
    df = df.set_index('entity')
df.head()

b. Remove the ‘year’, ‘Banana values’, ‘type’, ‘Unnamed: 16’, and ‘Chart?’ columns.

In [None]:
# Discard the bookkeeping columns that are not needed for the analysis.
df = df.drop(
    columns=['year', 'Banana values', 'type', 'Unnamed: 16', 'Chart?'],
)

c. Display the first few rows of the modified DataFrame.

In [None]:
# Preview the first rows of df to check the result of the preparation steps above.
df.head()

## Exploring Banana Scores

a. For each of the pre-computed banana score columns (kg, calories, and protein), show the 10 highest-scoring food products.

In [None]:
# Ten highest-scoring foods for each pre-computed banana index column.
# DataFrame.nlargest returns the full rows ordered by the given column, descending.
top_10_kg = df.nlargest(10, 'Bananas index (kg)')
print(top_10_kg)

top_10_cal = df.nlargest(10, 'Bananas index (1000 kcalories)')
print(top_10_cal)

top_10_pro = df.nlargest(10, 'Bananas index (100g protein)')
print(top_10_pro)

b. Edit the function below so that it returns the top 10 scores for a given column:

In [None]:
def return_top_ten(df: pd.DataFrame, column: str) -> pd.Series:
    """Return the ten largest values of ``column``, highest first.

    Args:
        df: DataFrame to rank.
        column: Name of the column to rank by.

    Returns:
        The ``column`` values of the ten top-ranked rows, keeping the
        DataFrame's index labels.
    """
    return df.nlargest(10, column)[column]

In [None]:
# Try the function on the per-kilogram banana index column.
return_top_ten(df, 'Bananas index (kg)')

c. Use your function to display the results for each of the three Banana index columns.

In [None]:
# return_top_ten is already defined in an earlier cell, so this cell only calls
# it: once for each of the three pre-computed banana index columns.
for column in (
    'Bananas index (kg)',
    'Bananas index (1000 kcalories)',
    'Bananas index (100g protein)',
):
    # The printed Series carries the column name, so no extra heading is needed.
    print(return_top_ten(df, column), end='\n\n')

## Common High-Scoring Foods

Identify which foods, if any, appear in the top 10 for all three banana score lists (kg, calories, and protein).

In [None]:
# Collect the food names (index labels) of each top-10 list as a set.
kg_set, cal_set, pro_set = (
    set(return_top_ten(df, column).index)
    for column in (
        'Bananas index (kg)',
        'Bananas index (1000 kcalories)',
        'Bananas index (100g protein)',
    )
)

# Foods that appear in all three top-10 lists.
in_all_three = kg_set & cal_set & pro_set
print(in_all_three)

## Land Use Analysis

a. Create a new column named ‘Bananas index (land use 1000 kcal)’, calculating that food item’s use of land for every 1,000 kcal in comparison to a banana.

In [None]:
# Baseline: the 'land_use_1000kcal' value of the 'Bananas' row.
bananas_only = df.loc['Bananas', 'land_use_1000kcal']
print(bananas_only)

# Express every food's 'land_use_1000kcal' as a multiple of the banana baseline.
land_use_ratio = df['land_use_1000kcal'] / bananas_only
df['Bananas index (land use 1000 kcal)'] = land_use_ratio
df.head()

b. Display the 10 foods with the highest land use score.

In [None]:
# Full rows of the ten foods with the largest land-use banana index, highest first.
df.nlargest(10, 'Bananas index (land use 1000 kcal)')

c. Compare this list with the previous top 10 lists. Are there any common foods?

In [None]:
set_1000_use = set(return_top_ten(df, 'Bananas index (land use 1000 kcal)').index)

# Compare the land-use top 10 against each of the earlier top-10 lists,
# not only the per-kilogram one.
previous_top_tens = {
    'Bananas index (kg)': kg_set,
    'Bananas index (1000 kcalories)': cal_set,
    'Bananas index (100g protein)': pro_set,
}
for column, top_ten in previous_top_tens.items():
    print(f"Also in top 10 for {column}: {sorted(set_1000_use & top_ten)}")

# Overlap with the per-kilogram list, kept under its original name.
new_intersect = set.intersection(kg_set, set_1000_use)

## Cheese Analysis

Identify the type of cheese with the highest banana score per 1,000 kcal. How does it compare to other cheeses in the dataset?

In [None]:
# The question asks for the banana score per 1,000 kcal, which is the
# pre-computed 'Bananas index (1000 kcalories)' column rather than the
# land-use index derived in the previous section.
cheese_score_column = 'Bananas index (1000 kcalories)'

# Select every food whose name (the index label) mentions cheese, in any case.
rows_with_cheese = df[df.index.str.contains('cheese', case=False, na=False)]

# Rank the cheeses so the top one can be compared with the rest.
cheese_scores = rows_with_cheese[cheese_score_column].sort_values(ascending=False)

# idxmax gives the name of the cheese, max gives its score.
max_cheese = cheese_scores.max()
print(f"Highest-scoring cheese: {cheese_scores.idxmax()} ({max_cheese})")

cheese_scores