In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

# Study data files (all paths are relative to the notebook's working directory).
# The coefficient-of-variation file has its own name so that it is not clobbered
# by the `calories_money_path` assignment for the GDP file further down.
calories_cv_path = "Resources/coefficient-of-variation-cv-in-per-capita-caloric-intake.csv"
meat_kilos_path = "Resources/daily-meat-consumption-per-person.csv"
calories_supply_path = "Resources/daily-per-capita-caloric-supply.csv"
fat_money_path = "Resources/daily-per-capita-fat-supply-vs-gdp-per-capita.csv"
fat_path = "Resources/daily-per-capita-fat-supply.csv"
protein_money_path = "Resources/daily-per-capita-protein-supply-vs-gdp-per-capita.csv"
protein_path = "Resources/daily-per-capita-protein-supply.csv"
calories_money_path = "Resources/daily-per-capita-supply-of-calories-vs-gdp-per-capita.csv"
protein_source_path = "Resources/daily-protein-supply-from-animal-and-plant-based-foods.csv"
calories_path = "Resources/global-food.csv"
min_calories_path = "Resources/minimum-requirement-calories.csv"
insecurity_path = "Resources/prevalence-of-undernourishment-vs-daily-supply-of-calories.csv"
# Read the data: load each CSV that the merge below needs into its own DataFrame.
# The target names line up one-to-one, in order, with the paths listed after them.
(
    calories_data,
    min_calories,
    fat_data,
    protein_source,
    food_insecurity,
    meat_kilos,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        calories_path,
        min_calories_path,
        fat_path,
        protein_source_path,
        insecurity_path,
        meat_kilos_path,
    )
)

# Remove the 'Code' column from each frame that is merged on Country/Year below,
# so the merges do not carry several copies of it.
# calories_data is deliberately not in this list: the original does not drop
# 'Code' from it either.
(
    food_insecurity,
    protein_source,
    fat_data,
    min_calories,
    meat_kilos,
) = (
    frame.drop(columns=['Code'])
    for frame in (
        food_insecurity,
        protein_source,
        fat_data,
        min_calories,
        meat_kilos,
    )
)

# Give every frame the same key column name ('Country') so they can be merged.
# rename() ignores labels that are absent, so a frame that has no 'Entity'
# column passes through unchanged.
entity_to_country = {'Entity': 'Country'}

calories_data = calories_data.rename(columns=entity_to_country)
min_calories = min_calories.rename(columns=entity_to_country)
fat_data = fat_data.rename(columns=entity_to_country)
protein_source = protein_source.rename(columns=entity_to_country)
food_insecurity = food_insecurity.rename(columns=entity_to_country)
meat_kilos = meat_kilos.rename(columns=entity_to_country)

# Combine the data into a single DataFrame.
# Every join is a left join on (Country, Year), so only rows of the left-hand
# frame survive each step; fat_data is therefore the base of the final table.
merge_keys = ['Country', 'Year']

# Branch A: fat_data <- (food_insecurity <- protein_source)
food_combined1 = pd.merge(
    left=food_insecurity, right=protein_source,
    how='left', on=merge_keys, suffixes=('_fi', '_ps'),
)
food_combined3 = pd.merge(
    left=fat_data, right=food_combined1,
    how='left', on=merge_keys, suffixes=('_fd', ''),
)

# Branch B: (min_calories <- calories_data) <- meat_kilos
food_combined2 = pd.merge(
    left=min_calories, right=calories_data,
    how='left', on=merge_keys, suffixes=('_mi', '_cd'),
)
food_combined4 = pd.merge(
    left=food_combined2, right=meat_kilos,
    how='left', on=merge_keys,
)

# Join the two branches
food_combined = pd.merge(
    left=food_combined3, right=food_combined4,
    how='left', on=merge_keys,
)

# Display the data table for preview
food_combined

In [None]:
# Apply filters: keep only the columns of interest, then the years 2015-2020,
# then the European countries plus the United States.

# Columns carried forward from the merged table
food_combined_filter1 = [
    'Country',
    'Year',
    '2.1.1 Prevalence of undernourishment | 00000000024000 || Value | 006121 || Percent',
    'Minimum dietary energy requirement  (kcal/cap/day) | 00021056 || Value | 006128 || kcal/capita/day',
    'Food supply (kcal per capita per day)',
    'Total | 00002901 || Food available for consumption | 0684pc || grams of fat per day per capita',
    'Food supply (Protein g per capita per day)',
    'Animal Products | 00002941 || Food available for consumption | 0674pc || grams of protein per day per capita',
    'Vegetal Products | 00002903 || Food available for consumption | 0674pc || grams of protein per day per capita',
    'Meat, total | 00002943 || Food available for consumption | 0645pc || kilograms per year per capita'
]
food_filtered1 = food_combined[food_combined_filter1]

# Year window, inclusive at both ends
year_filter = food_filtered1[(food_filtered1['Year'] >= 2015) & (food_filtered1['Year'] <= 2020)]

# Countries to keep. Several spellings of the same country are listed on purpose
# (e.g. 'Czech Republic' / 'Czechia') so a match does not depend on which one the
# data uses. 'Liechtenstein' was previously misspelled and could never match.
euandus_nations = [
    'United States', 'Russia', 'Germany', 'United Kingdom', 'France', 'Italy',
    'Spain', 'Poland', 'Ukraine', 'Romania', 'Netherlands', 'Belgium', 'Sweden',
    'Czech Republic', 'Czechia', 'Czechoslovakia', 'Greece', 'Portugal',
    'Hungary', 'Belarus', 'Austria', 'Switzerland', 'Serbia', 'Bulgaria',
    'Denmark', 'Slovakia', 'Finland', 'Croatia', 'Moldova', 'Norway', 'Ireland',
    'Bosnia and Herzegovina', 'Albania', 'Lithuania', 'Slovenia',
    'North Macedonia', 'Latvia', 'Estonia', 'Luxembourg', 'Montenegro', 'Malta',
    'Iceland', 'Andorra', 'Liechtenstein', 'Monaco', 'San Marino', 'Holy See',
]
country_filter = year_filter[year_filter['Country'].isin(euandus_nations)]

# Only the last expression of a cell is displayed, so show the final result here
country_filter

In [None]:
# Replace the long source column headers with short, readable names.
# rename() silently skips keys that are not present in the frame.
readable_column_names = {
    '2.1.1 Prevalence of undernourishment | 00000000024000 || Value | 006121 || Percent': 'Undernourishment Percentage',
    'Minimum dietary energy requirement  (kcal/cap/day) | 00021056 || Value | 006128 || kcal/capita/day': 'Minimum kCal per day per capita',
    # NOTE(review): this key does not appear in the column list used to build
    # country_filter, so this entry looks like a no-op and the calorie column
    # keeps its original name 'Food supply (kcal per capita per day)' - confirm.
    'Total | 00002901 || Food available for consumption | 0664pc || kilocalories per day per capita': 'kCal per Capita per Day',
    'Total | 00002901 || Food available for consumption | 0684pc || grams of fat per day per capita': 'Fat (g) per Capita per day',
    'Animal Products | 00002941 || Food available for consumption | 0674pc || grams of protein per day per capita': 'Animal Protein(g) supply per Capita per day',
    'Vegetal Products | 00002903 || Food available for consumption | 0674pc || grams of protein per day per capita': 'Plant Protein(g) supply per Capita per day',
    'Meat, total | 00002943 || Food available for consumption | 0645pc || kilograms per year per capita': 'Meat (kg) supply per Capita per Year',
}
country_filter = country_filter.rename(columns=readable_column_names)
country_filter

In [None]:
# Step 1: split the filtered rows into one group per country
grouped_nations = country_filter.groupby(by='Country')

# Step 2 & 3: Calculate the differences and format the year range
def calculate_difference(group):
    """Return the change between the earliest and latest year of one group.

    Args:
        group: DataFrame holding the rows of a single country. It must have a
            'Year' column; any other numeric column is treated as a measure.

    Returns:
        A Series whose first entry, 'Years', is the string
        "<min year>-<max year>", followed by one entry per numeric column
        (other than 'Year') holding latest-year value minus earliest-year value.
        If a year occurs on several rows, the first such row is used.
    """
    # Find the max and min years
    max_year = group['Year'].max()
    min_year = group['Year'].min()

    # Create a Series for the year range
    year_range = pd.Series(f"{min_year}-{max_year}", index=['Years'])

    # Only numeric columns can be subtracted. GroupBy.apply may pass the string
    # group key ('Country') in as a column, and str - str raises TypeError, so
    # non-numeric columns are skipped. 'Year' is left out as well because it is
    # already reported through 'Years'.
    numeric_columns = group.select_dtypes(include='number').columns
    value_columns = [name for name in numeric_columns if name != 'Year']

    latest = group.loc[group['Year'] == max_year, value_columns].iloc[0]
    earliest = group.loc[group['Year'] == min_year, value_columns].iloc[0]
    difference = latest - earliest

    # Combine year range and differences
    return pd.concat([year_range, difference])

# Run the per-country calculation; each group yields one row of the result
change_df = grouped_nations.apply(calculate_difference)

# Step 4: prefix every column except 'Years' with 'Change in '
change_df = change_df.rename(
    columns=lambda name: name if name == 'Years' else 'Change in ' + name
)

# Turn the group index back into a regular column
change_df = change_df.reset_index()

# Show the finished table
change_df

In [None]:
# Pie chart of the macronutrient make-up of one country's daily calorie supply.

# Energy per gram used to convert grams to kilocalories
# (the commonly used 9 kcal/g for fat and 4 kcal/g for protein).
KCAL_PER_GRAM_FAT = 9
KCAL_PER_GRAM_PROTEIN = 4

# Add new columns with the calories contributed by each macronutrient
country_filter['Fat kCal per day'] = country_filter['Fat (g) per Capita per day'] * KCAL_PER_GRAM_FAT
country_filter['Animal Protein kCal per day'] = country_filter['Animal Protein(g) supply per Capita per day'] * KCAL_PER_GRAM_PROTEIN
country_filter['Plant Protein kCal per day'] = country_filter['Plant Protein(g) supply per Capita per day'] * KCAL_PER_GRAM_PROTEIN

# Country and year to chart - change these two values to plot something else.
# The year is defined once so the row filter and the chart title cannot drift apart.
pie_country = 'Sweden'
pie_year = 2020

# Filter for the specific country and year
selected_country_data = country_filter[
    (country_filter['Country'] == pie_country) & (country_filter['Year'] == pie_year)
]

if selected_country_data.empty:
    print("No data available for the selected country and year.")
else:
    # If several rows match, only the first one is used
    selected_row = selected_country_data.iloc[0]

    fat_kcal = selected_row['Fat kCal per day']
    animal_protein_kcal = selected_row['Animal Protein kCal per day']
    plant_protein_kcal = selected_row['Plant Protein kCal per day']
    # Whatever is left of the total supply after fat and protein
    other_kcal = selected_row['Food supply (kcal per capita per day)'] - (
        fat_kcal + animal_protein_kcal + plant_protein_kcal
    )

    # Prepare data for the pie chart
    values = [fat_kcal, animal_protein_kcal, plant_protein_kcal, other_kcal]
    labels = ['Fat kCal per day', 'Animal Protein kCal per day', 'Plant Protein kCal per day', 'Other kCal (presumed carbs) per day']
    colors = ['gold', 'lightcoral', 'lightskyblue', 'yellowgreen']

    if any(pd.isna(value) or value < 0 for value in values):
        # A pie chart cannot represent missing or negative slices, so report
        # the problem instead of drawing a broken or misleading chart.
        print(f"Cannot draw the pie chart for {pie_country} in {pie_year}: a component is missing or negative.")
    else:
        # Plotting the pie chart
        plt.figure(figsize=(10, 7))
        plt.pie(values, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)
        plt.title(f'Caloric Composition in {selected_row["Country"]} for {pie_year}')
        plt.show()
