# Build Dataset Basics - API Calls to OpenMensa


**Important Links:**

API - Documentation: https://docs.openmensa.org/api/v2/overview/

Call example: https://openmensa.org/api/v2/canteens/1287/days/2024-03-08/meals

Example Canteen Page: https://openmensa.org/c/1216/2024-03-01

## Imports - Don't forget to run this!!!


In [None]:
import requests
from datetime import date, timedelta
import time
import json
from bs4 import BeautifulSoup
import os
import re
from datetime import date, timedelta, datetime
import time

## Get Data from OpenMensa

### Get all Canteens

First we need to get a dataset of all canteens that OpenMensa has

In [14]:
def get_all_canteens():
    """Download every canteen listed by the OpenMensa API.

    Walks the paginated ``/canteens`` endpoint page by page until the API
    answers with an empty list.

    Returns:
        A list with the canteen entries of all pages, or an empty list if any
        request fails (the error is printed and partial results are dropped).
    """
    api_url = "https://openmensa.org/api/v2/canteens"
    all_canteens = []
    page = 1
    try:
        while True:
            # requests waits indefinitely by default; the timeout keeps one
            # stalled connection from hanging the whole pagination loop.
            response = requests.get(api_url, params={'page': page}, timeout=30)
            response.raise_for_status()  # abort the loop if the request fails
            canteens = response.json()

            if not canteens:  # an empty page means there is nothing left
                return all_canteens

            all_canteens.extend(canteens)
            page += 1
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return []

In [15]:
# Download the complete canteen list and store it as JSON for the later steps.
# Note: get_all_canteens() returns [] on a failed request, which would then be
# written to the file as an empty list.
file_name = 'all_canteens.json'
all_canteens = get_all_canteens()

# json.dump escapes non-ASCII characters by default, so the file content is plain ASCII.
with open(file_name, 'w') as file:
    json.dump(all_canteens, file, indent=4)

print(f"All canteens are here: {file_name}.")

All canteens are here: all_canteens.json.


In [16]:
# Number of canteens that were downloaded.
len(all_canteens)

1211

### Filter out German Canteens

OpenMensa also lists many canteens from other European countries (e.g. Switzerland), not only from Germany, so we keep only the German ones. To do this, we scrape Wikipedia to build a dictionary of all German cities.

#### Wikipedia

https://moduliertersingvogel.de/2017/09/03/german-cities-list/

In [None]:
# Two-letter codes of the German federal states mapped to the state names.
# retrieveGermanList() resolves the code found next to each city through this table.
countries = {
    'BY': 'Bayern',
    'BW': 'Baden-Württemberg',
    'NW': 'Nordrhein-Westfalen',
    'HE': 'Hessen',
    'SN': 'Sachsen',
    'NI': 'Niedersachsen',
    'RP': 'Rheinland-Pfalz',
    'TH': 'Thüringen',
    'BB': 'Brandenburg',
    'ST': 'Sachsen-Anhalt',
    'MV': 'Mecklenburg-Vorpommern',
    'SH': 'Schleswig-Holstein',
    'SL': 'Saarland',
    'HB': 'Bremen',
    'BE': 'Berlin',
    'HH': 'Hamburg',
}

In [None]:
def retrieveGermanList():
    """Scrape the German Wikipedia list of cities and map each city to its state.

    Returns:
        dict mapping the city name (text of the first link of each ``<dd>``
        entry found inside a table) to the state name looked up in
        ``countries``.

    Raises:
        requests.RequestException: if the page cannot be downloaded in time
            or the server answers with an HTTP error status.
    """
    r = requests.get(
        'https://de.wikipedia.org/wiki/Liste_der_St%C3%A4dte_in_Deutschland',
        timeout=30,  # requests has no default timeout
    )
    # Without this check an error page would be parsed like the real list and
    # silently produce an empty (or wrong) city dictionary.
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html5lib")

    cities = {}
    for table in soup.find_all('table'):
        for entry in table.find_all('dd'):
            # The text following the city link is expected to carry the state
            # code in parentheses, e.g. " (BY)" or " (BY, ...)".
            # NOTE(review): inferred from the parsing below - confirm against the live page.
            additional = entry.contents[1].split('(')[1].split(')')[0].strip()
            # Only the part before the first comma is the state code.
            countryShort = additional.split(',')[0]
            cities[entry.find('a').contents[0]] = countries[countryShort]

    return cities

In [None]:
file_path = 'german_cities.json'

# Scrape before opening the file: if the download or parsing fails, an
# already existing german_cities.json is not truncated to an empty file.
scraped_german_cities = retrieveGermanList()

# ensure_ascii=False writes umlauts verbatim, so the encoding has to be pinned
# to UTF-8 - the file is read back with encoding='utf-8' in the later cells.
with open(file_path, 'w', encoding='utf-8') as file:
    json.dump(scraped_german_cities, file, ensure_ascii=False, indent=4)

file_path

'german_cities.json'

#### Filter out Canteens for german_canteens set

In [None]:
# Output file for the canteens whose city appears in the German city list.
file_name = 'german_canteens.json'

In [None]:
# Load the scraped city -> state mapping and the complete canteen list.
with open('german_cities.json', 'r', encoding='utf-8') as file:
    german_cities = json.load(file)

with open('all_canteens.json', 'r', encoding='utf-8') as file:
    all_canteens = json.load(file)

def normalize_city_name(city_name):
    """Return the part of *city_name* before the first '/', trimmed and lower-cased."""
    head, _, _ = city_name.partition('/')
    return head.strip().lower()

# Build a reverse lookup: normalized city name -> city name exactly as spelled in german_cities.
german_cities_lookup = {normalize_city_name(city): city for city in german_cities}

# Keep only the canteens whose normalized city name appears in the lookup.
# NOTE(review): the match is by city name only - a foreign city sharing its
# name with a German one would also pass; verify if that matters.
german_canteens = []
for canteen in all_canteens:
    normalized_city_name = normalize_city_name(canteen['city'])
    if normalized_city_name in german_cities_lookup:
        # Overwrite the city with the exact spelling from german_cities, because
        # the state lookup in add_state_to_canteens() uses canteen['city'] as the key.
        canteen['city'] = german_cities_lookup[normalized_city_name]
        german_canteens.append(canteen)

# Written as UTF-8 with raw (unescaped) umlauts.
with open(file_name, 'w', encoding='utf-8') as file:
    json.dump(german_canteens, file, ensure_ascii=False, indent=4)

print(f"German canteens saved to {file_name}")
print(f"Number of German canteens found: {len(german_canteens)}")


German canteens saved to german_canteens.json
Number of German canteens found: 688


#### Add State for Research Questions (Forgot to implement it before)

In [None]:
# Inputs: the filtered canteen list and the city -> state mapping.
canteens_file_path = 'german_canteens.json'
cities_file_path = 'german_cities.json'
# Output: the canteen list with an added 'state' field.
output_file_path = 'updated_german_canteens.json'

In [None]:
def add_state_to_canteens(canteens_file_path, cities_file_path, output_file_path):
    """Annotate every canteen with the state of its city.

    Reads the city -> state mapping from ``cities_file_path`` and the canteen
    list from ``canteens_file_path``, stores the state under the ``'state'``
    key of each canteen ("Unknown" when the city is not in the mapping) and
    writes the resulting list as JSON to ``output_file_path``.
    """
    with open(cities_file_path, 'r', encoding='utf-8') as cities_fh:
        state_by_city = json.load(cities_fh)
    with open(canteens_file_path, 'r', encoding='utf-8') as canteens_fh:
        canteen_entries = json.load(canteens_fh)

    # The canteen's 'city' value is used verbatim as the lookup key.
    for entry in canteen_entries:
        entry['state'] = state_by_city.get(entry['city'], "Unknown")

    with open(output_file_path, 'w', encoding='utf-8') as out_fh:
        json.dump(canteen_entries, out_fh, ensure_ascii=False, indent=4)

In [None]:
# Add the 'state' field using the three paths configured in the previous cell.
add_state_to_canteens(canteens_file_path, cities_file_path, output_file_path)

In [None]:
def add_state_id_to_canteens(updated_canteens_file_path, output_file_path):
    """Add a ``'state-id'`` abbreviation to every canteen.

    Loads the canteen list from ``updated_canteens_file_path``, translates the
    ``'state'`` value of each canteen into its two-letter abbreviation and
    writes the list as JSON to ``output_file_path``. A missing or unrecognised
    state results in the abbreviation "Unknown".
    """
    abbreviation_by_state = {
        "Baden-Württemberg": "BW", "Bayern": "BY",
        "Berlin": "BE", "Brandenburg": "BB",
        "Bremen": "HB", "Hamburg": "HH",
        "Hessen": "HE", "Mecklenburg-Vorpommern": "MV",
        "Niedersachsen": "NI", "Nordrhein-Westfalen": "NW",
        "Rheinland-Pfalz": "RP", "Saarland": "SL",
        "Sachsen": "SN", "Sachsen-Anhalt": "ST",
        "Schleswig-Holstein": "SH", "Thüringen": "TH",
    }

    with open(updated_canteens_file_path, 'r', encoding='utf-8') as source:
        canteen_entries = json.load(source)

    for entry in canteen_entries:
        # Both a missing 'state' key and an unmapped state name end up as "Unknown".
        entry['state-id'] = abbreviation_by_state.get(
            entry.get('state', "Unknown"), "Unknown"
        )

    with open(output_file_path, 'w', encoding='utf-8') as target:
        json.dump(canteen_entries, target, ensure_ascii=False, indent=4)

In [None]:
# Input: canteens that already carry a 'state' field.
updated_canteens_file_path = 'updated_german_canteens.json'
# Output: the same canteens with an additional 'state-id' field.
# Note: this rebinds output_file_path, which an earlier cell used for another file.
output_file_path = 'further_updated_german_canteens.json'

add_state_id_to_canteens(updated_canteens_file_path, output_file_path)

### Fetching the meals etc from OpenMensa using the IDs I filtered out

To fetch the data I used several instances on Colab and 5 automations running on Raspberry Pis that are built in a similar fashion to this example.

In [None]:
def fetch_meals_for_canteen(canteen_id, start_date, end_date, request_timeout=0.5):
    """Collect the meals of one canteen for every day of a date range.

    Args:
        canteen_id: ID of the canteen, inserted into the API URL.
        start_date: First day to query (inclusive).
        end_date: Last day to query (inclusive).
        request_timeout: Pause in seconds after each request. Despite its
            name this is a delay between requests, not an HTTP timeout.

    Returns:
        A list of the meal entries returned by the API, each extended with a
        ``'date'`` key (``YYYY-MM-DD``). Days whose request is not answered
        with status 200 contribute nothing.

    Raises:
        requests.RequestException: on connection errors or when a request
            exceeds the HTTP timeout.
    """
    meals_data = []
    current_date = start_date
    while current_date <= end_date:
        day = current_date.strftime('%Y-%m-%d')
        url = f"https://openmensa.org/api/v2/canteens/{canteen_id}/days/{day}/meals"
        # requests has no default timeout; without one a single stalled
        # connection would block the whole (long-running) fetch forever.
        response = requests.get(url, timeout=30)
        if response.status_code == 200:
            for meal in response.json():
                meal['date'] = day
                meals_data.append(meal)
        time.sleep(request_timeout)  # delay between requests
        current_date += timedelta(days=1)
    return meals_data

In [None]:
def append_meals_to_file(data, output_file):
    """Append *data* to the JSON list stored in *output_file*.

    The file is created with ``[data]`` when it does not exist yet.

    The new content is written to a temporary sibling file first and then
    moved over the original with ``os.replace``. The old file is therefore
    never left half-written if the process dies during the write, and no
    stale bytes remain when the new content is shorter than the old one.

    Args:
        data: JSON-serialisable object to append.
        output_file: Path of the JSON file holding a list.
    """
    try:
        with open(output_file, 'r', encoding='utf-8') as file:
            file_data = json.load(file)
    except FileNotFoundError:
        file_data = []
    file_data.append(data)

    tmp_file = f"{output_file}.tmp"
    with open(tmp_file, 'w', encoding='utf-8') as file:
        json.dump(file_data, file, indent=4)
    # Atomically swap in the complete new version.
    os.replace(tmp_file, output_file)

In [None]:
start_id = 1  # First canteen ID (inclusive) of this batch
end_id = 100  # Last canteen ID (inclusive) of this batch - set by hand
# end_id = start_id + 39 # For batches
input_file = 'german_canteens.json'
# The ID range is part of the file name, so every batch writes to its own file.
output_file = f'meals_all_canteens_{start_id}_{end_id}.json'
# Date range to fetch; both ends are queried.
start_date = date(2023, 1, 1)
end_date = date(2024, 3, 1)

In [None]:
print(f"Canteens {start_id} until {end_id}")

# german_canteens.json is written as UTF-8 with raw umlauts, so it is read
# back as UTF-8 explicitly instead of with the platform default encoding.
with open(input_file, 'r', encoding='utf-8') as file:
    german_canteens = json.load(file)

for canteen in german_canteens:
    canteen_id = canteen['id']
    # Only canteens inside the configured ID range belong to this batch.
    if not (start_id <= canteen_id <= end_id):
        continue
    canteen_meals = fetch_meals_for_canteen(canteen_id, start_date, end_date)
    meal_data = {
        "canteen_id": canteen_id,
        "meals": canteen_meals
    }
    # Persist after every canteen so finished canteens survive a crash of the run.
    append_meals_to_file(meal_data, output_file)
    print(f"Finished getting all meals for canteen ID: {canteen_id}")

Canteens 1 until 100


KeyboardInterrupt: 

### Merging all Data Set batches that were created

Important: all batches have to be in one folder (here called AllMealsData).

In [None]:
def merge_json_files(directory_path):
    """Merge all batch files of a directory into ``full_all_canteens.json``.

    Every ``*.json`` file whose name contains a number followed by ``_``
    (e.g. ``meals_all_canteens_1_100.json``) is treated as a batch holding a
    JSON list. The batches are concatenated in ascending order of that
    number, the combined list is sorted by ``canteen_id`` (0 when missing)
    and written to ``full_all_canteens.json`` inside ``directory_path``.

    Files without such a number, and the merged output of an earlier run,
    are skipped; a re-run would otherwise crash on its own output file.

    Args:
        directory_path: Directory containing the batch files.
    """
    output_name = 'full_all_canteens.json'
    batch_number = re.compile(r'(\d+)_')

    numbered_files = []
    for filename in os.listdir(directory_path):
        if not filename.endswith('.json') or filename == output_name:
            continue
        match = batch_number.search(filename)
        if match is None:
            print(f"Skipping {filename}: no batch number in the file name.")
            continue
        numbered_files.append((int(match.group(1)), filename))

    # Sort on the number only; the stable sort keeps the directory order for ties.
    numbered_files.sort(key=lambda pair: pair[0])

    all_data = []
    for _, filename in numbered_files:
        with open(os.path.join(directory_path, filename), 'r', encoding='utf-8') as file:
            all_data.extend(json.load(file))

    all_data_sorted = sorted(all_data, key=lambda x: x.get('canteen_id', 0))

    with open(os.path.join(directory_path, output_name), 'w', encoding='utf-8') as outfile:
        json.dump(all_data_sorted, outfile, ensure_ascii=False, indent=4)

In [None]:
# Folder that holds all batch files; the merged file is written into the same folder.
directory_path = '/AllMealsData/'
merge_json_files(directory_path)

print('Merging completed - full_all_canteens.json')

In [None]:
# Sanity check: count the distinct canteen IDs in the merged file.

canteen_ids = set()
# The merged file is written as UTF-8 with raw (unescaped) characters, so it
# is read back as UTF-8 explicitly instead of with the platform default.
with open("/AllMealsData/full_all_canteens.json", 'r', encoding='utf-8') as file:
    data = json.load(file)
    for item in data:
        canteen_ids.add(item['canteen_id'])

unique_canteen_id_count = len(canteen_ids)
print(unique_canteen_id_count)

### Helper

The following parts are not necessary, but they are helpful for different overviews.

#### Filtering Canteens by City 1

In [None]:
def filter_canteens_by_city(file_path, city):
    """Return the canteens of a JSON file whose ``'city'`` equals *city*.

    The file is read as UTF-8 so that city names with umlauts compare equal
    to *city* regardless of the platform's default encoding.

    Args:
        file_path: Path of a JSON file containing a list of canteen entries.
        city: City name to match exactly.

    Returns:
        The list of matching canteen entries; an empty list (after printing a
        message) when the file does not exist.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            canteens = json.load(file)
    except FileNotFoundError:
        print(f"File {file_path} not found.")
        return []

    # Keep the canteens located in the requested city.
    return [canteen for canteen in canteens if canteen.get('city') == city]

In [None]:
# Print every canteen of the unfiltered list whose city is exactly "Kiel".
file_name = 'all_canteens.json'
kiel_canteens = filter_canteens_by_city(file_name, "Kiel")
for canteen in kiel_canteens:
    print(canteen)

{'id': 1216, 'name': 'Mensa I', 'city': 'Kiel', 'address': 'Westring 385, 24118 Kiel', 'coordinates': [54.337464121688505, 10.122903412237703]}
{'id': 1218, 'name': 'Mensa II', 'city': 'Kiel', 'address': 'Leibnizstraße 14, 24118 Kiel', 'coordinates': [54.346414080501006, 10.113333431595441]}
{'id': 1219, 'name': 'Schwentine Mensa', 'city': 'Kiel', 'address': 'Grenzstraße 14, 24149 Kiel', 'coordinates': [54.32977105, 10.181043219375]}
{'id': 1758, 'name': 'Cafeteria & American Diner', 'city': 'Kiel', 'address': 'Sokratesplatz 6, 24149 Kiel', 'coordinates': [54.3323777, 10.1812416]}
{'id': 1759, 'name': 'Mensa Gaarden', 'city': 'Kiel', 'address': 'Kaiserstraße 2, 24143 Kiel', 'coordinates': [54.3155954, 10.1521691]}
{'id': 1760, 'name': 'Mensa Kesselhaus', 'city': 'Kiel', 'address': 'Legienstraße 35, 24103 Kiel', 'coordinates': [54.32782205, 10.129155763933195]}
{'id': 1838, 'name': 'Cafeteria Dockside', 'city': 'Kiel', 'address': 'Kaiserstraße 2, 24143 Kiel', 'coordinates': [54.31532790

#### Filtering Canteens by Cities 2

In [None]:
def filter_canteens_by_city(file_path, city):
    """Return the canteens of a JSON file whose ``'city'`` equals *city*.

    The file is read as UTF-8 so that city names with umlauts (e.g. "Lübeck")
    compare equal to *city* regardless of the platform's default encoding.

    Args:
        file_path: Path of a JSON file containing a list of canteen entries.
        city: City name to match exactly.

    Returns:
        The list of matching canteen entries; an empty list (after printing a
        message) when the file does not exist.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            canteens = json.load(file)
    except FileNotFoundError:
        print(f"File {file_path} not found.")
        return []

    canteens_in_city = [canteen for canteen in canteens if canteen.get('city') == city]
    return canteens_in_city

In [None]:
# Print the canteens of the filtered German list for a handful of cities.
file_name = 'german_canteens.json'
cities = ["Kiel", "Lübeck", "Flensburg", "Heide"]

for city in cities:
    # The file is re-read for every city by filter_canteens_by_city().
    city_canteens = filter_canteens_by_city(file_name, city)
    print(f"Canteens in {city}:")
    for canteen in city_canteens:
        print(canteen)

Canteens in Kiel:
{'id': 1216, 'name': 'Mensa I', 'city': 'Kiel', 'address': 'Westring 385, 24118 Kiel', 'coordinates': [54.337464121688505, 10.122903412237703]}
{'id': 1218, 'name': 'Mensa II', 'city': 'Kiel', 'address': 'Leibnizstraße 14, 24118 Kiel', 'coordinates': [54.346414080501006, 10.113333431595441]}
{'id': 1219, 'name': 'Schwentine Mensa', 'city': 'Kiel', 'address': 'Grenzstraße 14, 24149 Kiel', 'coordinates': [54.32977105, 10.181043219375]}
{'id': 1758, 'name': 'Cafeteria & American Diner', 'city': 'Kiel', 'address': 'Sokratesplatz 6, 24149 Kiel', 'coordinates': [54.3323777, 10.1812416]}
{'id': 1759, 'name': 'Mensa Gaarden', 'city': 'Kiel', 'address': 'Kaiserstraße 2, 24143 Kiel', 'coordinates': [54.3155954, 10.1521691]}
{'id': 1760, 'name': 'Mensa Kesselhaus', 'city': 'Kiel', 'address': 'Legienstraße 35, 24103 Kiel', 'coordinates': [54.32782205, 10.129155763933195]}
{'id': 1838, 'name': 'Cafeteria Dockside', 'city': 'Kiel', 'address': 'Kaiserstraße 2, 24143 Kiel', 'coordina

#### Forgotten Canteens

Only important because we fetched the German IDs incorrectly before (we forgot the cities with umlauts in them).

In [None]:
with open('canteens_updated.json', 'r', encoding='utf-8') as file:  # Load new set
    canteens_updated_data = json.load(file)

with open('german_canteens.json', 'r', encoding='utf-8') as file: # Load Old Set
    german_canteens_data = json.load(file)

updated_ids = {canteen['id'] for canteen in canteens_updated_data}
german_canteens_ids = {canteen['id'] for canteen in german_canteens_data}

# "Forgotten" means: present in the updated set but missing from the old one.
# A symmetric difference would also collect IDs that exist only in the old
# set, which are not forgotten and can never match the filter below anyway.
forgotten_ids = updated_ids - german_canteens_ids

forgotten_canteens = [canteen for canteen in canteens_updated_data if canteen['id'] in forgotten_ids]
with open('forgotten_canteens.json', 'w', encoding='utf-8') as file:
    json.dump(forgotten_canteens, file, ensure_ascii=False, indent=4)

In [None]:
# Number of canteens that were missing from the old set.
len(forgotten_canteens)