First attempt: fetching the list of cheeses from Wikidata.

In [1]:
import requests
from typing import List, Dict
import time

def fetch_cheeses(timeout: float = 30.0) -> List[Dict]:
    """
    Queries Wikidata for all types of cheese using SPARQL.

    Args:
        timeout: Seconds to wait for the Wikidata endpoint before giving up.
            Without a timeout a stalled connection would block the cell
            indefinitely.

    Returns:
        A list of dictionaries, one per result row, with the keys 'name',
        'description', 'country' and 'wikidata_id'. Returns an empty list
        (after printing the error) if the request fails or the response does
        not have the expected SPARQL JSON shape.
    """
    endpoint_url = "https://query.wikidata.org/sparql"

    # SPARQL query to get cheese items with their labels and descriptions
    query = """
    SELECT DISTINCT ?cheese ?cheeseLabel ?cheeseDescription ?countryLabel
    WHERE {
      ?cheese wdt:P31/wdt:P279* wd:Q10943 .  # Instance of cheese or subclass of cheese
      
      OPTIONAL { ?cheese wdt:P495 ?country }  # Country of origin if available
      
      SERVICE wikibase:label { 
        bd:serviceParam wikibase:language "en" .
        ?cheese rdfs:label ?cheeseLabel .
        ?cheese schema:description ?cheeseDescription .
        ?country rdfs:label ?countryLabel .
      }
    }
    """

    # NOTE(review): the contact details below are placeholders; replace them
    # with real ones before running this against the public endpoint.
    headers = {
        'User-Agent': 'CheeseCatalog/1.0 (https://github.com/yourusername; youremail@example.com)'
    }

    try:
        response = requests.get(
            endpoint_url,
            params={'query': query, 'format': 'json'},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
        results = response.json()['results']['bindings']
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return []
    except (ValueError, KeyError) as e:
        # Non-JSON body (older requests versions raise ValueError) or a
        # payload without the 'results' -> 'bindings' structure.
        print(f"Error parsing Wikidata response: {e}")
        return []

    cheeses = []
    for result in results:
        # ?cheese comes from the mandatory triple pattern, so it is always bound.
        wikidata_id = result['cheese']['value'].split('/')[-1]
        cheeses.append({
            # The label can be missing from a binding (e.g. an item without an
            # English label); fall back to the Q-id instead of raising KeyError.
            'name': result.get('cheeseLabel', {}).get('value', wikidata_id),
            'description': result.get('cheeseDescription', {}).get('value', 'No description available'),
            'country': result.get('countryLabel', {}).get('value', 'Unknown origin'),
            'wikidata_id': wikidata_id,
        })

    return cheeses

# Run the Wikidata query and keep the parsed rows
all_cheeses = fetch_cheeses()

# Report the total, then preview the first five entries
print(f"Found {len(all_cheeses)} different types of cheese!")
for cheese in all_cheeses[:5]:
    # A single print call with newline separators emits the same four lines
    print(
        f"\nName: {cheese['name']}",
        f"Description: {cheese['description']}",
        f"Country: {cheese['country']}",
        f"Wikidata ID: {cheese['wikidata_id']}",
        sep="\n",
    )




Found 232 different types of cheese!

Name: Brânză de vaci
Description: Romanian cheese
Country: Moldova
Wikidata ID: Q108112946

Name: testun
Description: cheese from Piedmont
Country: Italy
Wikidata ID: Q97172601

Name: Camoscio d'oro
Description: No description available
Country: Unknown origin
Wikidata ID: Q79087543

Name: Chubut cheese
Description: No description available
Country: Unknown origin
Wikidata ID: Q6094556

Name: Tronchon
Description: type of soft cheese from Spain
Country: Spain
Wikidata ID: Q6094605


Changed my mind: the cheeses used from here on are loaded from a local JSON file of cheese.com entries instead.

In [18]:
import json
# Reset first so a failed load cannot leave a stale value from an earlier run.
cheese_json = None
# JSON files are UTF-8; pass the encoding explicitly so the load does not
# depend on the platform's default text encoding.
with open('../data/cheeses.json', encoding='utf-8') as f:
    cheese_json = json.load(f)

In [19]:
# Echo the loaded records to inspect their structure (this renders the entire list).
cheese_json

[{'href': 'https://www.cheese.com/media/img/cheese-thumbs/-suggestion/Old_Gouda_-_48_Month.webp',
  'name': 'Gouda'},
 {'href': 'https://www.cheese.com/media/img/cheese-thumbs/-suggestion/Briefermier.webp',
  'name': 'Brie'},
 {'href': 'https://www.cheese.com/media/img/cheese-thumbs/10-American-Cheese-shutterstock_1610208106.webp',
  'name': 'American Cheese'},
 {'href': 'https://www.cheese.com/media/img/cheese-thumbs/Pecorino_romano_cheese.webp',
  'name': 'Pecorino Romano'},
 {'href': 'https://www.cheese.com/media/img/cheese-thumbs/cheddar_large.webp',
  'name': 'Cheddar'},
 {'href': 'https://www.cheese.com/media/img/cheese-thumbs/-suggestion/manchego_1280x800_1.webp',
  'name': 'Manchego'},
 {'href': 'https://www.cheese.com/media/img/cheese-thumbs/-suggestion/170306-DS-Premium-Cheese-Smoked_381x242_acf_cropped.webp',
  'name': 'Smoked Gouda'},
 {'href': 'https://www.cheese.com/media/img/cheese-thumbs/-suggestion/CamembertStLoup_800x.webp',
  'name': 'Camembert'},
 {'href': 'https://

In [36]:
import json
import psycopg2
from psycopg2.extras import execute_values


import os

# Connection settings for psycopg2.connect(**db_config).
# Values come from the standard libpq environment variables so that no secret
# is stored in the notebook; the non-secret settings keep their previous
# values as defaults. When PGPASSWORD is unset the password is None.
# NOTE(review): the password that was previously hardcoded here should be
# treated as compromised and rotated.
db_config = {
    'dbname': os.environ.get('PGDATABASE', 'hon'),
    'user': os.environ.get('PGUSER', 'hon'),
    'password': os.environ.get('PGPASSWORD'),
    'host': os.environ.get('PGHOST', 'chmod.site'),
    'port': os.environ.get('PGPORT', '5432'),
}

def seed_db(data):
    """
    Bulk-insert records into the `images` table.

    Args:
        data: Iterable of mappings, each providing a 'name' and an 'href' key.
            A record missing either key raises KeyError before any database
            work starts.

    Connects using the module-level `db_config`. Database errors are printed
    rather than raised, and the connection is always closed afterwards.
    """
    insert_query = """
        INSERT INTO images (name, href)
        VALUES %s;
    """
    # prepare data for insertion
    values = [(item['name'], item['href']) for item in data]

    conn = None
    try:
        conn = psycopg2.connect(**db_config)
        # `with conn` scopes a transaction only (commit on success, rollback
        # on error); it does not close the connection, hence the finally.
        with conn:
            with conn.cursor() as cur:
                # execute batch insert
                execute_values(cur, insert_query, values)
        # Report success only once the commit has actually gone through.
        print(f"Seeded {len(values)} rows into the database.")
    except Exception as e:
        print(f"Error seeding database: {e}")
    finally:
        if conn is not None:
            conn.close()

# Seed the images table with the records loaded from cheeses.json.
# NOTE(review): seed_db issues a plain INSERT, so re-running this cell
# presumably inserts the same rows again unless the table enforces
# uniqueness — confirm before re-executing.
seed_db(cheese_json)

Seeded 100 rows into the database.
