In [3]:
import requests
import pandas as pd

# Search endpoint of the Open Food Facts API and the product fields to request.
# NOTE(review): the ".net" host looks like the Open Food Facts staging
# environment — confirm that the production ".org" host is not intended.
base_url = "https://world.openfoodfacts.net/api/v0/search.json"
fields = ','.join([
    'product_name',
    'categories',
    'ecoscore_score',
    'countries_tags',
])
page_size = 100  # Products requested per API page

# Function to fetch wine data with pagination and specific category filters
def fetch_wine_data(page_size=100, max_wines=5000, timeout=30):
    """Download wine products from the Open Food Facts search API.

    Requests successive pages from the module-level ``base_url`` (asking only
    for the module-level ``fields``), filtered to the 'france' country tag and
    the 'wine' category tag, and keeps the products whose ``categories`` text
    contains 'Vins'. Paging stops as soon as one of these happens:

    * ``max_wines`` products have been collected,
    * a page is empty or shorter than ``page_size`` (end of results),
    * a request fails, returns a non-200 status, or returns invalid JSON.

    Args:
        page_size: Number of products requested per page.
        max_wines: Maximum number of products to collect and return.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of at most ``max_wines`` product dicts, in the order received.
    """
    wines = []
    page = 1
    while len(wines) < max_wines:
        params = {
            'fields': fields,
            'page_size': page_size,
            'page': page,
            'countries_tags': 'france',  # Filter by country (e.g., France)
            'categories_tags': 'wine',  # Wine category tag
        }

        print(f"Fetching page {page}...")

        # A timeout keeps a stalled connection from hanging the cell forever;
        # network errors follow the same "report and stop" policy as a bad
        # status code, so the pages collected so far are still returned.
        try:
            response = requests.get(base_url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to fetch data on page {page}: {exc}")
            break

        if response.status_code != 200:
            print(f"Failed to fetch data on page {page}")
            break

        try:
            data = response.json()
        except ValueError as exc:  # body was not valid JSON
            print(f"Failed to fetch data on page {page}: {exc}")
            break

        if 'products' not in data or not data['products']:
            print(f"No wines found on page {page}")
            break

        products = data['products']
        # Filter out non-wine products (like juices) and collect wines.
        # `.get(...) or ''` tolerates products whose categories field is
        # missing or null instead of raising on them.
        wines.extend(
            product for product in products
            if 'Vins' in (product.get('categories') or '')
        )

        if len(wines) >= max_wines:
            print(f"Reached target of {max_wines} wines")
            break  # Stop fetching more wines
        if len(products) < page_size:
            print(f"Reached end of results on page {page}")
            break  # No more pages
        page += 1

    return wines[:max_wines]  # Never return more than requested

# Fetch wine data, using the page size configured at the top of the cell
wines = fetch_wine_data(page_size=page_size)

# Flatten each product into one record: three fixed columns plus one
# Category_<n> column per comma-separated category label
wine_info = []
for wine in wines:
    # Clean up and format categories; `or ''` guards against a null field
    categories = [
        cat.strip()
        for cat in (wine.get('categories') or '').split(',')
        if cat.strip()
    ]

    # Create a dictionary with basic information
    wine_data = {
        'Name': wine.get('product_name', ''),
        'EcoScore': wine.get('ecoscore_score', ''),
        'Countries': wine.get('countries_tags', ''),
    }

    # Add each category label as a separate column (numbered from 1)
    for idx, category in enumerate(categories, start=1):
        wine_data[f'Category_{idx}'] = category

    wine_info.append(wine_data)

# Create a DataFrame; records with fewer categories are padded with NaN
df = pd.DataFrame(wine_info)

# Save to a CSV file
df.to_csv('wine_data_from_openfoodfacts.csv', index=False)

# Display the first few rows of the DataFrame. This has to be the last
# expression in the cell: a bare expression anywhere else renders nothing.
df.head()


Fetching page 1...
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...
Fetching page 6...
Fetching page 7...
Fetching page 8...
Fetching page 9...
Fetching page 10...
Fetching page 11...
Fetching page 12...
Fetching page 13...
Fetching page 14...
Fetching page 15...
Fetching page 16...
Fetching page 17...
Fetching page 18...
Fetching page 19...
Fetching page 20...
Fetching page 21...
Fetching page 22...
Fetching page 23...
Fetching page 24...
Fetching page 25...
Fetching page 26...
Fetching page 27...
Fetching page 28...
Fetching page 29...
Fetching page 30...
Fetching page 31...
Fetching page 32...
Fetching page 33...
Fetching page 34...
Fetching page 35...
Fetching page 36...
Fetching page 37...
Fetching page 38...
Fetching page 39...
Fetching page 40...
Fetching page 41...
Fetching page 42...
Fetching page 43...
Fetching page 44...
Fetching page 45...
Fetching page 46...
Fetching page 47...
Fetching page 48...
Fetching page 49...
Fetching page 50...
Fetching 

In [4]:
# Count how often each label occurs in the 16th category column
category_16 = df['Category_16']
category_16.value_counts()

Category_16
Raisins frais    3
Name: count, dtype: int64