In [3]:
import requests
import pandas as pd
import re
import numpy as np

In [4]:
pd.set_option('display.max_rows', 2500)
pd.set_option('display.max_columns', 500)

### Παίρνει τα δεδομένα όλων των διαθέσιμων προϊόντων για κάθε σούπερ μάρκετ

In [47]:
headers = {
    'sec-ch-ua-platform': '"macOS"',
    'Referer': 'https://e-katanalotis.gov.gr/',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'sec-ch-ua': '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
    'sec-ch-ua-mobile': '?0',
}

params = {
    'cid': '1730332800000',
}

response = requests.get(
    'https://warply.s3.amazonaws.com/applications/ed840ad545884deeb6c6b699176797ed/basket-retailers/prices.json',
    params=params,
    headers=headers,
)

In [48]:
data = response.json()

In [49]:
prod = data['context']['MAPP_PRODUCTS']['result']['products']

In [50]:
merchants = data['context']['MAPP_PRODUCTS']['result']['merchants']

In [51]:
# ορίζει συνάρτηση που παίρνει το όνομα του κάθε σούπερ μάρκετ από το αρχείο json. 
# input: merchant unique id (uuid)

In [52]:
def get_merchants(uuid:int):
    merchant = 'not found'
    for item in merchants:
        if item['merchant_uuid']==uuid:
            merchant = item['name']
            break
    return merchant

In [53]:
suppliers = data['context']['MAPP_PRODUCTS']['result']['suppliers']

In [54]:
# ορίζει συνάρτηση που παίρνει το όνομα του κάθε προμηθευτή από το αρχείο json.

In [55]:
def get_supplier(uuid:int):
    supplier = 'not found'
    for item in suppliers:
        if item['id']==uuid:
            supplier = item['name']
            break
    return supplier

In [56]:
categories = data['context']['MAPP_PRODUCTS']['result']['categories']

In [57]:
categories[10]

{'name': 'Είδη μιας χρήσης & Είδη Πάρτι',
 'uuid': 163,
 'sub_categories': [{'name': 'Είδη Συντήρησης & Ψησίματος τροφίμων',
   'uuid': 164,
   'sub_sub_categories': [{'name': 'Λαδόχαρτα & Σακούλες ψησίματος',
     'uuid': 165},
    {'name': 'Αλουμινόχαρτα', 'uuid': 166},
    {'name': 'Σακούλες συντήρησης τροφίμων', 'uuid': 167}]},
  {'name': 'Σακούλες απορριμμάτων',
   'uuid': 168,
   'sub_sub_categories': [{'name': 'Σακούλες απορριμμάτων', 'uuid': 169}]}]}

In [58]:
def category_long(category:dict):
    results=[]
    result={}
    result['name']=category['name']
    result['uuid']=category['uuid']
    results.append(result)
    for item in category['sub_categories']:
        result={}
        result['name']=item['name']
        result['uuid']=item['uuid']
        results.append(result)
        for sub_sub in item['sub_sub_categories']:
            result={}
            result['name']=sub_sub['name']
            result['uuid']=sub_sub['uuid']
            results.append(result)
    return results

This function takes a dictionary as input and processes categories in a three-level hierarchy (main category, sub-categories, and sub-sub-categories).
Creates an empty list results=[] to store all categories
Creates an empty dictionary result={} for each category entry
So it flattens the nested structure into a single list where:

First it adds the main category
Then adds each sub-category
Then adds each sub-sub-category

Each item in the resulting list has just two pieces of information: 
the name and uuid, regardless of what level it came from in the original nested structure.

In [59]:
def find_category(uuid:int):
    category = 'Not found'
    for item in category_results:
        if item['uuid']==uuid:
            category = item['name']
    return category

The function simply:

Takes a UUID as input
Looks through all items in category_results
If it finds an item with a matching UUID, returns that item's name
If no match is found, returns 'Not found'

It's basically a lookup function that converts UUIDs back into their corresponding category names.

In [60]:
category_results=[]
for item in categories:
    category_results = category_results+category_long(item)


The third part is a sequence of commands that uses the first function (category_long()) 
to process multiple categories
Start with empty category_results = []
For first item in categories (Clothing):

Run category_long() on it
Add results to category_results

For second item (Electronics):

Run category_long() on it
Add these new results to existing category_results

The end result is one big flat list containing all categories, sub-categories, and sub-sub-categories from all main categories.
This combined list is what the find_category() function uses to look up names by UUID. 

In [67]:
def category_df(record:dict):
    results = []
    
    
    if record['sub_categories']:
        subcategories = record['sub_categories']
        
        if isinstance(subcategories, list) and len(subcategories)>0:
            for i, item in enumerate(subcategories):
                
                if item['sub_sub_categories'] and isinstance(item['sub_sub_categories'], list) and len(item['sub_sub_categories'])>0:
                    for n, sub_sub_item in enumerate(item['sub_sub_categories']):
                        result = {}
                        result['name'] = record['name']
                        result['uuid'] = record['uuid']
                        
                        result[f"sub_name"]=item['name']
                        result[f"sub_uuid"]=item['uuid']
                        
                        
                        result[f"sub_sub_name"]=sub_sub_item['name']
                        result[f"sub_sub_uuid"]=sub_sub_item['uuid']
                        results.append(result)

    return results

# record(categories[1])

This function creates a flattened structure but maintains the hierarchical relationships between categories.
Here's how it works:

Takes a dictionary (record) as input
Checks if there are sub-categories and they're in a list
For each sub-category, checks if there are sub-sub-categories
Creates a flat structure showing the relationships between main category, sub-category, and sub-sub-category

Key differences from the previous category_long function:

This function maintains the hierarchical relationships in each record
Each result shows the complete path (main → sub → sub-sub)
Perfect for creating a DataFrame as each record has the same structure
Each record shows which sub-category and main category a sub-sub-category belongs to

The function also includes several safety checks:

if record['sub_categories']:  # Checks if sub_categories exists
if isinstance(subcategories, list):  # Verifies it's a list
if len(subcategories)>0:  # Checks if the list isn't empty
if item['sub_sub_categories']:  # Checks if sub_sub_categories exists

In [71]:
results = []
for item in categories:
    results=results+category_df(item)

The process is:

Start with empty results list
For each main category in categories:

Run category_df() on it
Add the results to the main results list


End up with one big list containing all hierarchical relationships

This is particularly useful when you want to:

See all category relationships at once
Create a structured DataFrame
Filter or search across all levels of categories
Maintain the parent-child relationships in your data

Alternative ways to write this could be:

Using extend:
results = []
for item in categories:
    results.extend(category_df(item))

Or using list comprehension:
results = [item for category in categories for item in category_df(category)]


In [72]:
cat = pd.DataFrame(results)

**Overall Logic:**
1. Starting Problem:


You have a complex nested data structure (categories) with three levels:

- Main categories (e.g., Electronics)
- Sub-categories (e.g., Phones)
- Sub-sub-categories (e.g., iPhone)


Each level has both a name and UUID (unique identifier)

2. Different Approaches Created (comparing the two main functions):

   `category_long()`: This function flattens everything into a simple list. Output format: [{'name': X, 'uuid': Y}, ...]. Loses the hierarchical relationships between categories


   `category_df()`: This function preserves relationships while flattening.

   Output format: [{'name': main_category, 'uuid': main_uuid,sub_name': sub_category,'sub_uuid': sub_uuid,'sub_sub_name': sub_sub_category,sub_sub_uuid': sub_sub_uuid}, ...]. Maintains parent-child relationships
   
3. The Process Flow:

        Original Data (categories) →
            Apply category_df() to each category →
                Combine all results →
                    Convert to DataFrame (cat)
        
        [Nested Structure] → [Flattened with Relations] → [Single List] → [DataFrame]

4. Why This Design?:

- Purpose: Transform complex nested data into a format that's: Easy to query, filter, analyze, Maintains relationships


- DataFrame Choice: Perfect for this because: Can easily filter (e.g., cat[cat['name'] == 'Electronics'])


In [73]:
cat

Unnamed: 0,name,uuid,sub_name,sub_uuid,sub_sub_name,sub_sub_uuid
0,Έτοιμα Γεύματα,1,Λαδερά,2,Λαδερά,3
1,Αλλαντικά,4,Αλλαντικά Γαλοπούλας & Κοτόπουλου,5,Αλλαντικά Γαλοπούλας,6
2,Αλλαντικά,4,Αλλαντικά Γαλοπούλας & Κοτόπουλου,5,Αλλαντικά Κοτόπουλου,7
3,Αλλαντικά,4,"Ζαμπόν, Μπέικον & Ωμοπλάτη",8,Ζαμπόν Μπούτι,9
4,Αλλαντικά,4,"Ζαμπόν, Μπέικον & Ωμοπλάτη",8,Προσούτο & Jamon,10
5,Αλλαντικά,4,"Ζαμπόν, Μπέικον & Ωμοπλάτη",8,"Μπέικον, Ωμοπλάτη & Μπριζόλα",11
6,Αλλαντικά,4,Λουκάνικα,12,Χωριάτικα Λουκάνικα,13
7,Αλλαντικά,4,Λουκάνικα,12,Λουκάνικα Κοκτέιλ,14
8,Αλλαντικά,4,Λουκάνικα,12,Λουκάνικα Φρανκφούρτης,15
9,Αλλαντικά,4,Πάριζα & Μορταδέλα,16,Μορταδέλα,17


In [65]:
import json

In [78]:
def product(record:dict):
    results=[]
    fields = json.loads(record['extra_fields'])
    for item in record['prices']:
        result ={}
        result['product_id']=record['barcode']
        result['name'] = record['name']
        result['date'] = fields['date']
        result['unit'] = fields['unit']
        
        if isinstance(record['category'], list):
            categories = ', '.join([find_category(i) for i in record['category']])
            result['category_name'] = categories
        
        result['category_codes'] = record['category']
        
        result['monimi_meiosi'] = record['monimi_meiosi']
        result['promo'] = record['promo']
        
        result['supplier_name'] = get_supplier(record['supplier'])
        result['supplier_code'] = record['supplier']
        
        result['merchant'] = get_merchants(int(item['merchant_uuid']))
        result['price'] = item['price']
        result['price_normalized'] = item['price_normalized']
        
        results.append(result)
    return results

In [79]:
results_products = []
for item in prod:
    results_products= results_products + product(item)

In [80]:
df = pd.DataFrame(results_products)

In [81]:
df.tail()

Unnamed: 0,product_id,name,date,unit,category_name,category_codes,monimi_meiosi,promo,supplier_name,supplier_code,merchant,price,price_normalized
24936,904,ΣΟΛΟΜΟΣ ΦΙΛΕΤΟ Μ/Δ ΝΩΠΟ 4/250G,12-11-2024,ανά κιλ,"Φρέσκο Ψάρι & Θαλασσινά, Ψάρια Ιχθυοκαλλιέργει...","[424, 425, 428]",False,False,Ν/Α,302.0,sklavenitis,22.8,22.8
24937,9066001427506,ALPILAND ESPRESSO 250ML,12-11-2024,ανά κιλ,"Αναψυκτικά, Νερά & Χυμοί, Αναψυκτικά, Σόδες & ...","[21, 22, 23]",True,False,ALPILAND,46.0,mymarket,1.2,4.8
24938,9066001427506,ALPILAND ESPRESSO 250ML,12-11-2024,ανά κιλ,"Αναψυκτικά, Νερά & Χυμοί, Αναψυκτικά, Σόδες & ...","[21, 22, 23]",True,False,ALPILAND,46.0,ab,1.22,4.88
24939,9066001427605,ALPILAND CAPPUCCINO 250ML,12-11-2024,ανά κιλ,"Αναψυκτικά, Νερά & Χυμοί, Αναψυκτικά, Σόδες & ...","[21, 22, 23]",True,False,ALPILAND,46.0,mymarket,1.2,4.8
24940,9066001427605,ALPILAND CAPPUCCINO 250ML,12-11-2024,ανά κιλ,"Αναψυκτικά, Νερά & Χυμοί, Αναψυκτικά, Σόδες & ...","[21, 22, 23]",True,False,ALPILAND,46.0,ab,1.22,4.88


### Όλα τα προϊόντα

In [83]:
#!/usr/bin/env python
# coding: utf-8

# Εισαγωγή των απαραίτητων βιβλιοθηκών
import requests
import pandas as pd
import re
import numpy as np
import json
from datetime import datetime
today = datetime.today().strftime('_%d_%m_%Y')

# Ρυθμίσεις για την εμφάνιση δεδομένων στο pandas
pd.set_option('display.max_rows', 2500)     # Ορισμός μέγιστου αριθμού γραμμών που θα εμφανίζονται
pd.set_option('display.max_columns', 500)   # Ορισμός μέγιστου αριθμού στηλών που θα εμφανίζονται

# Ορισμός των headers για το HTTP αίτημα
headers = {
    'sec-ch-ua-platform': '"macOS"',
    'Referer': 'https://e-katanalotis.gov.gr/',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'sec-ch-ua': '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
    'sec-ch-ua-mobile': '?0',
}

params = {
    'cid': '1730332800000',
}

response = requests.get(
    'https://warply.s3.amazonaws.com/applications/ed840ad545884deeb6c6b699176797ed/basket-retailers/prices.json',
    params=params,
    headers=headers,
)

# Ανάγνωση της απόκρισης ως JSON
data = response.json()

# Ανάκτηση της λίστας των προϊόντων από τα δεδομένα
prod = data['context']['MAPP_PRODUCTS']['result']['products']

# Ανάκτηση της λίστας των εμπόρων από τα δεδομένα
merchants = data['context']['MAPP_PRODUCTS']['result']['merchants']

# Συνάρτηση για την εύρεση του ονόματος του εμπόρου με βάση το uuid
def get_merchants(uuid:int):
    merchant = 'not found'  # Αρχικοποίηση μεταβλητής
    for item in merchants:
        if item['merchant_uuid']==uuid:
            merchant = item['name']
            break
    return merchant

# Ανάκτηση της λίστας των προμηθευτών από τα δεδομένα
suppliers = data['context']['MAPP_PRODUCTS']['result']['suppliers']

# Συνάρτηση για την εύρεση του ονόματος του προμηθευτή με βάση το uuid
def get_supplier(uuid:int):
    supplier = 'not found'  # Αρχικοποίηση μεταβλητής
    for item in suppliers:
        if item['id']==uuid:
            supplier = item['name']
            break
    return supplier

# Ανάκτηση της λίστας των κατηγοριών από τα δεδομένα
categories = data['context']['MAPP_PRODUCTS']['result']['categories']

# Εμφάνιση της 11ης κατηγορίας (για έλεγχο)
categories[10]

# Συνάρτηση για την ανάπτυξη των κατηγοριών και υποκατηγοριών σε λίστα
def category_long(category:dict):
    results=[]
    result={}
    result['name']=category['name']
    result['uuid']=category['uuid']
    results.append(result)
    for item in category['sub_categories']:
        result={}
        result['name']=item['name']
        result['uuid']=item['uuid']
        results.append(result)
        for sub_sub in item['sub_sub_categories']:
            result={}
            result['name']=sub_sub['name']
            result['uuid']=sub_sub['uuid']
            results.append(result)
    return results

# Συνάρτηση για την εύρεση του ονόματος της κατηγορίας με βάση το uuid
def find_category(uuid:int):
    category = 'Not found'
    for item in category_results:
        if item['uuid']==uuid:
            category = item['name']
    return category

# Δημιουργία λίστας με όλα τα αποτελέσματα κατηγοριών
category_results=[]
for item in categories:
    category_results = category_results+category_long(item)

# Συνάρτηση για τη δημιουργία DataFrame από τις κατηγορίες
def category_df(record:dict):
    results = []
    if record['sub_categories']:
        subcategories = record['sub_categories']
        if isinstance(subcategories, list) and len(subcategories)>0:
            for i, item in enumerate(subcategories):
                if item['sub_sub_categories'] and isinstance(item['sub_sub_categories'], list) and len(item['sub_sub_categories'])>0:
                    for n, sub_sub_item in enumerate(item['sub_sub_categories']):
                        result = {}
                        result['name'] = record['name']
                        result['uuid'] = record['uuid']
                        result[f"sub_name"]=item['name']
                        result[f"sub_uuid"]=item['uuid']
                        result[f"sub_sub_name"]=sub_sub_item['name']
                        result[f"sub_sub_uuid"]=sub_sub_item['uuid']
                        results.append(result)
    return results

# Παράδειγμα εκτέλεσης της συνάρτησης (σχολιασμένο)
# record(categories[1])

# Συγκέντρωση όλων των αποτελεσμάτων σε μία λίστα
results = []
for item in categories:
    results=results+category_df(item)

# Δημιουργία DataFrame από τα αποτελέσματα των κατηγοριών
cat = pd.DataFrame(results)



# Συνάρτηση για την επεξεργασία των προϊόντων και τη δημιουργία λίστας λεξικών με τα δεδομένα
def product(record:dict):
    results=[]
    fields = json.loads(record['extra_fields'])  # Μετατροπή των 'extra_fields' από JSON string σε dict
    for item in record['prices']:
        result ={}
        result['product_id']=record['barcode']
        result['name'] = record['name']
        result['date'] = fields['date']
        result['unit'] = fields['unit']
        if isinstance(record['category'], list):
            categories = ', '.join([find_category(i) for i in record['category']])
            result['category_name'] = categories
        result['category_codes'] = record['category']
        result['monimi_meiosi'] = record['monimi_meiosi']
        result['promo'] = record['promo']
        result['supplier_name'] = get_supplier(record['supplier'])
        result['supplier_code'] = record['supplier']
        result['merchant'] = get_merchants(int(item['merchant_uuid']))
        result['price'] = item['price']
        result['price_normalized'] = item['price_normalized']
        results.append(result)
    return results

# Συγκέντρωση όλων των αποτελεσμάτων προϊόντων σε μία λίστα
results_products = []
for item in prod:
    results_products= results_products + product(item)

# Δημιουργία DataFrame από τα αποτελέσματα των προϊόντων
df = pd.DataFrame(results_products)
df.to_csv(f"data{today}.csv", index=False)



## Καλάθι οπωροκηπευτικών

In [84]:
response_3 = requests.get(
    'https://warply.s3.amazonaws.com/applications/ed840ad545884deeb6c6b699176797ed/basket-retailers/freshbasket.json?v=1730710976905',
    headers={
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'en-US,en;q=0.9,el;q=0.8',
        'Connection': 'keep-alive',
        'Origin': 'https://e-katanalotis.gov.gr',
        'Referer': 'https://e-katanalotis.gov.gr/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'cross-site',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"',
    }
)

In [85]:
data = response_3.json()

In [87]:
retailers = data['retailers'].keys()
retailers

dict_keys(['ab', 'bazaar', 'galaxias', 'egnatia', 'kritikos', 'lidl', 'masoutis', 'marketin', 'mymarket', 'sklavenitis', 'synka', 'xalkiadakis'])

In [90]:
from_date_object = datetime.strptime(data['from'], '%d-%m-%Y')
to_date_object = datetime.strptime(data['to'], '%d-%m-%Y')
filename = f"fresh_basket_{str(from_date_object.day)}_{str(from_date_object.month)}_to_{str(to_date_object.day)}_{str(to_date_object.month)}_{str(to_date_object.year)}.csv"
filename

'fresh_basket_5_11_to_11_11_2024.csv'

In [93]:
fresh_basket=[]
for retail in retailers:
    for i in data['retailers'][retail]['basket']:
        i['retailer'] = retail
        i['from'] = data['from']
        i['to'] = data['to']
        fresh_basket.append(i)
fresh_basket

[{'CATEGORY': '1',
  'BARCODE': '7063330',
  'NAME': 'Σταφύλια',
  'PRICE_MIN': '3.97',
  'PRICE_MAX': '3.97',
  'IMAGE': 'stafilia.jpg',
  'CATEGORY_NAME': 'Φρούτα',
  'UNIT': 'Kg',
  'DISPLAY': '',
  'retailer': 'ab',
  'from': '05-11-2024',
  'to': '11-11-2024'},
 {'CATEGORY': '11',
  'BARCODE': '7063667',
  'NAME': 'Τομάτες',
  'PRICE_MIN': '2.55',
  'PRICE_MAX': '5.43',
  'IMAGE': 'ntomates.jpg',
  'CATEGORY_NAME': 'Λαχανικά',
  'UNIT': 'Kg',
  'DISPLAY': '',
  'retailer': 'ab',
  'from': '05-11-2024',
  'to': '11-11-2024'},
 {'CATEGORY': '12',
  'BARCODE': '7063697',
  'NAME': 'Τοματίνια',
  'PRICE_MIN': '6.98',
  'PRICE_MAX': '12.76',
  'IMAGE': 'ntomatinia.jpg',
  'CATEGORY_NAME': 'Λαχανικά',
  'UNIT': 'Kg',
  'DISPLAY': '',
  'retailer': 'ab',
  'from': '05-11-2024',
  'to': '11-11-2024'},
 {'CATEGORY': '13',
  'BARCODE': '7063652',
  'NAME': 'Αγγούρια',
  'PRICE_MIN': '0.62',
  'PRICE_MAX': '2.82',
  'IMAGE': 'aggouria.jpg',
  'CATEGORY_NAME': 'Λαχανικά',
  'UNIT': 'Kg',
  'D

In [92]:
pd.DataFrame(fresh_basket).to_csv(filename, index=False)

LICENSE                             data_4_11_2024.csv
README.md                           e-katanalotis.ipynb
data_1_11_2024.csv                  [34mekatanalotis[m[m
data_2_11_2024.csv                  fresh_basket_29_10_to_4_11_2024.csv
data_3_11_2024.csv                  script.py


## Καλάθι νοικοκυριού

In [95]:
import requests

response_4 = requests.get(
    'https://warply.s3.eu-west-1.amazonaws.com/applications/ed840ad545884deeb6c6b699176797ed/basket-retailers/basket.json',
    headers={
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'en-US,en;q=0.9,el;q=0.8',
        'Connection': 'keep-alive',
        'Origin': 'https://e-katanalotis.gov.gr',
        'Referer': 'https://e-katanalotis.gov.gr/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'cross-site',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"',
    }
)

In [96]:
data = response_4.json()
data

{'from': '06-11-2024',
 'to': '12-11-2024',
 'base_url': 'https://warply.s3.eu-west-1.amazonaws.com/applications/ed840ad545884deeb6c6b699176797ed/basket-retailers/',
 'variance': [],
 'retailers': {'ab': {'logo': 'ab.png',
   'name': 'ΑΒ Βασιλόπουλος',
   'basket': [{'CATEGORY': '1',
     'BARCODE': '5201399020074',
     'NAME': '3ΑΛΦΑ ΡΥΖΙ ΚΑΡΟΛΙΝΑ 500GR Ε.Ε.',
     'PRICE': '1.53',
     'PRICE_NORMALIZED': '3.06',
     'IMAGE': '7085666.jpg',
     'PL': '0',
     'CATEGORY_NAME': 'Ρύζι',
     'UNIT': 'Kg',
     'DISPLAY': ''},
    {'CATEGORY': '1',
     'BARCODE': '5202336158133',
     'NAME': 'ΑΒ ΕΠΙΛΟΓΗ ΡΥΖΙ ΚΑΡΟΛΙΝΑ 1KG',
     'PRICE': '1.79',
     'PRICE_NORMALIZED': '1.79',
     'IMAGE': 'default_kalathi.png',
     'PL': '1',
     'CATEGORY_NAME': 'Ρύζι',
     'UNIT': 'Kg',
     'DISPLAY': ''},
    {'CATEGORY': '2',
     'BARCODE': '5202336109876',
     'NAME': 'ΑΒ ΨΩΜΙ ΤΟΣΤ ΣΙΚΑΛΗΣ 340ΓΡ',
     'PRICE': '0.75',
     'PRICE_NORMALIZED': '2.21',
     'IMAGE': '7347418.jpg',
     

In [97]:
retailers = data['retailers'].keys()
retailers

dict_keys(['ab', 'bazaar', 'galaxias', 'efood', 'efresh', 'egnatia', 'kantzas', 'kritikos', 'lidl', 'marketin', 'mymarket', 'pitsias', 'sklavenitis', 'synka', 'xalkiadakis'])

In [99]:
from_date_object = datetime.strptime(data['from'], '%d-%m-%Y')
to_date_object =datetime.strptime(data['to'], '%d-%m-%Y')
filename = f"household_basket_{str(from_date_object.day)}_{str(from_date_object.month)}_to_{str(to_date_object.day)}_{str(to_date_object.month)}_{str(to_date_object.year)}.csv"
filename

'household_basket_6_11_to_12_11_2024.csv'

In [100]:
household_basket = []
for retail in retailers: 
    for i in data['retailers'][retail]['basket']:
        i['retailer'] = retail
        i['from'] = data['from']
        i['to'] = data['to']
        household_basket.append(i)
household_basket
        
        

[{'CATEGORY': '1',
  'BARCODE': '5201399020074',
  'NAME': '3ΑΛΦΑ ΡΥΖΙ ΚΑΡΟΛΙΝΑ 500GR Ε.Ε.',
  'PRICE': '1.53',
  'PRICE_NORMALIZED': '3.06',
  'IMAGE': '7085666.jpg',
  'PL': '0',
  'CATEGORY_NAME': 'Ρύζι',
  'UNIT': 'Kg',
  'DISPLAY': '',
  'retailer': 'ab',
  'from': '06-11-2024',
  'to': '12-11-2024'},
 {'CATEGORY': '1',
  'BARCODE': '5202336158133',
  'NAME': 'ΑΒ ΕΠΙΛΟΓΗ ΡΥΖΙ ΚΑΡΟΛΙΝΑ 1KG',
  'PRICE': '1.79',
  'PRICE_NORMALIZED': '1.79',
  'IMAGE': 'default_kalathi.png',
  'PL': '1',
  'CATEGORY_NAME': 'Ρύζι',
  'UNIT': 'Kg',
  'DISPLAY': '',
  'retailer': 'ab',
  'from': '06-11-2024',
  'to': '12-11-2024'},
 {'CATEGORY': '2',
  'BARCODE': '5202336109876',
  'NAME': 'ΑΒ ΨΩΜΙ ΤΟΣΤ ΣΙΚΑΛΗΣ 340ΓΡ',
  'PRICE': '0.75',
  'PRICE_NORMALIZED': '2.21',
  'IMAGE': '7347418.jpg',
  'PL': '1',
  'CATEGORY_NAME': 'Ψωμί για Tοστ',
  'UNIT': 'Kg',
  'DISPLAY': '',
  'retailer': 'ab',
  'from': '06-11-2024',
  'to': '12-11-2024'},
 {'CATEGORY': '3',
  'BARCODE': '5204647003683',
  'NAME': 'ΑΡΤΟΠ

In [102]:
df_basket = pd.DataFrame(household_basket)
df_basket.head()

Unnamed: 0,CATEGORY,BARCODE,NAME,PRICE,PRICE_NORMALIZED,IMAGE,PL,CATEGORY_NAME,UNIT,DISPLAY,retailer,from,to,null
0,1,5201399020074,3ΑΛΦΑ ΡΥΖΙ ΚΑΡΟΛΙΝΑ 500GR Ε.Ε.,1.53,3.06,7085666.jpg,0,Ρύζι,Kg,,ab,06-11-2024,12-11-2024,
1,1,5202336158133,ΑΒ ΕΠΙΛΟΓΗ ΡΥΖΙ ΚΑΡΟΛΙΝΑ 1KG,1.79,1.79,default_kalathi.png,1,Ρύζι,Kg,,ab,06-11-2024,12-11-2024,
2,2,5202336109876,ΑΒ ΨΩΜΙ ΤΟΣΤ ΣΙΚΑΛΗΣ 340ΓΡ,0.75,2.21,7347418.jpg,1,Ψωμί για Tοστ,Kg,,ab,06-11-2024,12-11-2024,
3,3,5204647003683,ΑΡΤΟΠ/ΣΜΑ ΧΩΡΙΑΤΙΚΟ 350ΓΡ ΚΤΨ,0.68,1.94,7619515.jpg,1,Ψωμί Φραντζόλα,Kg,,ab,06-11-2024,12-11-2024,
4,3,5214000155980,ΑΡΤΟΣ ΤΟΥ ΧΩΡΙΟΥ 350ΓΡ,0.99,2.83,7293943.jpg,0,Ψωμί Φραντζόλα,Kg,,ab,06-11-2024,12-11-2024,


In [101]:
df_basket.to_csv(filename, index=False)