# NOTE: Before executing this ipynb, make sure you have done the following things:
### 1. Rename the `green-db` table to `green-db-backup` (using your favorite SQL client)

```sql
ALTER TABLE "green-db" RENAME TO "green-db-backup";
```


### 2. Create the new scraping tables

```sql
-- create new scraping tables:
--FR
CREATE TABLE "asos_FR" AS SELECT "id", "timestamp", "merchant", 'FR' AS country_code, "category", "url", "html", "page_type", "meta_information" FROM "asos";
--CREATE TABLE "hm_FR" AS SELECT "id", "timestamp", "merchant", 'FR' AS country_code, "category", "url", "html", "page_type", "meta_information" FROM "hm";
CREATE TABLE "zalando_FR" AS SELECT "id", "timestamp", 'zalando' AS merchant, 'FR' AS country_code, "category", "url", "html", "page_type", "meta_information" FROM "zalando_fr";

--DE
CREATE TABLE "otto_DE" AS SELECT "id", "timestamp", "merchant", 'DE' AS country_code, "category", "url", "html", "page_type", "meta_information" FROM "otto";
CREATE TABLE "amazon_DE" AS SELECT "id", "timestamp", "merchant", 'DE' AS country_code, "category", "url", "html", "page_type", "meta_information" FROM "amazon";
CREATE TABLE "zalando_DE" AS SELECT "id", "timestamp", "merchant", 'DE' AS country_code, "category", "url", "html", "page_type", "meta_information" FROM "zalando";

--GB/UK
CREATE TABLE "zalando_GB" AS SELECT "id", "timestamp", 'zalando' AS merchant, 'GB' AS country_code, "category", "url", "html", "page_type", "meta_information" FROM "zalando_uk";
```

### 3. Redeploy workers

```bash
make workers-test-deploy
```

### 4. Set the Postgres password in the following cell

In [1]:
import os
# Connection settings for the GreenDB Postgres instance, exported as
# environment variables before any database module is imported.
os.environ.update(
    {
        "POSTGRES_GREEN_DB_USER": "green-db",
        "POSTGRES_GREEN_DB_PASSWORD": "",  # TODO: fill in the password before running
        "POSTGRES_GREEN_DB_HOST": "localhost",
        "POSTGRES_GREEN_DB_PORT": "5432",
    }
)

import pandas as pd
#from database.connection import GreenDB

In [2]:
from database.tables import GreenDBBaseTable, __TableMixin
from sqlalchemy import ARRAY, BIGINT, INTEGER, JSON, NUMERIC, TEXT, TIMESTAMP, VARCHAR, Column

from datetime import datetime
from enum import Enum
from typing import List, Optional

from pydantic import BaseModel, conint, conlist

from core.sustainability_labels import create_CertificateType

CertificateType = create_CertificateType()


class GreenDBTableOld(GreenDBBaseTable, __TableMixin):
    """
    Defines the GreenDB columns as they were before the migration.

    The class is mapped to the renamed backup table so that the old rows can be
    read back and converted to the new schema.

    Args:
        GreenDBBaseTable ([type]): `sqlalchemy` base class for the GreenDB database
        __TableMixin ([type]): Mixin that implements some convenience methods
    """

    # Has to match the name the old table was renamed to (step 1 of the instructions above).
    __tablename__ = "green-db-backup" # TODO: adjust if the backup table has a different name

    # Mandatory columns (all declared NOT NULL).
    id = Column(INTEGER, nullable=False, autoincrement=True, primary_key=True)
    timestamp = Column(TIMESTAMP, nullable=False)
    merchant = Column(TEXT, nullable=False)
    category = Column(TEXT, nullable=False)
    url = Column(TEXT, nullable=False)
    name = Column(TEXT, nullable=False)
    description = Column(TEXT, nullable=False)
    brand = Column(TEXT, nullable=False)
    sustainability_labels = Column(ARRAY(TEXT), nullable=False)  # TODO foreign keys to labels
    price = Column(NUMERIC, nullable=False)
    currency = Column(TEXT, nullable=False)
    image_urls = Column(ARRAY(TEXT), nullable=False)

    # Optional columns: in the old schema color and size are single text values.
    color = Column(TEXT, nullable=True)
    size = Column(TEXT, nullable=True)

    # Optional product identifiers.
    gtin = Column(BIGINT, nullable=True)
    asin = Column(TEXT, nullable=True)
    
class CurrencyType(str, Enum):
    """Currencies accepted for a product price; members are also plain strings."""

    EUR = "EUR"
    GBP = "GBP"
    
class Productold(BaseModel):
    """
    Domain object for a product in the old (pre-migration) schema.

    The fields mirror the columns of `GreenDBTableOld`, except for the
    database-generated `id`, which is not part of this model.
    """

    timestamp: datetime
    url: str
    merchant: str
    category: str
    name: str
    description: str
    brand: str
    # At least one label is required (min_items=1).
    sustainability_labels: conlist(CertificateType, min_items=1)  # type: ignore
    price: float
    currency: CurrencyType
    image_urls: List[str]

    # Single optional strings in the old schema.
    color: Optional[str]
    size: Optional[str]

    # int, source: https://support.google.com/merchants/answer/6219078?hl=en
    gtin: Optional[int]

    # str because alpha numeric
    # source: https://en.wikipedia.org/wiki/Amazon_Standard_Identification_Number
    asin: Optional[str]

    class Config:
        # Allow building instances from ORM rows via `from_orm`.
        orm_mode = True
        # Store the enum's value (e.g. "EUR") instead of the enum member.
        use_enum_values = True

In [3]:
from database.connection import Connection
from core.constants import DATABASE_NAME_GREEN_DB
from database.tables import SustainabilityLabelsTable
from typing import Iterator
from core.domain import Product

class GreenDBold(Connection):
    """`Connection` bound to the old (backup) GreenDB table, used to read the rows to migrate."""

    def __init__(self) -> None:
        """
        `Connection` for the old GreenDB table.
        Automatically pre-populates the sustainability labels table.
        """
        super().__init__(GreenDBTableOld, DATABASE_NAME_GREEN_DB)

        from core.sustainability_labels.bootstrap_database import sustainability_labels

        with self._session_factory() as db_session:
            # NOTE: this is slow, it issues one lookup query per label.
            # If we have many more labels to bootstrap, we should refactor it.
            for label in sustainability_labels:
                if (  # If label does not exist
                    not db_session.query(SustainabilityLabelsTable.id)
                    .filter(SustainabilityLabelsTable.id == label.id)
                    .first()
                ):
                    db_session.add(SustainabilityLabelsTable(**label.dict()))

            db_session.commit()

    def get_all_products(self, batch_size: int = 1000) -> Iterator[Productold]:
        """
        Fetch all products of the old table.

        Rows are streamed from the database in batches of `batch_size` instead of
        loading the whole table into memory at once. The database session stays
        open until the returned iterator is exhausted (or garbage collected).

        Args:
            batch_size (int, optional): How many rows to fetch simultaneously. Defaults to 1000.

        Yields:
            Iterator[Productold]: Iterator over the old domain object representations
        """
        with self._session_factory() as db_session:
            # `yield_per` makes the query honour `batch_size`; previously the
            # parameter was ignored and `query.all()` materialized every row.
            query = db_session.query(self._database_class).yield_per(batch_size)
            for row in query:
                # Convert while the session is still open and the row is attached.
                yield Productold.from_orm(row)

In [4]:
# Connect to the old (backup) table; the constructor also bootstraps the sustainability labels.
greenDBold = GreenDBold()

## new table connection

In [5]:
from database.connection import GreenDB
from core.domain import Product

# Connection to the new GreenDB table that the migrated products are written to.
greenDB = GreenDB()

In [6]:
def old_product_to_new(product: Productold) -> Product:
    """
    Convert a product of the old schema into a `Product` of the new schema.

    The conversion
    - derives `country_code` from the old merchant name and replaces the
      merchant by its new, country independent name (both become `None` for
      an unknown merchant),
    - wraps a non-empty `color` into a one-element list,
    - splits a non-empty `size` on ", " into a list, where the literal string
      'None' is turned into `None`.

    Args:
        product (Productold): Product read from the old table.

    Returns:
        Product: Result of `Product.parse_obj` on the converted fields.
    """
    # old merchant name -> (new merchant name, country code)
    merchant_migration = {
        "asos": ("asos", "FR"),
        "amazon": ("amazon", "DE"),
        "zalando": ("zalando", "DE"),
        "zalando_fr": ("zalando", "FR"),
        "zalando_uk": ("zalando", "GB"),
        "otto": ("otto", "DE"),
        "hm": ("hm", "FR"),
    }

    fields = dict(product)

    new_merchant, country_code = merchant_migration.get(fields.get("merchant"), (None, None))
    fields["country_code"] = country_code
    fields["merchant"] = new_merchant

    old_color = fields.get("color")
    if old_color:
        fields["color"] = [old_color]

    old_size = fields.get("size")
    if old_size:
        fields["size"] = None if old_size == "None" else old_size.split(", ")

    return Product.parse_obj(fields)

In [7]:
# Iterate over every product of the backup table, convert it and write it to the new table.
products_iterator = greenDBold.get_all_products()

for product in products_iterator:
    try:
        greenDB.write(old_product_to_new(product))
    except Exception as e:
        # Best effort: report the failing product and continue with the next one
        # instead of aborting the whole migration.
        product = dict(product)
        print(f"error for product with timestamp: {product.get('timestamp')} and url: {product.get('url')} throwing: {e}")
        print(product)

If everything went well and the tables look good, you can delete the old tables, e.g. via:

```sql
DROP TABLE "amazon", "asos", "otto", "zalando", "zalando_fr", "zalando_uk";
DROP TABLE "green-db-backup";
```