## Intégrer Refashion depuis l'api pointsapport :

- Récupérer les données-eo-refashion depuis l'api pointsapport.
- Créer et mapper les données vers les tables Acteurs, Proposition de Services et Sous-catégories.
- Enregistrer chaque table dans un fichier CSV.

#### préprod

In [50]:
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Accessing environment variables
user = os.getenv('DB_USER')
password = os.getenv('DB_PASSWORD')
host = os.getenv('DB_HOST')
port = os.getenv('DB_PORT')  # Default PostgreSQL port is 5432, but we're using a custom one here
db_name = os.getenv('DB_NAME')

# Create the connection URL
connection_string = f'postgresql://{user}:{password}@{host}:{port}/{db_name}'

# Create the engine
engine = create_engine(connection_string)   


#### prod

In [31]:
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Accessing environment variables
user = os.getenv('DB_USER_PROD')
password = os.getenv('DB_PASSWORD_PROD')
host = os.getenv('DB_HOST_PROD')
port = os.getenv('DB_PORT_PROD')  # Default PostgreSQL port is 5432, but we're using a custom one here
db_name = os.getenv('DB_NAME_PROD')

# Create the connection URL
connection_string = f'postgresql://{user}:{password}@{host}:{port}/{db_name}'

# Create the engine
engine_prod = create_engine(connection_string)


## Get data from point apport 

In [4]:
import requests
import pandas as pd


def fetch_all_data(url):
    all_data = []
    while url:
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            all_data.extend(data['results'])
            # Check if there's a next page link
            url = data.get('next', None)
            print(url)
        else:
            print(f"Failed to fetch data: {response.status_code}")
            break
    return all_data

api_url = "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/donnees-eo-refashion/lines?size=10000"

data = fetch_all_data(api_url)

df = pd.DataFrame(data)





https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/zkt20z09p8jl6oix18a5kcte/lines?size=10000&after=1709624636413%2C403043131910
https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/zkt20z09p8jl6oix18a5kcte/lines?size=10000&after=1709624630377%2C403037095920
https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/zkt20z09p8jl6oix18a5kcte/lines?size=10000&after=1709624624501%2C403031219930
None


In [5]:
df_acteurtype = pd.read_sql_table('qfdmo_acteurtype', engine)
df_sources = pd.read_sql_table('qfdmo_source', engine)
df_da = pd.read_sql_table('qfdmo_displayedacteur', engine)
df_ps = pd.read_sql_table('qfdmo_propositionservice', engine)
df_ps['id'].max()
df_pssc = pd.read_sql_table('qfdmo_propositionservice_sous_categories', engine)
df_action = pd.read_sql_table('qfdmo_action', engine)
df_ac = pd.read_sql_table('qfdmo_acteur', engine)
df_libel = pd.read_sql_table('qfdmo_labelqualite', engine)


  self.meta.reflect(bind=self.con, only=[table_name], views=True)
  self.meta.reflect(bind=self.con, only=[table_name], views=True)
  self.meta.reflect(bind=self.con, only=[table_name], views=True)


In [6]:
df_libel

Unnamed: 0,id,libelle,code,afficher,bonus,url,logo_file
0,1,Repar'Acteur,reparacteur,True,False,https://www.artisanat.fr/annuaire-repar-acteurs,
1,3,Re_fashion,refashion,True,True,https://refashion.fr/citoyen/fr/bonus-reparation,logos/Refashion_32.png
2,4,Ecomaison,ecomaison,True,True,https://ecomaison.com/developper-reparation/,logos/ecomaison32.png
3,5,Bonus Répar,bonusrepar,True,True,,logos/BonusRepar32.png
4,2,QualiRépar,qualirepar,True,True,https://www.label-qualirepar.fr/,logos/logo-qualirepar.png


In [4]:
df_action = pd.read_sql_table('qfdmo_action', engine)
engine

Engine(postgresql://quefairedem_2657:***@quefairedem-2657.postgresql.a.osc-fr1.scalingo-dbs.com:33517/quefairedem_2657)

In [5]:
df_action

Unnamed: 0,id,code,libelle,order,description,couleur,icon,afficher
0,9,echanger,échanger,7,,blue-cumulus,fr-icon-action-echanger,True
1,6,mettreenlocation,mettre en location,4,Mettre en location,purple-glycine,fr-icon-action-mettreenlocation,True
2,5,louer,louer,3,,purple-glycine,fr-icon-action-louer,True
3,4,donner,donner,6,,yellow-tournesol,fr-icon-action-donner,True
4,3,revendre,vendre,9,,brown-cafe-creme,fr-icon-action-vendre,True
5,2,acheter,acheter de seconde main,8,acheter d'occasion,brown-cafe-creme,fr-icon-action-acheter,True
6,1,reparer,réparer,5,,green-menthe,fr-icon-action-reparer,True
7,8,preter,prêter,1,,orange-terre-battue,fr-icon-action-preter,True
8,7,emprunter,emprunter,2,,orange-terre-battue,fr-icon-action-emprunter,True
9,43,trier,trier,10,trier pour recycler,yellow-tournesol,fr-icon-recycle-line,True


### Mappers

In [7]:
column_mapping = {
    'id_point_apport_ou_reparation': 'identifiant_externe',
    'adresse_complement': 'adresse_complement',
    'type_de_point_de_collecte': 'acteur_type_id',
    'telephone': 'telephone',
    'siret': 'siret',
    'uniquement_sur_rdv': '',
    'exclusivite_de_reprisereparation': '',
    'filiere': '',
    'public_accueilli': '',
    'produitsdechets_acceptes': '',
    'labels_etou_bonus': '',
    'reprise': '',
    'point_de_reparation': '',
    'ecoorganisme': 'source_id',
    'adresse_format_ban': 'adresse',
    'nom_de_lorganisme': 'nom',
    'enseigne_commerciale':'nom_commercial',
    '_updatedAt':'cree_le',
    'site_web': 'url',
    'email': 'email',
    'perimetre_dintervention': '',
    'longitudewgs84': 'location',  
    'latitudewgs84': 'location',  
    'horaires_douverture': 'horaires',
    'consignes_dacces': 'description',
}


# Print the dictionary for visual confirmation
print(column_mapping)

{'id_point_apport_ou_reparation': 'identifiant_externe', 'adresse_complement': 'adresse_complement', 'type_de_point_de_collecte': 'acteur_type_id', 'telephone': 'telephone', 'siret': 'siret', 'uniquement_sur_rdv': '', 'exclusivite_de_reprisereparation': '', 'filiere': '', 'public_accueilli': '', 'produitsdechets_acceptes': '', 'labels_etou_bonus': '', 'reprise': '', 'point_de_reparation': '', 'ecoorganisme': 'source_id', 'adresse_format_ban': 'adresse', 'nom_de_lorganisme': 'nom', 'enseigne_commerciale': 'nom_commercial', '_updatedAt': 'cree_le', 'site_web': 'url', 'email': 'email', 'perimetre_dintervention': '', 'longitudewgs84': 'location', 'latitudewgs84': 'location', 'horaires_douverture': 'horaires', 'consignes_dacces': 'description'}


In [10]:
df.columns

Index(['id_point_apport_ou_reparation', 'adresse_complement',
       'type_de_point_de_collecte', 'telephone', '_i', 'siret',
       'uniquement_sur_rdv', 'exclusivite_de_reprisereparation', 'filiere',
       'public_accueilli', '_rand', 'point_dapport_pour_reemploi',
       'point_de_collecte_ou_de_reprise_des_dechets',
       'produitsdechets_acceptes', 'labels_etou_bonus', 'reprise',
       'point_de_reparation', 'ecoorganisme', 'adresse_format_ban',
       'nom_de_lorganisme', 'enseigne_commerciale', '_updatedAt',
       'point_dapport_de_service_reparation', 'site_web', '_score', '_id',
       'service_a_domicile', 'email', 'perimetre_dintervention',
       'longitudewgs84', '_geopoint', 'latitudewgs84', 'horaires_douverture',
       'consignes_dacces', 'identifiant_externe', 'acteur_type_id'],
      dtype='object')

### Transformations

#### Create Actors

In [8]:
from shapely.geometry import Point
from shapely import wkb
import re
import hashlib


selected_columns = ['nom', 'adresse', 'type_de_point_de_collecte', 'id_point_apport_ou_reparation','identifiant_externe']

def generate_unique_id(row):
    unique_str = '_'.join([str(row[col]) for col in selected_columns])
    return hashlib.sha256(unique_str.encode()).hexdigest()
def transform_acteur_type_id(value):
    mapping_dict = {
        "Solution en ligne (site web, app. mobile)": "en ligne (web, mobile)",
        "Artisan, commerce indépendant": "artisan, commerce indépendant",
        "Magasin / Franchise, Enseigne commerciale / Distributeur / Point de vente": "commerce",
        "Point d'Apport Volontaire Publique": "point d'apport volontaire public",
        "Association, entreprise de l’économie sociale et solidaire (ESS)": "Association, entreprise de l'ESS",
        "Déchèterie": "déchèterie",
    }
    libelle = mapping_dict.get(value)
    id_value = df_acteurtype.loc[df_acteurtype['libelle'] == libelle, 'id'].values[0] if any(df_acteurtype['libelle'] == libelle) else None
    return id_value



def transform_location(longitude, latitude):
    point = Point(longitude, latitude)
    
    transformed_location_binary = wkb.dumps(point)
    transformed_location_hex = transformed_location_binary.hex()

    return transformed_location_hex

def transform_ecoorganisme(value):
    
    id_value = df_sources.loc[df_sources['code'].str.lower() == value.lower(), 'id'].values[0] if any(df_sources['code'].str.lower() == value.lower()) else None
    return id_value

def extract_details(row):
    pattern = re.compile(r'\b(\d{5})\s+(.*)')
    
    address = None
    postal_code = None
    city = None
    if pd.isnull(row['adresse_format_ban']):
        return pd.Series([None, None, None])

    # Ensure adress_ban is treated as a string
    adress_ban = str(row['adresse_format_ban'])
    
    # Search for the pattern
    match = pattern.search(adress_ban)
    if match:
        postal_code = match.group(1)
        city = match.group(2)
        address = adress_ban[:match.start()].strip()
    
    return pd.Series([address, postal_code, city])

# Apply the function and assign the result to new columns
for old_col, new_col in column_mapping.items():
    if new_col: 
        if old_col == 'type_de_point_de_collecte':
            df[new_col] = df[old_col].apply(transform_acteur_type_id)
        elif old_col in ('longitudewgs84', 'latitudewgs84'):
            df['location'] = df.apply(lambda row: transform_location(row['longitudewgs84'], row['latitudewgs84']), axis=1)
        elif old_col == 'ecoorganisme':
            df[new_col] = df[old_col].apply(transform_ecoorganisme)
        elif old_col == 'adresse_format_ban':
            df[['adresse', 'code_postal', 'ville']] = df.apply(extract_details, axis=1)
        else:
            df[new_col] = df[old_col]
df['label_reparacteur']=False
df['statut']='ACTIF'
df['identifiant_unique'] = df.apply(generate_unique_id, axis=1)
            


In [13]:
df.loc[df['service_a_domicile']=='service à domicile uniquement','statut'] = 'SUPPRIME'

In [14]:
df['modifie_le'] = df['cree_le']
df['siret'] = df['siret'].astype(str).apply(lambda x : x[:14])
df['telephone'] = df['telephone'].dropna().apply(lambda x: x.replace(' ', ''))
df['telephone'] = df['telephone'].dropna().apply(lambda x: '0' + x[2:] if x.startswith('33') else x)

In [None]:
df.drop_duplicates('identifiant_unique', keep='first', inplace=True)

#### Create Proposition de services

In [None]:
rows_list = []

for index, row in df.iterrows():
    acteur_id = row['identifiant_unique']
    action_id = None
    sous_categories = row['produitsdechets_acceptes']
    if row['point_dapport_de_service_reparation']:
        acteur_service_id = 17
        action_id = 1
    elif row['point_dapport_pour_reemploi']:
        acteur_service_id = 4
        action_id = 4
    elif row['point_de_reparation']:
        acteur_service_id = 15
        action_id = 1
    elif row['point_de_collecte_ou_de_reprise_des_dechets']:
        acteur_service_id = 4
        action_id = 43
    else:
        continue  # Skip rows that don't match any criteria
    
    rows_list.append({"acteur_service_id": acteur_service_id, "action_id": action_id, "acteur_id": acteur_id, "sous_categories":sous_categories})

df_pds = pd.DataFrame(rows_list)
df_pds.index = range(df_ps['id'].max()+1, df_ps['id'].max()+1 + len(df_pds))

df_pds['id'] = df_pds.index


#### Create sous categories

In [None]:
rows_list=[]
sous_categories = { 
    "Vêtement" : 107,
    "Linge" : 104,
    "Chaussure":109
}
for index, row in df_pds.iterrows():
    products = str(row["sous_categories"]).split("|")
    for product in products:
        if product.strip() in sous_categories:
            rows_list.append({
                'propositionservice_id': row['id'], 
                'souscategorieobjet_id': sous_categories[product.strip()]
            })

df_sous_categories = pd.DataFrame(rows_list, columns=['propositionservice_id', 'souscategorieobjet_id'])

df_sous_categories

In [10]:
df['labels_etou_bonus'].unique()

array(['Agréé Bonus Réparation', nan], dtype=object)

#### Create libellé

In [16]:
rows_list= []
for index, row in df.iterrows():
    label = str(row["labels_etou_bonus"])
    if label == 'Agréé Bonus Réparation':
            rows_list.append({
                'acteur_id': row['identifiant_unique'], 
                'labelqualite_id': 3,
                'labelqualite': df_libel.loc[df_libel["id"]==3,"libelle"].tolist()[0]
            })

df_libelles = pd.DataFrame(rows_list, columns=['acteur_id', 'labelqualite_id', 'labelqualite'])

df_libelles

Unnamed: 0,acteur_id,labelqualite_id,labelqualite
0,5c761cd79c679af340a540c4d77cca46bd5cef232e2fcf...,3,Re_fashion
1,da991542b3c34ec43043501b5e8c0fa7025c9e4d485a97...,3,Re_fashion
2,36ff562c2a87bda80a22950f01bb7cf66c4f2dcda31949...,3,Re_fashion
3,c868120d7e4f4f2d400672ac8af567bbc771966814ff37...,3,Re_fashion
4,c1dc4d491615af30a41ea0042ce0ac785d0fb434361d92...,3,Re_fashion
...,...,...,...
916,f978ca97c24b6f509e97008edbac01e2b7b6f9335026b4...,3,Re_fashion
917,0cf31f76552155f68a6b5bf3d6ac7b27c402a329306998...,3,Re_fashion
918,dc9ec750646094de9057bfa26d06d8eaee5030dbf17458...,3,Re_fashion
919,99ec7e15a132bc57a741cbb65f5bae338a5a1af13de8f9...,3,Re_fashion


'Re_fashion'

#### Add to DB

In [None]:

df[[
    'identifiant_unique',
    'acteur_type_id',
'adresse',
    'code_postal', 'ville',
 'adresse_complement',
 'commentaires',
    'description',
 'email',
 'horaires',
 'identifiant_externe',
 'label_reparacteur',
 'nom_commercial',
 'nom',
    'location',
'cree_le',
'modifie_le',
'multi_base',
'manuel',
'statut',
 'siret',
 'source_id',
 'telephone',
 'url'
]].to_sql("qfdmo_acteur",engine, if_exists='append',index=False,method='multi',chunksize=1000)

In [None]:
df_pds[['acteur_service_id','action_id','acteur_id','id']].to_csv('refashion_propositionservice.csv')
df_pds[['id','acteur_service_id','action_id','acteur_id']].to_sql("qfdmo_propositionservice",engine, if_exists='append',index=False,method='multi',chunksize=1000)

In [None]:
df_sous_categories[['propositionservice_id','souscategorieobjet_id']].to_csv('refashion_sous_categories.csv')

In [None]:
df_sous_categories.to_sql("qfdmo_propositionservice_sous_categories",engine, if_exists='append',index=False,method='multi',chunksize=1000)

In [None]:
delete_query= """
DELETE FROM qfdmo_propositionservice_sous_categories
    USING qfdmo_propositionservice_sous_categories_refashion
    WHERE qfdmo_propositionservice_sous_categories.propositionservice_id = qfdmo_propositionservice_sous_categories_refashion.propositionservice_id
    AND qfdmo_propositionservice_sous_categories.souscategorieobjet_id = qfdmo_propositionservice_sous_categories_refashion.souscategorieobjet_id;"""

In [None]:
import psycopg2
from psycopg2 import sql
from sqlalchemy import create_engine
user = 
password = 
host = 
port = '33517'  # default PostgreSQL port is 5432
db_name = 'quefairedem_2657'
# Access variables in .env
conn = psycopg2.connect(
    dbname=db_name, 
    user=user, 
    password=password, 
    host=host,
    port=port
)
conn.autocommit = True
cursor = conn.cursor()

query = sql.SQL(sql_query)
cursor.execute(query)

cursor.close()
conn.close()

In [None]:
df.loc[df['service_a_domicile']=='service à domicile uniquement',['statut','identifiant_unique']].to_sql("qfdmo_acteur_fix_sd",engined)

## Revision Christian --> revisionacteur

In [65]:
df_chris_rev = pd.read_csv("./../../refashion_acteurs_chris_28032024.csv")

  df_chris_rev = pd.read_csv("./../../refashion_acteurs_chris_28032024.csv")


In [69]:
df_chris_rev[['nom','horaires','url','adresse_complement']]

Unnamed: 0,nom,horaires,url,adresse_complement
0,MFC,,https://www.lamanufacture49.fr/,SAINT-PIERRE-MONTLIMART
1,TILLI,,https://tilli.fr/,
2,TILLI,,https://tilli.fr/,
3,TILLI,,https://tilli.fr/,
4,A.C.L PROXI POL.,,,
...,...,...,...,...
35925,APIVET,24h/24. 7j/7,https://www.apivet49.com/,Place Tout Blanc
35926,APIVET,24h/24. 7j/7,https://www.apivet49.com/,
35927,APIVET,24h/24. 7j/7,https://www.apivet49.com/,
35928,APIVET,24h/24. 7j/7,https://www.apivet49.com/,


In [33]:
df_revact = pd.read_sql_table("qfdmo_revisionacteur",engine_prod)
df_actprod = pd.read_sql_table("qfdmo_acteur",engine_prod)

  self.meta.reflect(bind=self.con, only=[table_name], views=True)
  self.meta.reflect(bind=self.con, only=[table_name], views=True)


In [44]:
df_revact['identifiant_unique'].count()

160552

In [45]:
df_rev_man = pd.merge(df_actprod[(df_actprod['source_id']==45)][['identifiant_unique']],df_revact, on = ['identifiant_unique'])

In [60]:
df_refashion = pd.read_sql_table("qfdmo_acteur",  engine)

  self.meta.reflect(bind=self.con, only=[table_name], views=True)


In [62]:
filtered_df = df_refashion[df_refashion['identifiant_unique'].str.startswith("refashion_")]
filtered_df

Unnamed: 0,nom,identifiant_unique,adresse,adresse_complement,code_postal,ville,url,email,location,telephone,...,acteur_type_id,statut,source_id,cree_le,modifie_le,naf_principal,commentaires,horaires_osm,description,horaires_description
21690,Ghenam Reparation,refashion_SWKLYBWCFOLZ,7 Rue Franklin,,93100,Montreuil,https://ghenam-reparation.jimdosite.com/nos-ta...,,0101000020E6100000990F0874268D034068321CCF676E...,,...,3,ACTIF,45,2023-11-08 17:28:48.329397+00:00,2023-11-24 10:17:25.965587+00:00,,,,,
42031,Patine Studio,refashion_FINMHILZPXHW,8 rue Martel,,75010,Paris,https://www.patine.fr/blogs/book-a-session,hello@patine.fr,0101000020E6100000D828EB3713D30240A75D4C33DD6F...,0181701618,...,3,ACTIF,45,2024-01-11 12:55:34.682766+00:00,2024-01-11 12:56:35.642529+00:00,47.91A,,Tu-Sa 11:00-19:00,,
42232,Au fil et à mesure,refashion_WWXFNUVVIMHC,7 Rue de la Vendée,,79130,Secondigny,https://secondigny.fr/utile/annuaire-entrepris...,,0101000020E61000001477BCC96FD1DABF58478E74064E...,06 24 35 91 55,...,3,ACTIF,45,2024-01-17 07:55:28.288884+00:00,2024-01-17 07:55:28.288894+00:00,14.13Z,,,,
42420,La Réserve Varzy,refashion_KFUJSXXOKJPM,19 Rue Delangle,,58210,Varzy,https://www.facebook.com/lareservevarzy/,asso.lareservevarzy@gmail.com,0101000020E61000007FA4880CAB180B400853944BE3AD...,06 04 01 18 10,...,4,ACTIF,45,2024-03-06 17:44:16.226699+00:00,2024-03-06 17:44:16.226711+00:00,47.29Z,,,,
168751,MFC,refashion_TLC-REFASHION-REP-455001208507113095_d,RTE CHAUDRON,SAINT-PIERRE-MONTLIMART,49110,MONTREVAULT-SUR-EVRE,https://www.lamanufacture49.fr/,,0101000020E6100000000000000000F87F000000000000...,0241754850,...,5,ACTIF,45,2024-03-05 07:44:03.205000+00:00,2024-03-05 07:44:03.205000+00:00,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205929,APIVET,refashion_TLC-REFASHION-PAV-3271797,Angle rue Bertin et Avenue Jean Joxe,Place Tout Blanc,49000,Angers,,,0101000020E610000082C5E1CCAF66E1BF82548A1D8DBD...,,...,10,ACTIF,45,2024-03-05 07:43:41.565000+00:00,2024-03-05 07:43:41.565000+00:00,,,,,24h/24. 7j/7
205930,APIVET,refashion_TLC-REFASHION-PAV-3271796,rue Louis Gain,,49000,Angers,,,0101000020E6100000925852EE3E47E1BF2B4CDF6B08BC...,,...,10,ACTIF,45,2024-03-05 07:43:41.565000+00:00,2024-03-05 07:43:41.565000+00:00,,,,,24h/24. 7j/7
205931,APIVET,refashion_TLC-REFASHION-PAV-3271795,Place André Leroy,,49000,Angers,,,0101000020E6100000B2BCAB1E308FE1BFBA1457957DBB...,,...,10,ACTIF,45,2024-03-05 07:43:41.565000+00:00,2024-03-05 07:43:41.565000+00:00,,,,,24h/24. 7j/7
205932,APIVET,refashion_TLC-REFASHION-PAV-3271794,101 RUE ST NICOLAS,,49000,Angers,,,0101000020E6100000822A244F4821E2BF03DA0C26B4BC...,,...,10,ACTIF,45,2024-03-05 07:43:41.565000+00:00,2024-03-05 07:43:41.565000+00:00,,,,,24h/24. 7j/7


In [47]:
df_rev_man.to_sql("rev_refashion",engine_prod, if_exists='replace')

81

In [140]:
import psycopg2
from psycopg2 import sql

# Connect to the database again
conn = psycopg2.connect(
    dbname=db_name, 
    user=user, 
    password=password, 
    host=host,
    port=port
)
conn.autocommit = True
cursor = conn.cursor()

# Perform the update
cursor.execute("""
 UPDATE qfdmo_revisionacteur
SET 
    acteur_type_id = NULL,
    adresse = NULL,
    code_postal = NULL, 
    ville = NULL,
    email = NULL,
    horaires = NULL,
    identifiant_externe = NULL,
    label_reparacteur = qfdmo_revision_acteur_enrich_christian.label_reparacteur,
    nom_commercial = NULL,
    nom = NULL,
    location = NULL,
    cree_le = NOW(),
    modifie_le = NOW(),
    statut = qfdmo_revision_acteur_enrich_christian.statut,
    siret = NULL,
    source_id = NULL,
    telephone = NULL,
    description = qfdmo_revision_acteur_enrich_christian.description,
    adresse_complement = qfdmo_revision_acteur_enrich_christian.adresse_complement,
    url = qfdmo_revision_acteur_enrich_christian.url
FROM rev_refashion
WHERE qfdmo_revisionacteur.identifiant_unique = qfdmo_revision_acteur_enrich_christian.identifiant_unique;

""")

# Cleanup
cursor.close()
conn.close()

DELETE FROM qfdmo_displayedpropositionservice_sous_categories
WHERE propositionservice_id IN (
    SELECT id FROM qfdmo_propositionservice
    WHERE acteur_id IN (
        SELECT identifiant_unique FROM qfdmo_acteur WHERE source_id = 45
    )
);
DELETE 105969
quefairedem_2657=> DELETE FROM qfdmo_propositionservice                
WHERE acteur_id IN (
    SELECT identifiant_unique FROM qfdmo_acteur WHERE source_id = 45
);
DELETE 35930
quefairedem_2657=> delete from qfdmo_acteur where identifiant_unique =45;
ERROR:  operator does not exist: character varying = integer
LINE 1: delete from qfdmo_acteur where identifiant_unique =45;
                                                          ^
HINT:  No operator matches the given name and argument types. You might need to add explicit type casts.
quefairedem_2657=> delete from qfdmo_acteur where source_id =45;


In [133]:
df = pd.read_sql_table("rev_refashion",engine)

In [149]:
sources =  pd.read_sql_table("qfdmo_sources_acteurs",engine)

In [180]:
df_rev = pd.merge(df[df['source_id_y']==45],sources[['identifiant_unique','identifiant_externe']],left_on=['identifiant_externe_y'], right_on=['identifiant_externe'])

In [182]:
df_rev = df_rev.drop(columns=['identifiant_unique_x'])
df_rev =  df_rev.rename(columns={'identifiant_unique_y':'identifiant_unique'})
df_rev.drop_duplicates()

In [203]:
df_rev[
            [
                "identifiant_unique",
                "nom",
                "adresse",
                "adresse_complement",
                "code_postal",
                "ville",
                "url",
                "email",
                "location",
                "telephone",
                "nom_commercial",
                "nom_officiel",
                "siret",
                "identifiant_externe",
                "acteur_type_id",
                "statut",
                "cree_le",
                "modifie_le",
                "naf_principal",
                "commentaires",
                "horaires_osm",
                "horaires_description",
                "description",
            ]
        ].to_sql('qfdmo_revisionacteur',engine,            index=False,
if_exists="append",method="multi",
                chunksize=1000)

35929

In [20]:
df_sql = pd.read_sql_query(
    "SELECT * FROM qfdmo_dagrunchange WHERE "
    "dag_run_id IN "
    "(SELECT id FROM qfdmo_dagrun WHERE status = 'DagRunStatus.TO_INSERT')",
    engine,
)

In [21]:
df_sql

Unnamed: 0,id,change_type,meta_data,row_updates,dag_run_id
0,646816,CREATE,,"{'nom': 'MFC', 'url': 'https://www.lamanufactu...",30
1,646817,CREATE,,"{'nom': 'TILLI', 'url': 'https://tilli.fr/', '...",30
2,646818,CREATE,,"{'nom': 'TILLI', 'url': 'https://tilli.fr/', '...",30
3,646819,CREATE,,"{'nom': 'TILLI', 'url': 'https://tilli.fr/', '...",30
4,646820,CREATE,,"{'nom': 'A.C.L PROXI POL.', 'url': None, 'emai...",30
...,...,...,...,...,...
35925,682741,CREATE,,"{'nom': 'APIVET', 'url': None, 'email': None, ...",30
35926,682742,CREATE,,"{'nom': 'APIVET', 'url': None, 'email': None, ...",30
35927,682743,CREATE,,"{'nom': 'APIVET', 'url': None, 'email': None, ...",30
35928,682744,CREATE,,"{'nom': 'APIVET', 'url': None, 'email': None, ...",30


In [22]:
max_id_pds = pd.read_sql_query(
    "SELECT max(id) FROM qfdmo_displayedpropositionservice", engine
)["max"][0]
df_sql = pd.read_sql_query(
    "SELECT * FROM qfdmo_dagrunchange WHERE "
    "dag_run_id IN "
    "(SELECT id FROM qfdmo_dagrun WHERE status = 'DagRunStatus.TO_INSERT')",
    engine,
)
dag_run_id = df_sql["dag_run_id"].iloc[0]

normalized_dfs = df_sql["row_updates"].apply(pd.json_normalize)
df_actors = pd.concat(normalized_dfs.tolist(), ignore_index=True)

normalized_labels_dfs = df_actors["labels"].apply(pd.json_normalize)
df_labels = pd.concat(normalized_labels_dfs.tolist(), ignore_index=True)

normalized_pds_dfs = df_actors["proposition_services"].apply(pd.json_normalize)
df_pds = pd.concat(normalized_pds_dfs.tolist(), ignore_index=True)
ids_range = range(max_id_pds + 1, max_id_pds + 1 + len(df_pds))

df_pds["id"] = ids_range
df_pds["pds_sous_categories"] = df_pds.apply(
    lambda row: [
        {**d, "propositionservice_id": row["id"]}
        for d in row["pds_sous_categories"]
    ],
    axis=1,
)

normalized_pdssc_dfs = df_pds["pds_sous_categories"].apply(pd.json_normalize)
df_pdssc = pd.concat(normalized_pdssc_dfs.tolist(), ignore_index=True)

  df_actors = pd.concat(normalized_dfs.tolist(), ignore_index=True)


NotImplementedError: 

In [26]:
normalized_labels_dfs = df_actors["labels"].apply(pd.json_normalize)


NotImplementedError: 

In [30]:
df_actors["labels"].dropna().apply(pd.json_normalize)[0]

Unnamed: 0,acteur_id,labelqualite,labelqualite_id
0,refashion_TLC-REFASHION-REP-455001208507113095_d,Re_fashion,3
