In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# === Chargement clients + comptes et jointure ===
print("üìã Chargement des donn√©es clients et comptes...")
clients_df = pd.read_csv(r"C:\Users\MSI\Desktop\Attijari_bank\stage\clients.csv", dtype={'id_client': str})
comptes_df = pd.read_csv(r"C:\Users\MSI\Desktop\Attijari_bank\stage\comptes.csv", dtype={'id_compte': str, 'id_client': str})
comptes_df['date_ouverture'] = pd.to_datetime(comptes_df['date_ouverture'], errors='coerce')

# === Jointure INNER JOIN pour r√©cup√©rer type_client ===
comptes_df = comptes_df.merge(clients_df[['id_client', 'type_client']], on='id_client', how='inner')
comptes_df.dropna(inplace=True)

if comptes_df['type_client'].isnull().sum() > 0:
    raise ValueError("‚ùå Des NaN subsistent dans type_client apr√®s jointure.")
else:
    print("‚úÖ Jointure r√©ussie : aucun NaN dans type_client.")

# === Param√®tres globaux ===
types_operations = {
    'DEBIT': ['Retrait DAB', 'Achat TPE', 'Virement sortant', 'Prelevement automatique',
              'Frais bancaires', 'Commission', 'Achat en ligne', 'Retrait agence',
              'Paiement facture', 'Transfert international sortant', 'Paiement carte',
              'Achat carburant', 'Supermarche', 'Restaurant', 'Pharmacie', 'Transport',
              'Emission cheque'],
    'CREDIT': ['Virement entrant', 'Depot especes', 'Depot cheque', 'Salaire', 'Remboursement',
               'Transfert international entrant', 'Interets', 'Depot agence', 'Virement interne',
               'Prime', 'Allocation', 'Pension', 'Indemnite', 'Cashback', 'Bonus', 'Encaissement cheque']
}

start_date = datetime(2022, 1, 1)
end_date = datetime(2024, 12, 31)

# === Fonctions utilitaires ===
def generate_operation_id(counter): return f"OP{counter:07d}"
def generate_random_datetime(start, end): return start + timedelta(days=random.randint(0, (end - start).days), seconds=random.randint(0, 86400))

def get_date_operation_valide(date_ouverture):
    date_min = max(date_ouverture if pd.notna(date_ouverture) else start_date, start_date)
    if date_min > end_date:
        date_min = end_date - timedelta(days=30)
    return generate_random_datetime(date_min, end_date)

def get_montant_base_par_type_client(type_client):
    base = {
        'Etudiant': {'min': 5, 'max': 200, 'moyenne': 50},
        'Sans revenu': {'min': 10, 'max': 100, 'moyenne': 30},
        'Particulier': {'min': 20, 'max': 1000, 'moyenne': 200},
        'Professionnel': {'min': 50, 'max': 5000, 'moyenne': 800},
        'Entreprise': {'min': 100, 'max': 10000, 'moyenne': 2000},
        'VIP': {'min': 200, 'max': 50000, 'moyenne': 5000}
    }
    return base.get(type_client, base['Particulier'])

def get_montant_operation(op, type_client, solde):
    base = get_montant_base_par_type_client(type_client)
    if 'Salaire' in op or 'Prime' in op: return round(random.uniform(base['moyenne']*3, base['moyenne']*8), 2)
    if 'Retrait' in op: return round(random.uniform(base['min'], min(solde*0.4, base['max']*0.3)), 2)
    if any(x in op for x in ['Achat', 'TPE', 'Supermarche', 'Restaurant']): return round(random.uniform(base['min'], base['moyenne']), 2)
    if 'Virement' in op: return round(random.uniform(base['min']*2, base['moyenne']*2), 2)
    if 'Depot' in op: return round(random.uniform(base['min']*3, base['moyenne']*1.5), 2)
    if 'Frais' in op or 'Commission' in op: return round(random.uniform(5, 100), 2)
    if 'Transfert international' in op: return round(random.uniform(base['moyenne'], base['max']), 2)
    if 'Interets' in op: return round(solde * random.uniform(0.0001, 0.005), 2)
    return round(random.uniform(base['min'], base['moyenne']), 2)

def get_lieu_operation(op, ville):
    lieux = {
        'publics': ['Carrefour', 'Monoprix', 'MG', 'G√©ant'],
        'online': ['Amazon', 'AliExpress', 'Jumia'],
        'admin': ['CNSS', 'CNAM', 'Municipalit√©', 'Steg']
    }
    if any(w in op.lower() for w in ['supermarche', 'restaurant', 'tpe']): return random.choice(lieux['publics']) + f" - {ville}"
    if 'ligne' in op.lower(): return random.choice(lieux['online'])
    if 'salaire' in op.lower(): return f"Tr√©sorerie - {ville}"
    if 'facture' in op.lower(): return random.choice(lieux['admin']) + f" - {ville}"
    if 'agence' in op.lower(): return f"Agence Attijari Bank - {ville}"
    return ville

def get_nb_operations(type_client, etat): 
    if etat != 'Actif': return random.randint(1, 5)
    base = {
        'Etudiant': (50, 100),
        'Sans revenu': (20, 60),
        'Particulier': (80, 150),
        'Professionnel': (150, 300),
        'Entreprise': (250, 500),
        'VIP': (300, 800)
    }
    return random.randint(*base.get(type_client, (80, 150)))

def is_cheque_eligible(compte):
    e = compte.get('eligible_chequier', False)
    d = compte.get('deja_cheque', False)
    if isinstance(e, str): e = e.lower() in ['true', '1', 'yes', 'oui']
    if isinstance(d, str): d = d.lower() in ['true', '1', 'yes', 'oui']
    return e or d

def choisir_type_operation(sens, eligible_cheque):
    ops = types_operations[sens].copy()
    ops_cheques = [o for o in ops if 'cheque' in o.lower()]
    if not eligible_cheque:
        ops = [o for o in ops if 'cheque' not in o.lower()]
    elif random.random() < 0.7 and ops_cheques:
        return random.choice(ops_cheques)
    return random.choice(ops)

def get_cheque_info(op, montant):
    if 'cheque' in op.lower():
        n = random.randint(1, 5)
        montant_par = max(10.0, round(montant / n, 2))
        montant = n * montant_par
        return n, montant_par, montant
    return 0, 0.0, montant

def calculer_nouveau_solde(solde, montant, sens):
    return solde + montant if sens == 'CREDIT' else solde - montant

# === G√©n√©ration des op√©rations ===
print("\n‚è≥ G√©n√©ration des op√©rations (2022-2024)...")
operations = []
counter = 1

for _, compte in comptes_df.iterrows():
    id_compte = compte['id_compte']
    type_client = compte['type_client']
    etat = compte['etat_compte']
    solde = compte['solde_initial']
    ville = compte['agence'].replace("Attijari Bank ", "")
    ouverture = compte['date_ouverture']
    eligible = is_cheque_eligible(compte)

    nb_ops = get_nb_operations(type_client, etat)
    dates = sorted([get_date_operation_valide(ouverture) for _ in range(nb_ops)])

    for i, date_op in enumerate(dates):
        sens = 'CREDIT' if i == 0 and type_client in ['Entreprise', 'VIP', 'Professionnel'] else random.choices(['CREDIT', 'DEBIT'], [30, 70])[0]
        type_op = choisir_type_operation(sens, eligible)
        montant = get_montant_operation(type_op, type_client, abs(solde))

        # Contr√¥le du d√©couvert
        if sens == 'DEBIT' and solde - montant < -1000:
            if random.random() < 0.6:
                montant = max(5, solde + 500)
            else:
                sens = 'CREDIT'
                type_op = choisir_type_operation(sens, eligible)
                montant = get_montant_operation(type_op, type_client, abs(solde))

        n_cheques, m_cheque, montant_final = get_cheque_info(type_op, montant)
        lieu = get_lieu_operation(type_op, ville)
        solde_apres = calculer_nouveau_solde(solde, montant_final, sens)

        operations.append({
            'id_operation': generate_operation_id(counter),
            'id_compte': id_compte,
            'type_operation': type_op,
            'montant_total': montant_final,
            'date_operation': date_op.date(),
            'lieu_operation': lieu,
            'montant_par_cheque': m_cheque,
            'nombre_cheques': n_cheques,
            'sens_operation': sens,
            'solde_avant': solde,
            'solde_apres': solde_apres
        })

        counter += 1
        solde = solde_apres

# === Export CSV final ===
df_ops = pd.DataFrame(operations)
df_ops.to_csv(r"C:\Users\MSI\Daesktop\Attijari_bank\stage\operations.csv", index=False)
print(f"\n‚úÖ {len(df_ops):,} op√©rations g√©n√©r√©es et enregistr√©es avec succ√®s.")




üìã Chargement des donn√©es clients et comptes...
‚úÖ Jointure r√©ussie : aucun NaN dans type_client.

‚è≥ G√©n√©ration des op√©rations (2022-2024)...

‚úÖ 67,411,391 op√©rations g√©n√©r√©es et enregistr√©es avec succ√®s.


KeyError: 'annee'

In [3]:

df_ops.head()

Unnamed: 0,id_operation,id_compte,type_operation,montant_total,date_operation,lieu_operation,montant_par_cheque,nombre_cheques,sens_operation,solde_avant,solde_apres
0,OP0000001,9070725833,Depot cheque,195.03,2022-01-10,Sidi Bouzid,195.03,1,CREDIT,8706.1,8901.13
1,OP0000002,9070725833,Achat carburant,539.34,2022-01-16,Sidi Bouzid,0.0,0,DEBIT,8901.13,8361.79
2,OP0000003,9070725833,Depot cheque,876.76,2022-01-24,Sidi Bouzid,438.38,2,CREDIT,8361.79,9238.55
3,OP0000004,9070725833,Emission cheque,750.24,2022-01-25,Sidi Bouzid,375.12,2,DEBIT,9238.55,8488.31
4,OP0000005,9070725833,Encaissement cheque,470.8,2022-02-09,Sidi Bouzid,94.16,5,CREDIT,8488.31,8959.11


In [5]:
df_ops.info

<bound method DataFrame.info of          id_operation   id_compte       type_operation  montant_total  \
0           OP0000001  9070725833         Depot cheque         195.03   
1           OP0000002  9070725833      Achat carburant         539.34   
2           OP0000003  9070725833         Depot cheque         876.76   
3           OP0000004  9070725833      Emission cheque         750.24   
4           OP0000005  9070725833  Encaissement cheque         470.80   
...               ...         ...                  ...            ...   
67411386   OP67411387  8426195829      Emission cheque         180.35   
67411387   OP67411388  8426195829      Emission cheque         377.16   
67411388   OP67411389  8426195829         Depot cheque        1162.36   
67411389   OP67411390  8426195829      Emission cheque         470.56   
67411390   OP67411391  9737413374            Indemnite         559.24   

         date_operation lieu_operation  montant_par_cheque  nombre_cheques  \
0            

In [7]:
# === Partie statistique compl√®te ===
df_ops['annee'] = pd.to_datetime(df_ops['date_operation']).dt.year

# Calcul des statistiques
nb_cheques = df_ops['nombre_cheques'].gt(0).sum()
pourcentage_cheques = (nb_cheques / len(df_ops)) * 100
total_montant = df_ops['montant_total'].sum()
ops_par_compte = df_ops.groupby('id_compte')['id_operation'].count()
erreurs_solde = df_ops[df_ops['solde_apres'] < -1000]

# Exporter les statistiques dans un fichier texte
with open(r"C:\Users\MSI\Desktop\Attijari_bank\stage\stats_operations.txt", "w", encoding="utf-8") as f:
    f.write(f"üßæ Total op√©rations : {len(df_ops):,}\n\n")
    f.write("üîÅ R√©partition CREDIT/DEBIT:\n")
    f.write(df_ops['sens_operation'].value_counts().to_string())
    f.write("\n\nüè∑Ô∏è Top 10 types d‚Äôop√©rations:\n")
    f.write(df_ops['type_operation'].value_counts().head(10).to_string())
    f.write("\n\nüìÜ R√©partition par ann√©e:\n")
    f.write(df_ops['annee'].value_counts().sort_index().to_string())
    f.write(f"\n\n‚úâÔ∏è Op√©rations avec ch√®ques : {nb_cheques:,} ({pourcentage_cheques:.2f}%)")
    f.write(f"\nüí∞ Montant total : {total_montant:,.2f} TND")
    f.write(f"\nüìà Montant moyen : {df_ops['montant_total'].mean():.2f} TND")
    f.write(f"\nüë§ Moyenne op√©rations par compte : {ops_par_compte.mean():.2f}")
    f.write(f"\n‚ö†Ô∏è D√©passements solde < -1000 TND : {len(erreurs_solde)}\n")
