<a href="https://colab.research.google.com/github/abdessamadmekkaoui/taxi/blob/main/stage_mois_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
# Installation des dépendances
!pip install sentence-transformers ipywidgets

# Imports
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime



In [17]:
class IntelligentSalesAnalyst:
    """Answer free-text questions about a sales CSV.

    A question is embedded with a sentence-transformer model and compared to a
    fixed set of pattern phrases (the keys of ``analysis_patterns``). The
    analysis method mapped to the closest pattern is executed and its textual
    result is returned.

    Attributes:
        df: The loaded DataFrame, or ``None`` until ``load_data`` succeeds.
        model: The sentence-embedding model used to encode text.
        analysis_patterns: Mapping of pattern phrase -> zero-argument analysis
            method returning a string.
    """

    # Name of the sentence-transformer model loaded in __init__.
    MODEL_NAME = 'distiluse-base-multilingual-cased-v1'
    # A question is only answered when its best cosine similarity exceeds this.
    SIMILARITY_THRESHOLD = 0.5
    # Lower-case substrings that mark a column name as the sales column.
    SALES_KEYWORDS = ('vente', 'revenue', 'sale')

    def __init__(self):
        """Load the embedding model and register the supported analyses."""
        self.df = None
        # (pattern keys, embeddings) computed lazily by analyze_question.
        self._pattern_cache = None
        print("Chargement du modèle...")
        self.model = SentenceTransformer(self.MODEL_NAME)
        print("Modèle chargé!")

        self.analysis_patterns = {
            'meilleur mois ventes': self.best_month_analysis,
            'pire mois ventes': self.worst_month_analysis,
            'tendance ventes': self.sales_trend_analysis,
            'total ventes': self.total_sales_analysis,
            'moyenne ventes': self.average_sales_analysis,
            'statistiques générales': self.general_statistics,
            'comparaison périodes': self.period_comparison
        }

    def load_data(self, file_path):
        """Read a CSV into ``self.df`` and try to parse text columns as dates.

        Args:
            file_path: Path (or any source accepted by ``pd.read_csv``).

        Returns:
            True when the file was loaded, False otherwise (the error is
            printed, not raised). On failure ``self.df`` is left untouched.
        """
        try:
            df = pd.read_csv(file_path)
            # Best-effort date detection: try every text-like column and keep
            # the original values when the column does not parse as dates.
            for col in df.columns:
                series = df[col]
                is_text = (pd.api.types.is_object_dtype(series)
                           or pd.api.types.is_string_dtype(series))
                if not is_text:
                    continue
                try:
                    df[col] = pd.to_datetime(series)
                except (ValueError, TypeError, OverflowError):
                    # Not a date column; only parsing failures are ignored.
                    continue

            self.df = df
            print("Données chargées avec succès!")
            print("\nAperçu des données:")
            display(self.df.head())
            print("\nColonnes disponibles:", self.df.columns.tolist())
            return True
        except Exception as e:
            print(f"Erreur lors du chargement des données: {str(e)}")
            return False

    def _find_date_col(self):
        """Return the first datetime column (any resolution, tz-aware or not)."""
        for col in self.df.columns:
            if pd.api.types.is_datetime64_any_dtype(self.df[col]):
                return col
        raise ValueError("Aucune colonne de date n'a été trouvée dans les données")

    def _find_sales_col(self):
        """Return the sales column: a keyword match, preferring numeric columns."""
        candidates = [
            col for col in self.df.columns
            if any(keyword in str(col).lower() for keyword in self.SALES_KEYWORDS)
            # A date column such as 'sale_date' matches the keywords but is
            # never the amount column.
            and not pd.api.types.is_datetime64_any_dtype(self.df[col])
        ]
        if not candidates:
            raise ValueError(
                "Aucune colonne de ventes n'a été trouvée "
                "(nom contenant 'vente', 'revenue' ou 'sale')"
            )
        numeric = [col for col in candidates
                   if pd.api.types.is_numeric_dtype(self.df[col])]
        return numeric[0] if numeric else candidates[0]

    def _monthly_sales(self):
        """Return total sales per 'YYYY-MM' month, in chronological order."""
        date_col, sales_col = self.get_date_and_sales_cols()
        month_keys = self.df[date_col].dt.strftime('%Y-%m')
        monthly = self.df.groupby(month_keys)[sales_col].sum()
        if monthly.empty:
            raise ValueError("Aucune donnée de ventes datée n'est disponible")
        return monthly

    def get_date_and_sales_cols(self):
        """Return ``(date_column, sales_column)`` for the loaded data.

        Raises:
            ValueError: If no datetime column or no sales column can be found.
        """
        return self._find_date_col(), self._find_sales_col()

    def best_month_analysis(self):
        """Describe the month with the highest total sales."""
        monthly_sales = self._monthly_sales()
        best_month = monthly_sales.idxmax()
        return f"Le meilleur mois était {best_month} avec {monthly_sales[best_month]:,.2f} € de ventes"

    def worst_month_analysis(self):
        """Describe the month with the lowest total sales."""
        monthly_sales = self._monthly_sales()
        worst_month = monthly_sales.idxmin()
        return f"Le pire mois était {worst_month} avec {monthly_sales[worst_month]:,.2f} € de ventes"

    def sales_trend_analysis(self):
        """Describe the relative change between the first and last month."""
        monthly_sales = self._monthly_sales()
        if len(monthly_sales) < 2:
            return "Tendance non calculable: au moins deux mois de données sont nécessaires"

        first, last = monthly_sales.iloc[0], monthly_sales.iloc[-1]
        if first == 0:
            # A percentage relative to zero is undefined.
            return "Tendance non calculable: les ventes du premier mois sont nulles"

        # abs() keeps the sign of the change correct if the first month is negative.
        growth = (last - first) / abs(first) * 100
        if growth == 0:
            return "Tendance sur la période: stable (0.00%)"
        direction = 'hausse' if growth > 0 else 'baisse'
        return f"Tendance sur la période: {direction} de {abs(growth):.2f}%"

    def total_sales_analysis(self):
        """Return the sum of the sales column (no date column required)."""
        total = self.df[self._find_sales_col()].sum()
        return f"Total des ventes: {total:,.2f} €"

    def average_sales_analysis(self):
        """Return the mean of the sales column (no date column required)."""
        avg = self.df[self._find_sales_col()].mean()
        return f"Moyenne des ventes: {avg:,.2f} €"

    def general_statistics(self):
        """Return the ``describe()`` summary of the sales column, one stat per line."""
        stats = self.df[self._find_sales_col()].describe()
        lines = []
        for label, value in stats.items():
            if label == 'count':
                # The row count is not a monetary amount.
                lines.append(f"{label}: {int(value):,}")
            else:
                lines.append(f"{label}: {value:,.2f} €")
        return "\n".join(lines)

    def period_comparison(self):
        """Return the year-over-year percentage change of total sales."""
        date_col, sales_col = self.get_date_and_sales_cols()
        yearly_sales = self.df.groupby(self.df[date_col].dt.year)[sales_col].sum()
        comparison = yearly_sales.pct_change() * 100
        # The first year has no predecessor, so its change is NaN and is skipped.
        lines = [f"Évolution {year}: {change:,.2f}%"
                 for year, change in comparison.items() if not np.isnan(change)]
        if not lines:
            return "Comparaison impossible: au moins deux années de données sont nécessaires"
        return "\n".join(lines)

    def analyze_question(self, question):
        """Route a free-text question to the closest analysis and run it.

        Args:
            question: The user's question.

        Returns:
            The analysis result, or an explanatory message when no data is
            loaded, the question matches no pattern, or the data cannot
            support the requested analysis.
        """
        if self.df is None:
            return "Veuillez d'abord charger un fichier CSV"

        not_understood = "Je ne comprends pas votre question. Pouvez-vous la reformuler?"
        if not question or not str(question).strip():
            return not_understood

        # Pattern embeddings only change when the pattern keys change, so they
        # are encoded once and reused across questions.
        patterns = tuple(self.analysis_patterns)
        cache = getattr(self, '_pattern_cache', None)
        if cache is None or cache[0] != patterns:
            cache = (patterns, self.model.encode(list(patterns)))
            self._pattern_cache = cache

        question_embedding = self.model.encode([question])[0]
        similarities = cosine_similarity([question_embedding], cache[1])[0]
        best_match_idx = int(np.argmax(similarities))

        if similarities[best_match_idx] > self.SIMILARITY_THRESHOLD:
            analysis_function = self.analysis_patterns[patterns[best_match_idx]]
            try:
                return analysis_function()
            except (ValueError, TypeError) as exc:
                # Missing or unusable columns: report instead of a traceback.
                return f"Analyse impossible: {exc}"
        else:
            return not_understood

class SalesAnalystInterface:
    """ipywidgets front end for ``IntelligentSalesAnalyst``.

    Shows an upload button, a question box with its submit button, the list of
    supported questions and an output area. The question controls stay
    disabled until a CSV has been loaded successfully.
    """

    def __init__(self):
        """Create the analyst (which loads the model) and display the widgets."""
        self.analyst = IntelligentSalesAnalyst()
        self.setup_interface()

    def setup_interface(self):
        """Build the widgets, wire their callbacks and display the layout."""
        self.upload_button = widgets.Button(description='Charger fichier CSV')
        self.upload_button.on_click(self.upload_clicked)

        self.question_input = widgets.Text(
            placeholder='Posez votre question ici...',
            description='Question:',
            disabled=True,
            layout=widgets.Layout(width='80%')
        )

        self.ask_button = widgets.Button(
            description='Poser la question',
            disabled=True
        )
        self.ask_button.on_click(self.ask_clicked)

        self.output = widgets.Output()

        # Build the help list from the analyst's patterns so the displayed
        # questions cannot drift from the ones that are actually supported.
        pattern_items = "".join(
            f"<li>{pattern}</li>" for pattern in self.analyst.analysis_patterns
        )

        display(widgets.VBox([
            widgets.HTML("<h3>Assistant d'Analyse des Ventes</h3>"),
            self.upload_button,
            widgets.HBox([self.question_input, self.ask_button]),
            widgets.HTML("<h4>Questions possibles:</h4>"),
            widgets.HTML(f"<ul>{pattern_items}</ul>"),
            self.output
        ]))

    def upload_clicked(self, b):
        """Upload a CSV through Colab and enable the question controls on success.

        Args:
            b: The clicked button (unused; required by the ``on_click`` callback).
        """
        self.output.clear_output()
        with self.output:
            print("Chargement du fichier...")
            try:
                uploaded = files.upload()
                if uploaded:
                    # Only the first uploaded file is analysed.
                    filename = list(uploaded.keys())[0]
                    success = self.analyst.load_data(filename)
                    if success:
                        self.question_input.disabled = False
                        self.ask_button.disabled = False
            except Exception as e:
                print(f"Erreur lors du chargement: {str(e)}")

    def ask_clicked(self, b):
        """Send the typed question to the analyst and print the answer.

        Blank or whitespace-only input is ignored. A failure inside the
        analysis is reported as a message in the output area.

        Args:
            b: The clicked button (unused; required by the ``on_click`` callback).
        """
        question = self.question_input.value.strip()
        if not question:
            return

        with self.output:
            print(f"\nQuestion: {question}")
            try:
                answer = self.analyst.analyze_question(question)
            except Exception as e:
                # Same reporting style as upload_clicked.
                print(f"Erreur lors de l'analyse: {str(e)}")
            else:
                print(f"Réponse: {answer}")

# Launch the interface: constructing it creates the analyst (which loads the
# embedding model) and displays the widgets.
interface = SalesAnalystInterface()

Chargement du modèle...
Modèle chargé!


VBox(children=(HTML(value="<h3>Assistant d'Analyse des Ventes</h3>"), Button(description='Charger fichier CSV'…