# Déploiement de l'Application avec Flask pour des Prédictions Probabilistes

In [None]:
from flask import Flask, request, render_template, jsonify
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Flask application object on which the HTTP routes are registered.
app = Flask(__name__)

# Load the model and the encoders once, when this module (or cell) is executed.
# NOTE(review): joblib.load unpickles the files, so both artifacts must come
# from a trusted source. The paths are relative, so they depend on the
# directory the app is started from — confirm the expected working directory.
model = joblib.load('model/best_model.pkl')
label_encoders = joblib.load('model/label_encoders.pkl')

In [None]:
# Columns of interest: the fields read from the user's input, in the order in
# which they are laid out in the feature vector.
columns_of_interest = [
    # Competition and receiving club
    'league',
    'season',
    'team_name',
    'team_country',
    # Player
    'player_name',
    'player_age',
    # Counterpart club
    'counter_team_name',
    'counter_team_country',
    # Amounts and status flags
    'transfer_fee_amnt',
    'market_val_amnt',
    'is_free',
    'is_retired',
]

# Comparison of the user's input against the reference rows
def _encode_user_field(user_input, column, label_encoders):
    """Return the model-ready value of one user-supplied field.

    Columns that have an entry in ``label_encoders`` are mapped to their
    integer code. A value the encoder does not know gets the code
    ``len(classes_)`` (one past the last fitted code); the encoder itself is
    never modified, so the shared encoders do not grow from call to call.
    Columns without an encoder are passed through unchanged and default to
    ``0`` when absent from ``user_input``.
    """
    if column not in label_encoders:
        return user_input.get(column, 0)

    encoder = label_encoders[column]
    raw_value = user_input.get(column, '')
    if raw_value in encoder.classes_:
        return encoder.transform([raw_value])[0]
    # Unseen category: use a code outside the fitted range instead of
    # appending the value to the encoder's classes_.
    return len(encoder.classes_)


def compare_user_input(user_input, data, model, label_encoders, columns=None):
    """Score how closely ``user_input`` matches the reference rows in ``data``.

    The encoded user input is paired with every row of ``data`` (user values
    first, then the row's values) and each pair is scored with
    ``model.predict_proba``; column 1 of the result is read as the
    probability that the pair is similar.

    A field is reported as likely false when replacing only that field with
    the value found at the same position in some reference row yields a
    probability strictly higher than the best probability of the unmodified
    input.

    NOTE(review): reference values are looked up by position, so ``data`` is
    assumed to be already encoded and to have its columns in the same order
    as ``columns`` — confirm against the training pipeline.

    Args:
        user_input: Mapping of field name to the raw value given by the user.
        data: pandas DataFrame of reference rows.
        model: Fitted classifier exposing ``predict_proba``.
        label_encoders: Mapping of column name to a fitted encoder exposing
            ``classes_`` and ``transform``. It is not modified.
        columns: Ordered field names to use; defaults to the module-level
            ``columns_of_interest``.

    Returns:
        A tuple ``(truth_percentage, likely_false_fields)``: the best
        similarity probability as a percentage (plain ``float``) and the list
        of field names flagged as likely false, in ``columns`` order.

    Raises:
        ValueError: If ``data`` contains no rows.
    """
    if columns is None:
        columns = columns_of_interest
    if len(data) == 0:
        raise ValueError("data must contain at least one reference row")

    user_input_array = np.array(
        [_encode_user_field(user_input, column, label_encoders) for column in columns]
    )

    # One copy of the user's vector per reference row, so every pair can be
    # scored in a single predict_proba call.
    reference = data.to_numpy()
    tiled_input = np.tile(user_input_array, (len(reference), 1))
    probabilities = model.predict_proba(np.hstack((tiled_input, reference)))[:, 1]

    # Overall truth percentage: the best match among all reference rows.
    max_probability = np.max(probabilities)
    truth_percentage = float(max_probability) * 100

    # Identify likely false fields: swap one field at a time for the reference
    # value of each row and check whether any row then scores higher.
    likely_false_fields = []
    for position, column in enumerate(columns):
        candidates = tiled_input.copy()
        candidates[:, position] = reference[:, position]
        candidate_probabilities = model.predict_proba(
            np.hstack((candidates, reference))
        )[:, 1]
        if np.any(candidate_probabilities > max_probability):
            likely_false_fields.append(column)

    return truth_percentage, likely_false_fields

In [None]:
@app.route('/')
def index():
    """Serve the root URL by rendering the ``index.html`` template."""
    return render_template('index.html')

# Upper bound on the number of reference rows compared against each request.
_REFERENCE_SAMPLE_SIZE = 1000
# Cached reference sample; filled on first use by _load_reference_data().
_reference_data = None


def _load_reference_data():
    """Return the reference sample drawn from ``transfers.csv``, loading it once.

    The sample uses a fixed ``random_state``, so it is identical on every
    load; it is cached so the CSV is not re-read and re-sampled per request.
    Changes made to the CSV while the server is running are therefore not
    picked up until restart.
    """
    global _reference_data
    if _reference_data is None:
        transfers = pd.read_csv('transfers.csv')
        # DataFrame.sample raises when n exceeds the number of rows (sampling
        # without replacement), so cap the sample size for small files.
        sample_size = min(_REFERENCE_SAMPLE_SIZE, len(transfers))
        _reference_data = transfers.sample(n=sample_size, random_state=42)
    return _reference_data


@app.route('/predict', methods=['POST'])
def predict():
    """Score a JSON payload of transfer fields against the reference sample.

    Expects a JSON object in the request body. Responds with a JSON object
    holding ``truth_percentage`` and ``likely_false_fields`` as computed by
    ``compare_user_input``, or with HTTP 400 and an ``error`` message when
    the body is missing, is not valid JSON, or is not a JSON object.
    """
    # silent=True yields None instead of raising on a missing/invalid body, so
    # every malformed request ends up in the explicit 400 branch below.
    user_input = request.get_json(silent=True)
    if not isinstance(user_input, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    # Example reduced data for demonstration, replace with actual data for production.
    # NOTE(review): compare_user_input feeds these rows to the model as-is and
    # reads them by position, so the CSV presumably has to hold already-encoded
    # values in the columns_of_interest order — confirm.
    data = _load_reference_data()
    truth_percentage, likely_false_fields = compare_user_input(user_input, data, model, label_encoders)
    return jsonify({
        # Plain Python types keep the payload JSON-serializable whatever
        # numeric type the model returns.
        'truth_percentage': float(truth_percentage),
        'likely_false_fields': list(likely_false_fields),
    })

# Start the application's built-in server only when this file is run directly.
# NOTE(review): debug=True turns on Flask's debug mode, which the Flask
# documentation says must not be used in production — confirm this entry
# point is for local development only.
if __name__ == '__main__':
    app.run(debug=True)