<a href="https://colab.research.google.com/github/erwannlenoach/notepad/blob/master/Copie_de_profit_split_(4).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam
import os
import json


In [None]:
# Step 1: Generate Synthetic Data
def generate_synthetic_data(num_samples=10000, seed=42):
    """Generate a synthetic headquarters/subsidiary dataset with a profit allocation key.

    Every row describes one headquarters ("hq") / subsidiary ("subs") pair:
    revenue, cost, profit, assets and liabilities for both entities, a
    "functions" and a "risks" score for each, randomly assigned industry and
    function labels, and the resulting ``profit_allocation_key``.

    The key is the subsidiary's share of four weighted ratios (assets 0.25,
    liabilities 0.15, functions 0.30, risks 0.30). The industry labels feed in
    through the asset/liability multipliers; the ``hq_function`` and
    ``subs_function`` labels are not used when computing the key.

    Args:
        num_samples: Number of rows to generate. ``0`` returns an empty frame
            that still carries every column.
        seed: Seed passed to ``np.random.seed``. This reseeds NumPy's global
            random state as a side effect.

    Returns:
        pandas.DataFrame with ``num_samples`` rows and 19 columns.
    """
    np.random.seed(seed)

    # Define industries and functions
    industries = ['manufacturing', 'services', 'technology', 'retail']
    functions = ['manufacturing', 'R&D', 'marketing', 'sales']

    # Assign industries and functions to headquarters and subsidiaries
    hq_industry = np.random.choice(industries, num_samples)
    subs_industry = np.random.choice(industries, num_samples)
    hq_function = np.random.choice(functions, num_samples)
    subs_function = np.random.choice(functions, num_samples)

    # Define industry-specific multipliers for revenue, cost, assets, and liabilities
    industry_multipliers = {
        'manufacturing': {'revenue': 1.0, 'cost': 0.7, 'assets': 1.2, 'liabilities': 0.8},
        'services': {'revenue': 0.9, 'cost': 0.6, 'assets': 1.0, 'liabilities': 0.7},
        'technology': {'revenue': 1.2, 'cost': 0.8, 'assets': 1.5, 'liabilities': 0.9},
        'retail': {'revenue': 0.8, 'cost': 0.5, 'assets': 0.8, 'liabilities': 0.6},
    }

    def _multipliers(labels, field):
        # Per-sample multiplier for `field`. Building the array with an explicit
        # float dtype (instead of np.vectorize without otypes) also works when
        # `labels` is empty.
        return np.array([industry_multipliers[label][field] for label in labels], dtype=float)

    # Generate headquarters data
    hq_revenue = np.random.normal(loc=1000000, scale=200000, size=num_samples) * _multipliers(hq_industry, 'revenue')
    hq_cost = hq_revenue * np.random.uniform(0.5, 0.7, num_samples) * _multipliers(hq_industry, 'cost')
    hq_profit = hq_revenue - hq_cost
    hq_assets = np.random.normal(loc=500000, scale=100000, size=num_samples) * _multipliers(hq_industry, 'assets')
    hq_liabilities = hq_assets * np.random.uniform(0.3, 0.5, num_samples) * _multipliers(hq_industry, 'liabilities')

    # Generate subsidiary data (scaled from the headquarters figures)
    subs_revenue = hq_revenue * np.random.uniform(0.2, 0.4, num_samples) * _multipliers(subs_industry, 'revenue')
    subs_cost = subs_revenue * np.random.uniform(0.6, 0.8, num_samples) * _multipliers(subs_industry, 'cost')
    subs_profit = subs_revenue - subs_cost
    subs_assets = hq_assets * np.random.uniform(0.1, 0.3, num_samples) * _multipliers(subs_industry, 'assets')
    subs_liabilities = subs_assets * np.random.uniform(0.4, 0.6, num_samples) * _multipliers(subs_industry, 'liabilities')

    # Generate intercompany transaction data.
    # These two series are not part of the returned frame; the draws are kept
    # because they advance the random stream, so dropping them would change
    # every value generated below for a given seed.
    intercompany_sales = subs_revenue * np.random.uniform(0.1, 0.3, num_samples)
    intercompany_purchases = intercompany_sales * np.random.uniform(0.8, 1.2, num_samples)

    # Define additional factors based on OECD guidelines
    hq_functions = np.random.uniform(0.1, 0.5, num_samples)
    subs_functions = np.random.uniform(0.2, 0.6, num_samples)
    hq_risks = np.random.uniform(0.1, 0.3, num_samples)
    subs_risks = np.random.uniform(0.2, 0.4, num_samples)

    # Calculate profit allocation key using a combination of factors
    total_assets = hq_assets + subs_assets
    total_liabilities = hq_liabilities + subs_liabilities
    total_functions = hq_functions + subs_functions
    total_risks = hq_risks + subs_risks

    hq_weighted_factors = (hq_assets / total_assets) * 0.25 + (hq_liabilities / total_liabilities) * 0.15 + (hq_functions / total_functions) * 0.3 + (hq_risks / total_risks) * 0.3
    subs_weighted_factors = (subs_assets / total_assets) * 0.25 + (subs_liabilities / total_liabilities) * 0.15 + (subs_functions / total_functions) * 0.3 + (subs_risks / total_risks) * 0.3

    # Subsidiary's share of the combined weighted factors
    profit_allocation_key = subs_weighted_factors / (hq_weighted_factors + subs_weighted_factors)

    # Compile the data into a DataFrame
    data = {
        'hq_revenue': hq_revenue,
        'hq_cost': hq_cost,
        'hq_profit': hq_profit,
        'hq_assets': hq_assets,
        'hq_liabilities': hq_liabilities,
        'subs_revenue': subs_revenue,
        'subs_cost': subs_cost,
        'subs_profit': subs_profit,
        'subs_assets': subs_assets,
        'subs_liabilities': subs_liabilities,
        'hq_functions': hq_functions,
        'subs_functions': subs_functions,
        'hq_risks': hq_risks,
        'subs_risks': subs_risks,
        'hq_industry': hq_industry,
        'subs_industry': subs_industry,
        'hq_function': hq_function,
        'subs_function': subs_function,
        'profit_allocation_key': profit_allocation_key,
    }

    df = pd.DataFrame(data)
    return df

# Generate the dataset (every later cell reads it from `df`)
df = generate_synthetic_data()

In [None]:
# Step 2: Prepare the Data
# Expand every categorical column into one-hot indicator columns
categorical_columns = ['hq_industry', 'subs_industry', 'hq_function', 'subs_function']
df_encoded = pd.get_dummies(df, columns=categorical_columns)

# The allocation key is the regression target; every other column is a feature
target_column = 'profit_allocation_key'
y = df_encoded[target_column]
X = df_encoded.drop(columns=[target_column])

# Hold out 30 % of the rows, then halve the hold-out into validation and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Fit the scaler on the training split only and reuse its statistics for the other splits
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

In [None]:
# Step 3: Build the Neural Network
layers = tf.keras.layers
n_features = X_train_scaled.shape[1]

model = tf.keras.Sequential()

# Hidden block 1: Dense -> Dropout -> BatchNormalization
model.add(layers.Dense(128, activation='relu', input_shape=(n_features,)))
model.add(layers.Dropout(0.5))
model.add(layers.BatchNormalization())

# Hidden block 2: same pattern, half the width
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.BatchNormalization())

# Hidden block 3: no dropout before the output
model.add(layers.Dense(32, activation='relu'))
model.add(layers.BatchNormalization())

# Single linear unit: the predicted profit allocation key
model.add(layers.Dense(1, activation='linear'))

# Compile the model with a smaller learning rate
optimizer = Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)


In [None]:
# Step 4: Train the Model with early stopping
# Stop once the validation loss has not improved for 5 consecutive epochs and
# restore the weights of the best epoch seen so far.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

history = model.fit(
    X_train_scaled,
    y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_val_scaled, y_val),
    callbacks=[early_stopping],
)

In [None]:
# Step 5: Evaluate the Model
# evaluate() yields the loss followed by the compiled metric (MSE, then MAE)
test_scores = model.evaluate(X_test_scaled, y_test)
test_loss, test_mae = test_scores
print(f'Test Loss: {test_loss}, Test MAE: {test_mae}')

In [None]:
# Step 6: Predict on Test Data and Plot
y_pred = model.predict(X_test_scaled)

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.3)

# Dashed diagonal marks where predicted == actual
diagonal = [y_test.min(), y_test.max()]
ax.plot(diagonal, diagonal, 'k--', lw=2)

ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.set_title('Actual vs Predicted Profit Allocation Key')
plt.show()

In [None]:
# Step 7: Create a DataFrame for Predictions vs Actual
# model.predict returns a 2-D column of predictions; flatten it to line up with y_test
results_df = pd.DataFrame({'Actual': y_test}).assign(Predicted=y_pred.flatten())
print(results_df.head(10))

In [None]:
# Save the trained model in the native Keras (.keras) format

output_dir = "model"
# exist_ok=True makes this cell safe to re-run when the folder already exists
os.makedirs(output_dir, exist_ok=True)
model.save('model/profit_split_model.keras')

In [None]:
!pip install tensorflowjs

In [None]:
!tensorflowjs_converter --input_format=keras /content/model/profit_split_model.h5 content/model/profit_split_tfjs_model

In [None]:
# Save the scaler parameters so the same standardisation can be applied outside this notebook
scaler_params = dict(
    mean=scaler.mean_.tolist(),
    std=scaler.scale_.tolist(),
)

with open('scaler_params.json', 'w') as params_file:
    params_file.write(json.dumps(scaler_params))