In [4]:
import pandas as pd

# Charger toutes les tables en DataFrames (modifiez le chemin si vous utilisez des fichiers JSON)
cashflow_df = pd.read_json('CashFlow.json')
expenses_df = pd.read_json('Expenses.json')
revenue_df = pd.read_json('Revenue.json')
investments_df = pd.read_json('Investments.json')
funding_df = pd.read_json('Funding.json')


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [2]:
import pandas as pd
import random
from datetime import timedelta

def augment_expenses(df, num_augments=5):
    augmented_data = []
    for _, row in df.iterrows():
        for _ in range(num_augments):
            # Modifier le montant ±10%
            new_amount = row["amount_expenses"] * random.uniform(0.9, 1.1)

            # Décaler la date de ±7 jours
            new_date = pd.to_datetime(row["date"]) + timedelta(days=random.randint(-7, 7))

            # Ajouter l'entrée augmentée
            augmented_data.append({
                "id": row["id"],
                "date": new_date.strftime("%Y-%m-%d"),
                "amount_expenses": round(new_amount, 2),
                "expense_category": row["expense_category"],
                "department": row["department"],
                "description": row["description"]
            })

    return pd.DataFrame(augmented_data)

def augment_revenue(df, num_augments=5):
    augmented_data = []
    for _, row in df.iterrows():
        for _ in range(num_augments):
            # Modifier le montant ±15%
            new_amount = row["amount_revenue"] * random.uniform(0.85, 1.15)

            # Décaler la date de ±10 jours
            new_date = pd.to_datetime(row["date"]) + timedelta(days=random.randint(-10, 10))

            # Ajouter l'entrée augmentée
            augmented_data.append({
                "id": row["id"],
                "date": new_date.strftime("%Y-%m-%d"),
                "amount_revenue": round(new_amount, 2),
                "product_line": row["product_line"],
                "customer_type": row["customer_type"],
                "description": row["description"]
            })
    return pd.DataFrame(augmented_data)
def augment_investments(df, num_augments=5):
    augmented_data = []
    for _, row in df.iterrows():
        for _ in range(num_augments):
            # Modifier le montant d'investissement et la valeur actuelle ±10%
            new_investment_amount = row["investment_amount"] * random.uniform(0.9, 1.1)
            new_value = row["current_value"] * random.uniform(0.9, 1.1)

            # Décaler la date de ±15 jours
            new_date = pd.to_datetime(row["date"]) + timedelta(days=random.randint(-15, 15))

            # Ajouter l'entrée augmentée
            augmented_data.append({
                "id": row["id"],
                "investment_type": row["investment_type"],
                "investment_amount": round(new_investment_amount, 2),
                "date": new_date.strftime("%Y-%m-%d"),
                "returns": row["returns"],
                "risk_level": row["risk_level"],
                "current_value": round(new_value, 2),
                "description": row["description"]
            })

    return pd.DataFrame(augmented_data)

def augment_funding(df, num_augments=5):
    augmented_data = []
    for _, row in df.iterrows():
        for _ in range(num_augments):
            # Modifier le montant levé et l'évaluation ±10%
            new_amount_raised = row["amount_raised"] * random.uniform(0.9, 1.1)
            new_valuation = row["valuation"] * random.uniform(0.9, 1.1)

            # Décaler la date de ±20 jours
            new_date = pd.to_datetime(row["date"]) + timedelta(days=random.randint(-20, 20))

            # Ajouter l'entrée augmentée
            augmented_data.append({
                "id": row["id"],
                "funding_round": row["funding_round"],
                "amount_raised": round(new_amount_raised, 2),
                "date": new_date.strftime("%Y-%m-%d"),
                "investor_name": row["investor_name"],
                "valuation": round(new_valuation, 2),
                "description": row["description"]
            })

    return pd.DataFrame(augmented_data)


In [3]:
import gc 
gc.collect()

0

In [4]:
expenses_aug = augment_expenses(expenses_df,500)
revenue_aug = augment_revenue(revenue_df,500)
investments_aug = augment_investments(investments_df,500)
funding_aug = augment_funding(funding_df,500)

In [5]:
# Conversion explicite des colonnes 'date' au format datetime64[ns]
cashflow_df['date'] = pd.to_datetime(cashflow_df['date'])  
cashflow_df['date'] = cashflow_df['date'].dt.strftime('%Y-%m-%d')

revenue_aug['date'] = pd.to_datetime(revenue_aug['date'])
revenue_aug['date'] = revenue_aug['date'].dt.strftime('%Y-%m-%d')

expenses_aug['date'] = pd.to_datetime(expenses_aug['date'])
expenses_aug['date'] = expenses_aug['date'].dt.strftime('%Y-%m-%d')

investments_aug['date'] = pd.to_datetime(investments_aug['date'])
investments_aug['date'] = investments_aug['date'].dt.strftime('%Y-%m-%d')

funding_aug['date'] = pd.to_datetime(funding_aug['date'], errors='coerce')
funding_aug['date'] = funding_aug['date'].dt.strftime('%Y-%m-%dz')


# Merge par date pour combiner cashflow, revenue, expenses et autres
df = pd.merge(cashflow_df, revenue_aug[['date', 'amount_revenue']], on='date', how='left', suffixes=('', '_revenue'))
df = pd.merge(df, expenses_aug[['date', 'amount_expenses']], on='date', how='left', suffixes=('', '_expenses'))
df = pd.merge(df, investments_aug[['date', 'investment_amount']], on='date', how='left')
df = pd.merge(df, funding_aug[['date', 'amount_raised']], on='date', how='left')

# Remplacer NaN par 0 dans les colonnes numériques
df.fillna(0, inplace=True)

# Affichage des données consolidées
print("done")


MemoryError: Unable to allocate 2.44 GiB for an array with shape (1, 327689378) and data type float64

In [8]:
# Caractéristiques et cibles
X = df[['amount_revenue', 'amount_expenses', 'investment_amount', 'amount_raised']]
y_inflow = df['cash_inflow']
y_outflow = df['cash_outflow']
y_netflow = df['net_cash_flow']
print("done")

done


In [9]:
from sklearn.model_selection import train_test_split

# Diviser en ensembles d'entraînement et de test
X_train, X_test, y_inflow_train, y_inflow_test = train_test_split(X, y_inflow, test_size=0.2, random_state=42)
_, _, y_outflow_train, y_outflow_test = train_test_split(X, y_outflow, test_size=0.2, random_state=42)
_, _, y_netflow_train, y_netflow_test = train_test_split(X, y_netflow, test_size=0.2, random_state=42)

print("done")


done


In [10]:
from sklearn.linear_model import LinearRegression

# Initialiser les modèles
model_inflow = LinearRegression()
model_outflow = LinearRegression()
model_netflow = LinearRegression()

# Entraîner les modèles
model_inflow.fit(X_train, y_inflow_train)
model_outflow.fit(X_train, y_outflow_train)
model_netflow.fit(X_train, y_netflow_train)

print("done")

done


In [11]:
from sklearn.metrics import mean_squared_error, r2_score

# Prédictions
y_inflow_pred = model_inflow.predict(X_test)
y_outflow_pred = model_outflow.predict(X_test)
y_netflow_pred = model_netflow.predict(X_test)

# Calculer les MSE et R² Score
print(f"Inflow - MSE: {mean_squared_error(y_inflow_test, y_inflow_pred):.2f}, R²: {r2_score(y_inflow_test, y_inflow_pred):.2f}")
print(f"Outflow - MSE: {mean_squared_error(y_outflow_test, y_outflow_pred):.2f}, R²: {r2_score(y_outflow_test, y_outflow_pred):.2f}")
print(f"Net Flow - MSE: {mean_squared_error(y_netflow_test, y_netflow_pred):.2f}, R²: {r2_score(y_netflow_test, y_netflow_pred):.2f}")


Inflow - MSE: 111404349.05, R²: 0.15
Outflow - MSE: 13305578.09, R²: 0.14
Net Flow - MSE: 114756846.07, R²: 0.08


In [13]:
# Exemple de nouvelles données
new_data = pd.DataFrame({
    'amount_revenue': [100000],
    'amount_expenses': [40000],
    'investment_amount': [5000],
    'amount_raised': [20000]
})

# Prédire les flux de trésorerie
predicted_inflow = model_inflow.predict(new_data)
predicted_outflow = model_outflow.predict(new_data)
predicted_netflow = model_netflow.predict(new_data)

print(f"Predicted Cash Inflow: {predicted_inflow[0]:.2f}")
print(f"Predicted Cash Outflow: {predicted_outflow[0]:.2f}")
print(f"Predicted Net Cash Flow: {predicted_netflow[0]:.2f}")


Predicted Cash Inflow: 58030.31
Predicted Cash Outflow: 16344.37
Predicted Net Cash Flow: 41685.94
