In [None]:
import numpy as np

class CausalBandit:
    """Multi-armed bandit with Bernoulli rewards.

    ``true_parameters[arm]`` is the probability that pulling ``arm`` pays 1;
    otherwise the pull pays 0.
    """

    def __init__(self, true_parameters):
        # Success probability of every arm, indexed by arm number.
        self.true_parameters = true_parameters

    def pull_arm(self, arm):
        """Draw a single 0/1 reward for ``arm``."""
        success_probability = self.true_parameters[arm]
        return np.random.binomial(1, success_probability)

class Agent:
    """Base bandit agent: keeps per-arm reward and pull tallies.

    Subclasses provide the selection policy by overriding ``choose_arm``.
    """

    def __init__(self, num_arms):
        self.num_arms = num_arms
        # Running totals, one slot per arm.
        self.total_rewards = [0 for _ in range(num_arms)]
        self.total_pulls = [0 for _ in range(num_arms)]

    def choose_arm(self):
        """Policy hook; the base class makes no choice and returns None."""
        return None

    def update(self, arm, reward):
        """Record one pull of ``arm`` that yielded ``reward``."""
        self.total_pulls[arm] += 1
        self.total_rewards[arm] += reward

class EpsilonGreedyAgent(Agent):
    """Explores uniformly at random with probability ``epsilon``; otherwise
    plays the arm with the highest empirical mean reward."""

    def __init__(self, num_arms, epsilon):
        super().__init__(num_arms)
        self.epsilon = epsilon

    def choose_arm(self):
        """Return the index of the arm to pull next."""
        explore = np.random.random() < self.epsilon
        if explore:
            return np.random.choice(self.num_arms)

        # Empirical mean per arm; the max(1, .) guard gives never-pulled arms a mean of 0.
        mean_rewards = []
        for reward_sum, pull_count in zip(self.total_rewards, self.total_pulls):
            mean_rewards.append(reward_sum / max(1, pull_count))
        return np.argmax(mean_rewards)

class UCB1Agent(Agent):
    """UCB1 agent: plays the arm with the highest upper confidence bound.

    The bound for an arm is ``mean + sqrt(2 * ln(total_pulls) / arm_pulls)``.
    """

    def choose_arm(self):
        """Return the index of the arm to pull next.

        Every arm is played once before the confidence bound is used. Without
        that initialisation the bound is undefined: with zero total pulls
        ``log(0)`` is -inf and the bonus is NaN, and after the first pull
        ``log(1) == 0`` gives unpulled arms the same (zero) bonus as pulled ones.
        """
        for arm, pulls in enumerate(self.total_pulls):
            if pulls == 0:
                return arm

        # All arms have pulls >= 1 from here on, so no division guard is needed.
        log_total_pulls = np.log(sum(self.total_pulls))
        ucb_values = [
            r / p + np.sqrt(2 * log_total_pulls / p)
            for r, p in zip(self.total_rewards, self.total_pulls)
        ]
        return np.argmax(ucb_values)

class ThompsonSamplingAgent(Agent):
    """Thompson sampling: draws one value per arm from a Beta distribution
    parameterised by that arm's tallies and plays the arm with the largest draw."""

    def choose_arm(self):
        """Return the index of the arm whose sampled parameter is largest."""
        draws = []
        for reward_sum, pull_count in zip(self.total_rewards, self.total_pulls):
            # Beta(rewards + 1, pulls - rewards + 1); with no pulls this is Beta(1, 1).
            unrewarded = pull_count - reward_sum
            draws.append(np.random.beta(reward_sum + 1, unrewarded + 1))
        return np.argmax(draws)

def grid_search(agents, true_parameters_grid, num_trials):
    """Run every agent on every bandit parameter setting and collect regrets.

    Parameters
    ----------
    agents : list
        Agent templates. Each (setting, agent) pair runs on a deep copy, so
        statistics learned on one bandit never leak into the next one and the
        objects passed in are left untouched.
    true_parameters_grid : list of list of float
        One list of per-arm success probabilities per bandit setting.
    num_trials : int
        Number of pulls per (setting, agent) pair.

    Returns
    -------
    dict
        Maps the agent's class name to the list of per-pull regrets
        (best arm probability minus chosen arm probability). Agents that share
        a class share one list, concatenated across all settings.
    """
    import copy  # local import: keeps the function self-contained in the notebook

    regrets = {agent.__class__.__name__: [] for agent in agents}
    for true_parameters in true_parameters_grid:
        best_mean = max(true_parameters)  # loop-invariant for this setting
        for agent in agents:
            learner = copy.deepcopy(agent)  # fresh statistics for this bandit
            bandit = CausalBandit(true_parameters)
            history = regrets[agent.__class__.__name__]
            for _ in range(num_trials):
                chosen_arm = learner.choose_arm()
                reward = bandit.pull_arm(chosen_arm)
                learner.update(chosen_arm, reward)
                history.append(best_mean - true_parameters[chosen_arm])
    return regrets

# Experiment configuration for the bandit comparison.
num_arms = 4
true_parameters_grid = [[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.4, 0.5]]  # Example grid of Bernoulli parameters
num_trials = 1000
epsilon_values = [0.1, 0.2, 0.3]  # Example epsilon values for epsilon-greedy agent

# One epsilon-greedy agent per epsilon value, plus one UCB1 and one Thompson sampling agent.
agents = [EpsilonGreedyAgent(num_arms, epsilon) for epsilon in epsilon_values] + [UCB1Agent(num_arms), ThompsonSamplingAgent(num_arms)]

# Run every agent on every parameter setting.
# NOTE(review): grid_search keys its result by class name, so the three
# EpsilonGreedyAgent instances end up in a single 'EpsilonGreedyAgent' entry.
regrets = grid_search(agents, true_parameters_grid, num_trials)

# Sum the per-pull regrets of each entry.
cumulative_regrets = {agent: sum(regrets[agent]) for agent in regrets}

print("Cumulative Regrets:")
for agent, regret in cumulative_regrets.items():
    print(f"{agent}: {regret}")


In [None]:
import numpy as np
import networkx as nx
import random
import matplotlib.pyplot as plt

# Define the environment
class Environment:
    """Toy causal environment over ``num_vars`` real-valued variables.

    An action is the index of a variable to intervene on. After the
    intervention, every other variable that has parents in ``causal_graph`` is
    recomputed as the sum of its parents plus Gaussian noise (std 0.1).
    """

    def __init__(self, num_vars, causal_graph):
        self.num_vars = num_vars
        self.causal_graph = causal_graph
        # Initial state: small Gaussian noise around zero.
        self.state = np.random.normal(0, 0.1, num_vars)

    def step(self, action):
        """Intervene on variable ``action`` and propagate through the graph.

        Returns
        -------
        (state, reward)
            A copy of the new state array and ``abs(sum(state))``.
        """
        # Apply the action (intervention) to the corresponding variable
        self.state[action] = np.random.normal()

        # Update the state based on the causal graph
        for node in nx.topological_sort(self.causal_graph):
            if node == action:
                # An intervention overrides the variable's own mechanism; without
                # this guard the forced value was overwritten whenever the
                # intervened variable had parents.
                continue
            parents = list(self.causal_graph.predecessors(node))
            if parents:
                self.state[node] = sum(self.state[parent] for parent in parents) + np.random.normal(0, 0.1)

        # Compute the reward (dummy implementation)
        reward = np.abs(np.sum(self.state))

        return self.state.copy(), reward

# Define the agent
class CausalAgent:
    """Curiosity-driven agent that grows a directed graph from its interventions.

    Parameters
    ----------
    num_vars : int
        Number of variables; also the number of available actions.
    causal_graph : networkx.DiGraph
        Graph supplied by the caller. It is stored but not read by any method
        of this class.
    epsilon : float, optional
        Probability of picking a uniformly random action (default 0.1, the
        value that used to be hard-coded in ``select_action``).
    max_history_size : int, optional
        Number of most recent causal-model snapshots to keep (default 10).
    """

    def __init__(self, num_vars, causal_graph, epsilon=0.1, max_history_size=10):
        self.num_vars = num_vars
        self.causal_graph = causal_graph
        self.causal_model = nx.DiGraph()
        # Row `a` is incremented as a whole every time action `a` is taken.
        self.intervention_count = np.zeros((num_vars, num_vars))
        self.causal_model_history = []  # Store causal model updates
        self.max_history_size = max_history_size  # Maximum number of stored causal models
        self.epsilon = epsilon

    def store_causal_model(self):
        """Append a copy of the current model and keep only the newest snapshots."""
        self.causal_model_history.append(self.causal_model.copy())

        # Trim the history list if it exceeds the maximum size
        if len(self.causal_model_history) > self.max_history_size:
            self.causal_model_history = self.causal_model_history[-self.max_history_size:]

    def plot_causal_model_updates(self):
        """Draw every stored snapshot, one subplot per snapshot."""
        snapshots = self.causal_model_history
        if not snapshots:
            return  # nothing stored yet; avoid showing an empty figure

        plt.figure(figsize=(10, 6))
        for i, causal_model in enumerate(snapshots):
            plt.subplot(len(snapshots), 1, i + 1)
            nx.draw(causal_model, with_labels=True)
            plt.title(f'Episode {i+1} - Causal Model')
        plt.tight_layout()
        plt.show()

    def update_causal_model(self, state, action, next_state):
        """Update the model from one (state, action, next_state) transition.

        Simplified discovery rule: the first variable whose value differs
        between ``state`` and ``next_state`` is linked to ``action`` in both
        directions, then the intervention counter row of ``action`` is bumped.
        """
        self.causal_model.add_nodes_from(range(self.num_vars))

        # Indices of the variables that changed between the two states
        changed = np.where(next_state != state)[0]

        if len(changed) > 0:
            changed_var = changed[0]  # Select the first changed variable
            # Association rule
            self.causal_model.add_edge(action, changed_var)
            # Causation rule. add_edge is idempotent, so no has_edge check is needed.
            # NOTE(review): this always yields edges in both directions — confirm intent.
            self.causal_model.add_edge(changed_var, action)

        self.intervention_count[action] += 1

    def select_action(self, state):
        """Pick the next variable to intervene on (epsilon-greedy on intrinsic reward).

        ``state`` is accepted for interface symmetry but is not used.
        """
        if random.random() < self.epsilon:
            return random.randint(0, self.num_vars - 1)

        # Make sure every variable is a node (no-op for nodes already present)
        self.causal_model.add_nodes_from(range(self.num_vars))

        # Restrict the action set to variables with a path to the last variable.
        # The last variable always qualifies, so the list is never empty.
        target = self.num_vars - 1
        actionable_vars = [var for var in range(self.num_vars) if nx.has_path(self.causal_model, var, target)]

        # Select the action based on intrinsic reward
        intrinsic_rewards = [self.compute_intrinsic_reward(var) for var in actionable_vars]

        return actionable_vars[np.argmax(intrinsic_rewards)]

    def compute_number_undetermined_relation_in_history(self, var, neighbor):
        """Placeholder: not implemented, returns None."""
        return

    def compute_number_directed_relation_in_history(self, var, neighbor):
        """Placeholder: not implemented, returns None."""
        return

    def compute_intrinsic_reward(self, var):
        """Sum, over the successors of ``var``, of ``arctan(c) / c`` for the stored count ``c``.

        The term decreases from 1 towards 0 as the count grows. A count of 0 is
        mapped to the limit value 1.0: the previous ``arctan(0) / 1e-6``
        evaluated to 0, which ranked never-tried variables below every other.
        """
        intrinsic_reward = 0
        for neighbor in self.causal_model.neighbors(var):
            rel_count = self.intervention_count[var, neighbor]
            if rel_count == 0:
                intrinsic_reward += 1.0
            else:
                intrinsic_reward += np.arctan(rel_count) / (rel_count + 1e-6)
        return intrinsic_reward

# Simulate the agent
# Simulate the agent in a 5-variable chain environment.
num_vars = 5
causal_graph = nx.DiGraph()
causal_graph.add_nodes_from(range(num_vars))

# Define the causal graph (dummy example): a chain 0 -> 1 -> ... -> num_vars - 1
for i in range(num_vars - 1):
    causal_graph.add_edge(i, i + 1)

env = Environment(num_vars, causal_graph)
agent = CausalAgent(num_vars, causal_graph)

# 500 episodes of 100 steps. The environment is never reset: each episode
# starts from whatever state the previous one left behind.
for episode in range(500):
    state = env.state.copy()
    for t in range(100):
        action = agent.select_action(state)
        next_state, reward = env.step(action)
        agent.update_causal_model(state, action, next_state)
        # A snapshot is stored after every step, not once per episode.
        agent.store_causal_model()
        state = next_state

# Print the learned causal model
print("Learned Causal Model:")
print(agent.causal_model.edges())
# Prints the list of stored graph objects (their repr, not their edges).
print(agent.causal_model_history)
agent.plot_causal_model_updates()

In [None]:
## Define a function to plot the causal model
#def plot_causal_model(causal_model, label):
#    plt.figure(figsize=(8, 6))  # Adjust the figure size as needed
#    pos = nx.spring_layout(causal_model)  # Layout algorithm for node positioning
#
#    # Draw the causal model with adjusted parameters
#    nx.draw(causal_model, pos, with_labels=True, node_size=1000, font_size=10, node_color='skyblue', edge_color='gray', width=1.5, alpha=0.8)
#    plt.title('Causal Model')
#    plt.legend([label], loc='upper left')  # Add the episode label to the legend
#    plt.show()
#
## Plot the stored causal models
#plt.figure(figsize=(10, 6))
#for i, causal_model in enumerate(agent.causal_model_history):
#    plot_causal_model(causal_model, label=f'Episode {i+1}')
#plt.xlabel('Nodes')
#plt.ylabel('Edges')
#plt.title('Evolution of Causal Model over Episodes')
#plt.show()

In [None]:
import matplotlib.pyplot as plt

# Total reward collected in each episode.
episode_rewards = []

# Simulate the agent and collect rewards.
# Relies on `env` and `agent` created in an earlier cell; both keep the state
# they had at the end of that cell.
for episode in range(500):
    state = env.state.copy()
    total_reward = 0
    for t in range(100):
        action = agent.select_action(state)
        next_state, reward = env.step(action)
        agent.update_causal_model(state, action, next_state)
        state = next_state
        total_reward += reward
    episode_rewards.append(total_reward)

# Plot the per-episode total reward (summed over the 100 steps of each
# episode; it is not accumulated across episodes).
plt.plot(range(1, len(episode_rewards) + 1), episode_rewards)
plt.xlabel('Episode')
plt.ylabel('Cumulative Reward')
plt.title('Cumulative Reward over Episodes')
plt.show()


In [None]:
#import matplotlib.pyplot as plt
#
#N = 100
## Initialize a dictionary to store action frequencies
#action_freq = {action: [0] * N for action in range(num_vars)}
#
## Simulate the agent and collect action frequencies
#for episode in range(500):
#    state = env.state.copy()
#    reward = 0
#    for t in range(N):
#        action = agent.select_action(state)
#        next_state, _ = env.step(action)
#        agent.update_causal_model(state, action, next_state)
#        state = next_state
#        action_freq[action] += 1  # Increment action frequency
#
## Calculate action selection frequencies over time and their standard deviations
#action_freq_over_time = {action: [sum(action_freq[action][:t]) / (episode + 1) for t in range(1, N + 1)] for action in range(num_vars)}
#action_std_over_time = {action: [np.std(action_freq[action][:t]) for t in range(1, N + 1)] for action in range(num_vars)}
#
#print(action_freq)
## Plot action selection frequencies over time with error bars
#plt.figure(figsize=(10, 6))
#for action in range(num_vars):
#    plt.errorbar(range(1, N + 1), action_freq_over_time[action], label=f'Action {action}')
#plt.xlabel('Time step')
#plt.ylabel('Action selection frequency')
#plt.title('Action Selection Frequency over Time with Error Bars')
##plt.legend()
#plt.show()
#

In [None]:
## Calculate action selection frequencies over time
#action_freq_over_time = {action: [sum(action_freq[action][:t]) for t in range(1, len(action_freq[action]) + 1)] for action in range(num_vars)}
#
## Calculate entropy of the action selection distribution over time
#entropy_over_time = []
#for t in range(len(action_freq_over_time[0])):
#    if t == 0:
#        entropy_over_time.append(0)  # Skip calculation for the first time step
#    else:
#        action_probs = [sum(action_freq[action][:t]) / t for action in range(num_vars)]
#        entropy = -np.sum([p * np.log(p) for p in action_probs if p > 0])
#        entropy_over_time.append(entropy)
#
## Plot entropy of action selection distribution over time
#plt.figure(figsize=(10, 6))
#plt.plot(range(1, len(entropy_over_time) + 1), entropy_over_time, label='Entropy')
#plt.xlabel('Time step')
#plt.ylabel('Entropy')
#plt.title('Entropy of Action Selection Distribution over Time')
#plt.legend()
#plt.show()
#

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Evaluate arctan(c) / (c + 1e-6) for counts c = 1..100 and plot it.
rel_counts = np.arange(1, 101)
intrinsic_rewards = np.arctan(rel_counts) / (rel_counts + 1e-6)

plt.plot(rel_counts, intrinsic_rewards)
plt.xlabel('Relative Count')
plt.ylabel('Intrinsic Reward')
plt.title('Plot of Intrinsic Reward vs Relative Count')
plt.grid(True)
plt.show()


Allons un peu plus loin

In [None]:
import numpy as np
import networkx as nx

# Define the number of variables and time steps
num_vars = 5
T = 10

# Generate a random causal graph: each edge i -> j with i < j is included
# with probability 0.3, so the graph is acyclic by construction.
G = nx.DiGraph()
for i in range(num_vars):
    G.add_node(i)
for i in range(num_vars):
    for j in range(i+1, num_vars):
        if np.random.rand() < 0.3:
            G.add_edge(i, j)

# Define the structural causal model.
# U: one fixed standard-normal draw per variable.
# F[i]: sums U over the given parents and adds a fresh standard-normal draw
#       (the lambda does not use i, so all entries compute the same function).
# P[i]: parents of variable i in G.
U = np.random.randn(num_vars)
F = {i: lambda parents: sum(U[j] for j in parents) + np.random.randn() for i in range(num_vars)}
P = {i: [j for j in G.predecessors(i)] for i in range(num_vars)}

# Define the reward condition
reward_vars = [0, 2, 4]  # Subset of variables to consider for reward
reward_range = (-0.5, 0.5)  # Arbitrary range for reward

# Define the agent's action space: action i re-samples the variables in A[i];
# each variable is included in each set with probability 0.2 (sets may be empty).
A = [set() for _ in range(num_vars)]
for i in range(num_vars):
    for j in range(num_vars):
        if np.random.rand() < 0.2:
            A[i].add(j)

# Simulate the reinforcement learning environment
def simulate_episode():
    """Run one episode under a uniformly random policy.

    The initial state comes from the structural functions ``F``. At each of
    the ``T`` steps a random action re-samples the variables in its set, and
    the step scores 1 when every variable in ``reward_vars`` lies inside
    ``reward_range`` (bounds included).

    Returns the number of scoring steps.
    """
    lower, upper = reward_range
    state = [F[i](P[i]) for i in range(num_vars)]
    hits = 0
    for _ in range(T):
        chosen = np.random.choice(len(A))
        for var in A[chosen]:
            state[var] = np.random.randn()
        inside = all(lower <= state[var] <= upper for var in reward_vars)
        if inside:
            hits += 1
    return hits

# Run multiple episodes under the random policy and print the mean
# per-episode reward.
num_episodes = 1000
total_reward = 0
for _ in range(num_episodes):
    total_reward += simulate_episode()
print(f"Average cumulative reward: {total_reward / num_episodes}")

In [None]:
import numpy as np
import networkx as nx
from collections import defaultdict

# Define the number of variables and time steps
num_vars = 5
T = 10

# Generate a random causal graph: each edge i -> j with i < j is included
# with probability 0.3, so the graph is acyclic by construction.
G = nx.DiGraph()
for i in range(num_vars):
    G.add_node(i)
for i in range(num_vars):
    for j in range(i+1, num_vars):
        if np.random.rand() < 0.3:
            G.add_edge(i, j)

# Define the structural causal model.
# U: one fixed standard-normal draw per variable.
# F[i]: sums U over the given parents and adds a fresh standard-normal draw
#       (the lambda does not use i, so all entries compute the same function).
# P[i]: parents of variable i in G.
U = np.random.randn(num_vars)
F = {i: lambda parents: sum(U[j] for j in parents) + np.random.randn() for i in range(num_vars)}
P = {i: [j for j in G.predecessors(i)] for i in range(num_vars)}

# Define the reward condition
reward_vars = [0, 2, 4]  # Subset of variables to consider for reward
reward_range = (-0.5, 0.5)  # Arbitrary range for reward

# Define the agent's action space: action i re-samples the variables in A[i];
# each variable is included in each set with probability 0.2 (sets may be empty).
A = [set() for _ in range(num_vars)]
for i in range(num_vars):
    for j in range(num_vars):
        if np.random.rand() < 0.2:
            A[i].add(j)

# Q-learning parameters
alpha = 0.1  # Learning rate
gamma = 0.9  # Discount factor
epsilon = 0.1  # Exploration rate

# Q-table: maps a state key to one value per action, created as zeros on
# first access.
Q = defaultdict(lambda: np.zeros(len(A)))

# Simulate the reinforcement learning environment
def simulate_episode(epsilon):
    """Run one epsilon-greedy Q-learning episode and return its total reward.

    With probability ``epsilon`` a random action is taken; otherwise the
    action with the largest Q-value for the current state. The chosen action
    re-samples the variables in its set; the step reward is 1 when every
    variable in ``reward_vars`` lies inside ``reward_range``. The Q-table is
    updated on every step, whatever the value of ``epsilon``.

    NOTE(review): states are tuples of raw floats, so a key is only revisited
    when an action leaves the state unchanged.
    """
    lower, upper = reward_range
    state = [F[i](P[i]) for i in range(num_vars)]
    episode_return = 0
    for _ in range(T):
        explore = np.random.rand() < epsilon
        if explore:
            action = np.random.choice(len(A))
        else:
            action = np.argmax(Q[tuple(state)])

        # Re-sample the variables controlled by the chosen action.
        next_state = state.copy()
        for var in A[action]:
            next_state[var] = np.random.randn()

        in_range = all(lower <= next_state[var] <= upper for var in reward_vars)
        reward = 1 if in_range else 0

        # Temporal-difference update of Q(state, action).
        q_row = Q[tuple(state)]
        td_target = reward + gamma * np.max(Q[tuple(next_state)])
        q_row[action] += alpha * (td_target - q_row[action])

        state = next_state
        episode_return += reward
    return episode_return

# Run multiple episodes and train the agent; every 1000 episodes print the
# running mean of the per-episode reward.
num_episodes = 10000
total_reward = 0
for episode in range(num_episodes):
    total_reward += simulate_episode(epsilon)
    if episode % 1000 == 0:
        print(f"Episode {episode}: Average cumulative reward: {total_reward / (episode + 1)}")

# Test the trained agent.
# NOTE(review): simulate_episode still updates the Q-table during these
# episodes; only exploration is switched off.
num_test_episodes = 100
test_reward = 0
for _ in range(num_test_episodes):
    test_reward += simulate_episode(0)  # Set epsilon to 0 for exploitation
print(f"Average test cumulative reward: {test_reward / num_test_episodes}")

In [None]:
import numpy as np
import networkx as nx
from collections import defaultdict
import pandas as pd
from cdt import data
from cdt.causality.pairwise import ANM
from cdt.causality.graph import LiNGAM
from tqdm import tqdm

# Define the number of variables and time steps
num_vars = 5
T = 10

# Generate a random causal graph: each edge i -> j with i < j is included
# with probability 0.3, so the graph is acyclic by construction.
G = nx.DiGraph()
for i in range(num_vars):
    G.add_node(i)
for i in range(num_vars):
    for j in range(i+1, num_vars):
        if np.random.rand() < 0.3:
            G.add_edge(i, j)
# Define the structural causal model.
# U: one fixed standard-normal draw per variable.
# F[i]: sums U over the given parents and adds a fresh standard-normal draw
#       (the lambda does not use i, so all entries compute the same function).
# P[i]: parents of variable i in G.
U = np.random.randn(num_vars)
F = {i: lambda parents: sum(U[j] for j in parents) + np.random.randn() for i in range(num_vars)}
P = {i: [j for j in G.predecessors(i)] for i in range(num_vars)}

# Define the reward condition
reward_vars = [0, 2, 4]  # Subset of variables to consider for reward
reward_range = (-0.5, 0.5)  # Arbitrary range for reward

# Define the agent's action space: action i re-samples the variables in A[i];
# each variable is included in each set with probability 0.2 (sets may be empty).
A = [set() for _ in range(num_vars)]
for i in range(num_vars):
    for j in range(num_vars):
        if np.random.rand() < 0.2:
            A[i].add(j)

# Q-learning parameters
alpha = 0.1  # Learning rate
gamma = 0.9  # Discount factor
epsilon = 0.1  # Exploration rate

# Q-table: maps a state key to one value per action, created as zeros on
# first access.
Q = defaultdict(lambda: np.zeros(len(A)))

# Causal discovery state shared with simulate_episode
causal_data = []      # per-episode DataFrames waiting for the next discovery run
lingam = None         # LiNGAM model used by the most recent discovery run
learned_graph = None  # graph returned by the most recent discovery run

# Simulate the reinforcement learning environment
def simulate_episode(epsilon):
    """Run one epsilon-greedy Q-learning episode and feed causal discovery.

    The state observed before each step is logged under columns ``X0..Xn``.
    Once 100 episodes have been logged, LiNGAM is run on the pooled data, the
    resulting graph is stored in the module-level ``learned_graph`` and the
    log is cleared.

    Parameters
    ----------
    epsilon : float
        Probability of taking a random action at each step.

    Returns
    -------
    int
        Total reward collected during the episode.
    """
    global lingam
    global causal_data
    global learned_graph

    columns = [f"X{i}" for i in range(num_vars)]
    state = [F[i](P[i]) for i in range(num_vars)]
    cumulative_reward = 0
    episode_data = []
    for t in range(T):
        if np.random.rand() < epsilon:
            action = np.random.choice(len(A))  # Explore
        else:
            action = np.argmax(Q[tuple(state)])  # Exploit
        next_state = state.copy()
        manipulated_vars = A[action]
        for var in manipulated_vars:
            next_state[var] = np.random.randn()
        reward = 1 if all(reward_range[0] <= next_state[var] <= reward_range[1] for var in reward_vars) else 0
        Q[tuple(state)][action] += alpha * (reward + gamma * np.max(Q[tuple(next_state)]) - Q[tuple(state)][action])
        # Log the pre-step state. The former `state + next_state` was cut back to
        # the first num_vars entries by zip(), i.e. to `state`, so this records
        # the same rows without the dead concatenation.
        episode_data.append(dict(zip(columns, state)))
        state = next_state
        cumulative_reward += reward

    causal_data.append(pd.DataFrame(episode_data))  # Store episode data as DataFrame

    # Perform causal discovery every 100 logged episodes
    if len(causal_data) >= 100:
        causal_df = pd.concat(causal_data, ignore_index=True)
        # cdt graph models take the data in predict(), not in the constructor,
        # and return a networkx.DiGraph; there is no estimate() method.
        lingam = LiNGAM()
        learned_graph = lingam.predict(causal_df)
        print("Learned graph:")
        # nx.to_agraph no longer exists in networkx; the edge list needs no
        # extra dependency.
        print(list(learned_graph.edges()))
        causal_data = []  # Reset causal data

    return cumulative_reward

# Run multiple episodes and train the agent
num_episodes = 1000
total_reward = 0
for episode in range(num_episodes):
    total_reward += simulate_episode(epsilon)
    if episode % 100 == 0:
        print(f"Episode {episode}: Average cumulative reward: {total_reward / (episode + 1)}")

# Compare the learned graph with the true graph
print("True graph:")
print(list(G.edges()))
if learned_graph is not None:
    print("Final learned graph:")
    print(list(learned_graph.edges()))

# Test the trained agent
num_test_episodes = 100
test_reward = 0
for _ in tqdm(range(num_test_episodes)):
    # NOTE: the agent still explores with epsilon = 0.1 here; pass 0 for pure exploitation.
    test_reward += simulate_episode(0.1)
print(f"Average test cumulative reward: {test_reward / num_test_episodes}")

In [None]:
import cdt
# Show the R executable path that cdt is currently configured with.
print(cdt.SETTINGS.rpath)

In [None]:
# Point cdt at the local R installation (alternative macOS location kept for reference).
# NOTE(review): machine-specific absolute path; presumably this should be the
# Rscript executable rather than R — confirm against the cdt settings docs.
#cdt.SETTINGS.rpath = "/Applications/R.app/Contents/MacOS/R"
cdt.SETTINGS.rpath = "/usr/local/bin/R"

In [None]:

from typing import Callable, Union, Tuple, Dict, List, Optional
class StructuralCausalModel:
    """
    Structural Causal Model for Causal Bandit implementation.

    Each variable has a structural equation ``(function, parents)`` where
    ``parents`` is a dict mapping parent names to their current values. Sampling
    evaluates the variables in topological order and writes every new value into
    the ``parents`` dicts of its children, so the dicts are mutated in place.
    """

    def __init__(
        self,
        variables: List[str],
        structural_equations: Dict[str, Tuple[Callable[[Tuple[int, Union[int, None]]], int], Dict[str, Union[int, None]]]]
    ):
        """
        Instantiate StructuralCausalModel class.

        Parameters
        ----------
        variables: list[variable:str]
            List containing the name of the variables.

        structural_equations: dict[variable: (func, dict[variable:str, value])]
            For each variable, the function computing its value and the dict
            of its parents' values (passed to the function when sampling).
        """
        self.variables = variables  # list of variables
        self.values = {var: None for var in variables}  # last value taken by each variable
        self.structural_equations = structural_equations  # (function, parents) for each variable
        self.causal_graph = self.build_causal_graph(variables, structural_equations)  # causal graph of the SCM

    def build_causal_graph(
        self,
        variables: List[str],
        structural_equations: Dict[str, Tuple[Callable[[Tuple[int, Union[int, None]]], int], Dict[str, Union[int, None]]]]
    ) -> nx.DiGraph:
        """
        Build a causal graph from variables list and structural equations.

        Parameters
        ----------
        variables: list[variable]
        structural_equations: dict[variable, equation]

        Returns
        ----------
        a DiGraph with one node per variable and one edge parent -> child for
        every parent listed in a variable's equation.
        """
        output_graph = nx.DiGraph()
        output_graph.add_nodes_from(variables)
        for child, (_function, parents) in structural_equations.items():
            output_graph.add_edges_from((parent, child) for parent in parents)
        return output_graph

    def graph(self):
        """ Draw the internal causal graph
        """
        nx.draw(self.causal_graph)

    def _propagate_to_children(self, node, value) -> None:
        """Write ``value`` into the parents dict of every equation that lists ``node``."""
        for other in self.causal_graph.nodes:
            _function, parents = self.structural_equations[other]
            if node in parents:
                parents[node] = value

    def get_sample(self, set_values: Optional[Union[Dict[str, int], None]] = None) -> Dict[str, Union[int, None]]:
        """
        Sample from SCM (could be manipulated through set_values).

        Parameters
        ----------
        set_values: dict[variable, int]
            The values fixed by intervention on variables. Intervened
            variables keep the given value; their equation is not evaluated.

        Returns
        ----------
        output_assignments : dict[variable, int]
            The sampled value of every variable. The same values are stored
            in ``self.values`` (previously only intervened variables were
            recorded there, so it never reflected a full sample).
        """
        interventions = {} if set_values is None else set_values

        output_assignments = {var: None for var in self.variables}
        for variable, value in interventions.items():
            output_assignments[variable] = value

        # Topological order guarantees every parent value is in place before
        # a child's equation is evaluated.
        for node in nx.topological_sort(self.causal_graph):
            if node in interventions:
                value = interventions[node]
            else:
                structural_function, parents = self.structural_equations[node]
                value = structural_function(parents)
                output_assignments[node] = value
            self._propagate_to_children(node, value)

        self.values.update(output_assignments)
        return output_assignments


In [None]:
import networkx as nx

def directed_edit_distance(G1, G2):
    """Count the directed edges that appear in exactly one of the two graphs.

    This is the number of edge insertions plus deletions needed to turn the
    edge set of ``G1`` into that of ``G2``; an edge and its reverse count as
    different edges.
    """
    first_edges, second_edges = set(G1.edges()), set(G2.edges())
    # The symmetric difference holds exactly the edges exclusive to one graph.
    return len(first_edges ^ second_edges)

# Create two directed graphs for the example
G1 = nx.DiGraph()
G2 = nx.DiGraph()

# Add edges to the graphs
G1.add_edges_from([(1, 2), (2, 3), (3, 4)])
G2.add_edges_from([(1, 2), (2, 4), (4, 3)])

# Compute the edit distance
edit_distance = directed_edit_distance(G1, G2)
print(f"La distance d'édition dirigée est: {edit_distance}")


In [None]:
import networkx as nx
from networkx.algorithms.flow import maximum_flow

def compare_graph_flows(G1, G2, source, sink):
    """Return the absolute difference between the maximum source-to-sink flow
    values of ``G1`` and ``G2``."""
    flow_values = []
    for graph in (G1, G2):
        # maximum_flow returns (flow value, flow dict); only the value is used.
        value, _flow_dict = maximum_flow(graph, source, sink)
        flow_values.append(value)
    return abs(flow_values[0] - flow_values[1])

# Example: build two directed graphs
G1 = nx.DiGraph()
G2 = nx.DiGraph()

# Add edges and capacities for G1
G1.add_edge('a', 'b', capacity=10)
G1.add_edge('b', 'c', capacity=5)

# Add edges and capacities for G2
G2.add_edge('a', 'b', capacity=7)
G2.add_edge('b', 'c', capacity=8)

# Compare the maximum flows between 'a' (source) and 'c' (sink)
difference = compare_graph_flows(G1, G2, 'a', 'c')
print(f"La différence de flot maximal entre G1 et G2 est: {difference}")


In [None]:
import numpy as np

def epsilon_greedy_policy(Q, D, state, epsilon, num_actions):
    """
    Pick an action epsilon-greedily.

    With probability ``epsilon`` a uniformly random action index is returned;
    otherwise the index maximising ``Q[state] + D`` (Q-values plus the causal
    curiosity bonus).
    """
    exploit = not (np.random.rand() < epsilon)
    if exploit:
        return np.argmax(Q[state] + D)
    return np.random.randint(num_actions)

def update_causal_graph(observation, causal_discovery_algorithm, graph_distance_metric, causal_graph=None):
    """
    Infer a new causal graph from one transition and measure how far it moved.

    Parameters
    ----------
    observation : tuple
        ``(state, action, next_state)``.
    causal_discovery_algorithm : callable
        Called as ``f(state, action, next_state)``; returns the new graph.
    graph_distance_metric : callable
        Called as ``f(old_graph, new_graph)``; returns the distance.
    causal_graph : optional
        The current (old) graph. When omitted, the module-level
        ``causal_graph`` is used, which is what this function always did
        before the parameter existed.

    Returns
    -------
    (new_causal_graph, graph_distance)
    """
    if causal_graph is None:
        try:
            causal_graph = globals()["causal_graph"]
        except KeyError:
            raise NameError("name 'causal_graph' is not defined") from None

    state, action, next_state = observation
    new_causal_graph = causal_discovery_algorithm(state, action, next_state)
    graph_distance = graph_distance_metric(causal_graph, new_causal_graph)
    return new_causal_graph, graph_distance

def q_learning_causal_curiosity(env, num_episodes, alpha, epsilon, gamma, beta, delta):
    """
    Q-learning Causal Curiosity Algorithm

    Tabular Q-learning in which action selection adds a per-action curiosity
    bonus ``D`` that grows with the distance between successive causal graphs.

    Parameters
    ----------
    env : environment exposing ``observation_space.n``, ``action_space.n``,
        ``reset()`` and ``step(action) -> (next_state, reward, done, info)``.
    num_episodes : number of episodes to run.
    alpha, gamma : Q-learning step size and discount factor.
    epsilon : exploration probability.
    beta : weight of the graph distance in the bonus update.
    delta : weight of the previous bonus in the bonus update.

    Returns
    -------
    (Q, D, causal_graph)

    NOTE(review): ``initialize_causal_graph``, ``causal_discovery_algorithm``
    and ``graph_distance_metric`` are read from module scope and are not
    defined in the visible part of the notebook.
    """
    num_states = env.observation_space.n
    num_actions = env.action_space.n

    # Initialize Q-values, causal curiosity bonuses, and causal graph
    Q = np.zeros((num_states, num_actions))
    D = np.zeros(num_actions)
    causal_graph = initialize_causal_graph()

    for episode in range(num_episodes):
        state = env.reset()
        done = False

        while not done:
            # Select action using epsilon-greedy policy with Q-values and causal curiosity bonus
            action = epsilon_greedy_policy(Q, D, state, epsilon, num_actions)

            # Take action, observe next state and reward
            next_state, reward, done, _ = env.step(action)

            # Update Q-values
            Q[state, action] += alpha * (reward + gamma * np.max(Q[next_state]) - Q[state, action])

            # Update causal graph and compute graph distance against the graph
            # maintained in this function (previously a module-level
            # `causal_graph` was compared instead of this local one).
            new_causal_graph, graph_distance = update_causal_graph(
                (state, action, next_state),
                causal_discovery_algorithm,
                graph_distance_metric,
                causal_graph=causal_graph,
            )

            # Update causal curiosity bonus
            # NOTE(review): `+=` makes this D <- (1 + delta) * D + beta * distance;
            # confirm a decaying update (D <- delta * D + ...) was not intended.
            D[action] += delta * D[action] + beta * graph_distance

            # Update state and causal graph
            state = next_state
            causal_graph = new_causal_graph

    return Q, D, causal_graph

In [None]:
import numpy as np
import networkx as nx

def sample_from_normal(args):
    """Draw one standard-normal sample and truncate it to an int.

    ``args`` is ignored; it exists so the function matches the
    structural-equation call signature ``function(parents)``.
    """
    draw = np.random.normal()
    return int(draw)

class CausalEnvironment:
    """Environment whose dynamics are samples from a random structural causal model.

    Parameters
    ----------
    num_vars : int
        Number of endogenous variables ``X0..X{n-1}``; one exogenous variable
        ``U{i}`` is created for each.
    num_actions : int
        Stored on the instance; not used by the methods of this class.
    T : int
        Stored on the instance; not used by the methods of this class.
    density : float, optional
        Probability of each candidate edge before the strict lower triangle is
        taken (default 0.3).
    seed : int, optional
        When given, seeds NumPy's global random generator.
    """

    def __init__(self, num_vars, num_actions, T, density=0.3, seed=None):
        if seed is not None:
            np.random.seed(seed)

        # Generate the Structural Causal Model (SCM)
        self.variables = [f'X{i}' for i in range(num_vars)]
        self.exogenous_vars = [f'U{i}' for i in range(num_vars)]
        self.structural_equations = {}

        # Generate a directed acyclic graph (DAG) with random edges between variables
        self.causal_graph = nx.DiGraph()
        self.causal_graph.add_nodes_from(self.variables)

        # Keeping only the strict lower triangle (i > j) rules out cycles.
        adj_mat = np.random.choice([0, 1], size=(num_vars, num_vars), p=[1 - density, density])
        adj_mat = np.tril(adj_mat, -1)
        self.adjacency_matrix = adj_mat  # kept for inspection instead of printing it

        for i in range(num_vars):
            for j in range(num_vars):
                if adj_mat[i][j] == 1:
                    # Use the variable names: adding edges between the integer
                    # indices created separate nodes and left every 'X{i}'
                    # without parents.
                    self.causal_graph.add_edge(self.variables[i], self.variables[j])

        # Check if the graph is acyclic
        if not nx.is_directed_acyclic_graph(self.causal_graph):
            raise ValueError("The generated causal graph contains cycles.")

        # Define structural equations for variables and exogenous variables.
        # Every equation is a truncated standard-normal draw that ignores its parents.
        for var in self.variables:
            parents = list(self.causal_graph.predecessors(var))
            self.structural_equations[var] = (lambda *args: int(np.random.normal()), {p: None for p in parents})

        for u in self.exogenous_vars:
            self.structural_equations[u] = (sample_from_normal, {})

        self.scm = StructuralCausalModel(self.variables + self.exogenous_vars, self.structural_equations)

        self.T = T
        self.num_actions = num_actions

    def transition(self, state, action):
        """Sample the next state under the interventions in ``action``.

        ``action`` is an iterable of ``(variable, value)`` pairs; ``state`` is
        not used.
        """
        set_values = {var: value for var, value in action}
        return self.scm.get_sample(set_values)

    def reward(self, state):
        """Example reward (computed outside the SCM): the sum of all variable values."""
        return sum(state.values())

    def reset(self):
        """Return a fresh sample from the SCM without intervention."""
        state = self.scm.get_sample()
        return state

    def step(self, action):
        """Apply ``action`` and return ``(next_state, reward, done, info)``.

        ``done`` is always False (episodes never terminate) and ``info`` is an
        empty dict.
        """
        current_state = self.scm.values
        next_state = self.transition(current_state, action)
        reward = self.reward(next_state)
        done = False  # Assuming episodes are not terminating
        info = {}

        return next_state, reward, done, info

In [None]:
# Build a 10-variable causal environment; the seed makes the random graph reproducible.
num_vars = 10
num_actions = 3
T = 10
seed = 42
env = CausalEnvironment(num_vars, num_actions, T, seed=seed)


In [None]:
# nx.draw returns None, so wrapping it in print() only printed "None";
# draw the graph directly and label the nodes.
nx.draw(env.causal_graph, with_labels=True)

In [None]:
# Take one step that forces X1 to 1; the cell displays the returned
# (next_state, reward, done, info) tuple.
env.step([('X1',1)])

In [None]:
import numpy as np

class QLearningAgent:
    """Tabular epsilon-greedy Q-learning agent.

    States are dictionaries; they are turned into hashable keys by sorting
    their items. Actions are ``(variable_name, value)`` pairs built by
    ``index_to_action`` as ``('X<i>', i)``.

    :param env: Environment exposing ``variables``, ``reset()`` and
        ``step(actions)`` returning ``(next_state, reward, done, info)``.
    :param alpha: Learning rate of the Q update.
    :param gamma: Discount factor of the Q update.
    :param epsilon: Initial probability of taking a random action.
    :param epsilon_decay: Factor applied to ``epsilon`` after every episode.
    """

    def __init__(self, env, alpha, gamma, epsilon, epsilon_decay):
        self.env = env
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        # One action per environment variable.
        self.num_actions = len(env.variables)
        # Maps a state key to an array holding one Q-value per action.
        self.Q = {}
        # Set by choose_action so train() can count real exploration steps.
        self.last_action_was_exploratory = False

    @staticmethod
    def _state_key(state):
        """Return a hashable, order-independent key for a state dict."""
        return tuple(sorted(state.items()))

    def update_Q(self, state, action, reward, next_state):
        """Apply one Q-learning update for ``(state, action, reward, next_state)``.

        States that have not been seen yet get an all-zero Q row first.
        """
        state_key = self._state_key(state)
        next_state_key = self._state_key(next_state)

        for key in (state_key, next_state_key):
            if key not in self.Q:
                self.Q[key] = np.zeros((self.num_actions,))

        action_index = self.action_to_index(action)

        old_q_value = self.Q[state_key][action_index]
        max_next_q_value = np.max(self.Q[next_state_key])
        td_error = reward + self.gamma * max_next_q_value - old_q_value

        self.Q[state_key][action_index] = old_q_value + self.alpha * td_error

    def choose_action(self, state):
        """Pick an action epsilon-greedily.

        A random action is taken when the state has no Q row yet or with
        probability ``epsilon``; otherwise the action with the highest
        Q-value is taken. Whether the choice was random is recorded in
        ``self.last_action_was_exploratory``.
        """
        state_key = self._state_key(state)

        explore = state_key not in self.Q or np.random.rand() < self.epsilon
        self.last_action_was_exploratory = bool(explore)

        if explore:
            return self.index_to_action(np.random.randint(self.num_actions))
        return self.index_to_action(np.argmax(self.Q[state_key]))

    def action_to_index(self, action):
        """Return the Q-table column of an action (its second element)."""
        return action[1]

    def index_to_action(self, index):
        """Build the action ``('X<index>', index)`` for a Q-table column."""
        return ('X' + str(index), index)

    def train(self, num_episodes, max_steps=100):
        """Run ``num_episodes`` training episodes.

        :param num_episodes: Number of episodes to run.
        :param max_steps: Maximum number of environment steps per episode.
        :return: ``(cumulative_rewards, exploration_behavior)``: per-episode
            summed reward and per-episode count of random (exploratory)
            actions actually taken.
        """
        cumulative_rewards = []
        exploration_behavior = []

        for _episode in range(num_episodes):
            state = self.env.reset()
            episode_reward = 0
            exploration_count = 0

            for _step in range(max_steps):
                action = self.choose_action(state)
                # Count the decision that was really made, instead of
                # drawing a second, unrelated random number.
                if self.last_action_was_exploratory:
                    exploration_count += 1

                next_state, reward, done, _info = self.env.step([action])
                self.update_Q(state, action, reward, next_state)

                state = next_state
                episode_reward += reward

                if done:
                    break

            cumulative_rewards.append(episode_reward)
            exploration_behavior.append(exploration_count)
            self.epsilon *= self.epsilon_decay

        return cumulative_rewards, exploration_behavior

In [None]:
import matplotlib.pyplot as plt

# Build the environment and a tabular Q-learning agent acting in it
env = CausalEnvironment(num_vars=5, num_actions=5, T=10, seed=42)
agent = QLearningAgent(env, alpha=0.1, gamma=0.9, epsilon=1.0, epsilon_decay=0.99)

# Train the agent
num_episodes = 1000
cumulative_rewards, exploration_behavior = agent.train(num_episodes)

# One figure per training curve: episode reward first, then exploration count
for series, y_label, figure_title in (
    (cumulative_rewards, 'Cumulative Reward', 'Cumulative Reward over Episodes'),
    (exploration_behavior, 'Exploration Count', 'Exploration Behavior over Episodes'),
):
    plt.figure(figsize=(10, 6))
    plt.plot(range(num_episodes), series)
    plt.xlabel('Episode')
    plt.ylabel(y_label)
    plt.title(figure_title)
    plt.show()

In [None]:
%load_ext cython

In [None]:
#%%cython -a
import numpy as np
import pandas as pd
import cdt
from cdt.metrics import precision_recall, SID, SHD
from tqdm import tqdm
from causallearn.search.ConstraintBased.PC import pc
from causallearn.graph import SHD

def shd_cl_metric(new_graph, graph):
    """Return the structural Hamming distance between two graphs.

    Uses the ``SHD`` class of the ``causallearn.graph.SHD`` module and
    returns the value of its ``get_shd()``.
    """
    return SHD.SHD(new_graph, graph).get_shd()

def pc_algo(history):
    """Run the PC algorithm on the recorded states and return its graph.

    :param history: List of state dictionaries; converted to a 2-D array
        with ``dicts_to_numpy_array`` before being passed to ``pc``.
    :return: The ``G`` attribute of the object returned by ``pc``.
    """
    samples = dicts_to_numpy_array(history)
    # PC is run with alpha=0.2 and without verbose output.
    return pc(samples, alpha=0.2, verbose=False).G

class CausalCuriosityQLearningAgent:
    """Tabular epsilon-greedy Q-learning agent with an optional curiosity bonus.

    When training with ``flag=True`` the agent periodically re-estimates a
    causal graph from every state it has observed and adds
    ``beta * graph_distance`` (distance between the new and the previous
    estimate) to the reward used in the Q update.

    Actions are ``(variable_name, value)`` pairs built by ``index_to_action``
    as ``('X<i>', 1)``.

    :param env: Environment exposing ``variables``, ``reset()`` and
        ``step(actions)`` returning ``(next_state, reward, done, info)``.
    :param alpha: Learning rate of the Q update.
    :param gamma: Discount factor of the Q update.
    :param epsilon: Initial probability of taking a random action.
    :param epsilon_decay: Factor applied to ``epsilon`` after every episode.
    :param delta: Decay applied to the stored per-action distance in ``update_D``.
    :param beta: Weight of the graph distance (intrinsic reward and ``update_D``).
    """

    def __init__(self, env, alpha, gamma, epsilon, epsilon_decay, delta, beta):
        self.env = env
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.delta = delta
        self.beta = beta
        # One action per environment variable.
        self.num_actions = len(env.variables)

        # Maps a state key to an array holding one Q-value per action.
        self.Q = {}
        # Per-action graph-distance score, maintained by update_D.
        self.D = np.zeros((self.num_actions,))
        # Most recent causal graph estimate (None until the first estimate).
        self.causal_graph = None
        # Every state returned by env.step during training.
        self.history = []
        # Graph distance recorded at every graph re-estimation.
        self.history_distance = []
        # Set by choose_action so train() can count real exploration steps.
        self.last_action_was_exploratory = False

    def update_Q(self, state, action, reward, next_state):
        """Apply one Q-learning update for ``(state, action, reward, next_state)``.

        States that have not been seen yet get an all-zero Q row first.
        """
        state_key = tuple(sorted(state.items()))
        next_state_key = tuple(sorted(next_state.items()))

        if state_key not in self.Q:
            self.Q[state_key] = np.zeros((self.num_actions,))
        if next_state_key not in self.Q:
            self.Q[next_state_key] = np.zeros((self.num_actions,))

        action_index = self.action_to_index(action)

        old_q_value = self.Q[state_key][action_index]
        max_next_q_value = np.max(self.Q[next_state_key])
        new_q_value = old_q_value + self.alpha * (reward + self.gamma * max_next_q_value - old_q_value)

        self.Q[state_key][action_index] = new_q_value

    def update_D(self, action, graph_distance):
        """Decay the stored distance score of ``action`` and add the new distance.

        ``self.D`` is pre-allocated with one slot per action, so no
        initialisation is needed here. (A membership test such as
        ``index not in self.D`` would compare against the stored values, not
        the indices, and could reset an accumulated score.)

        Note that ``train`` does not call this method and ``choose_action``
        does not read ``self.D``.
        """
        action_index = self.action_to_index(action)
        self.D[action_index] = self.delta * self.D[action_index] + self.beta * graph_distance

    def update_causal_graph(self, state, action, causal_discovery_algo=pc_algo, graph_distance_metric=shd_cl_metric):
        """Re-estimate the causal graph from ``self.history`` and measure the change.

        :param state: Unused; kept for interface compatibility.
        :param action: Unused; kept for interface compatibility.
        :param causal_discovery_algo: Callable mapping the history to a graph.
        :param graph_distance_metric: Callable ``(new_graph, old_graph) -> distance``.
        :return: ``(new_causal_graph, graph_distance)``. ``self.causal_graph``
            is only assigned here when it was still ``None``; in that case the
            new graph is compared with itself.
        """
        new_causal_graph = causal_discovery_algo(self.history)

        # Identity check: graph objects may define their own ``==``.
        if self.causal_graph is None:
            self.causal_graph = new_causal_graph

        graph_distance = graph_distance_metric(new_causal_graph, self.causal_graph)

        return new_causal_graph, graph_distance

    def choose_action(self, state):
        """Pick an action epsilon-greedily.

        A random action is taken when the state has no Q row yet or with
        probability ``epsilon``; otherwise the action with the highest
        Q-value is taken. Whether the choice was random is recorded in
        ``self.last_action_was_exploratory``.
        """
        state_key = tuple(sorted(state.items()))

        explore = state_key not in self.Q or np.random.rand() < self.epsilon
        self.last_action_was_exploratory = bool(explore)

        if explore:
            return self.index_to_action(np.random.randint(self.num_actions))
        return self.index_to_action(np.argmax(self.Q[state_key]))

    def action_to_index(self, action):
        """Return the Q-table column of an action.

        The column is parsed from the variable name (``'X3'`` -> ``3``). The
        second element of the action is the value being set, which
        ``index_to_action`` always sets to 1, so it cannot identify the action.
        """
        return int(action[0][1:])

    def index_to_action(self, index):
        """Build the action ``('X<index>', 1)`` for a Q-table column."""
        return ('X' + str(index), 1)

    def train(self, num_episodes, causal_discovery_algo=pc_algo, graph_distance_metric=shd_cl_metric, flag=False):
        """Run ``num_episodes`` training episodes of up to 100 steps each.

        :param num_episodes: Number of episodes to run.
        :param causal_discovery_algo: Passed to ``update_causal_graph``.
        :param graph_distance_metric: Passed to ``update_causal_graph``.
        :param flag: When true, the causal graph is re-estimated on steps
            with ``i > 10 and i % 50 == 0`` and ``beta * graph_distance`` is
            added to the reward used in that step's Q update.
        :return: ``(cumulative_rewards, exploration_behavior)``: per-episode
            summed environment reward (intrinsic bonus excluded) and
            per-episode count of random actions actually taken.
        """
        cumulative_rewards = []
        exploration_behavior = []

        for episode in tqdm(range(num_episodes)):
            state = self.env.reset()
            episode_reward = 0
            exploration_count = 0

            for i in range(100):
                action = self.choose_action(state)
                # Count the decision that was really made, instead of
                # drawing a second, unrelated random number.
                if self.last_action_was_exploratory:
                    exploration_count += 1

                next_state, reward, done, _ = self.env.step([action])
                self.history.append(next_state)

                int_reward = 0

                if flag and i > 10 and (i % 50 == 0):
                    new_causal_graph, graph_distance = self.update_causal_graph(
                        state, action, causal_discovery_algo, graph_distance_metric
                    )

                    self.history_distance.append(graph_distance)
                    self.causal_graph = new_causal_graph
                    int_reward = self.beta * graph_distance

                self.update_Q(state, action, reward + int_reward, next_state)
                state = next_state
                episode_reward += reward

                if done:
                    break

            cumulative_rewards.append(episode_reward)
            exploration_behavior.append(exploration_count)
            self.epsilon *= self.epsilon_decay

        return cumulative_rewards, exploration_behavior

    

#def fci_algo(history):
#
#    array_data = dicts_to_numpy_array(history)
#
#    g, _ = fci(array_data)
#
#    return g

def ges(history):
    """Run causal-learn's GES search on the recorded states.

    :param history: List of state dictionaries; converted to a 2-D array
        with ``dicts_to_numpy_array``.
    :return: The record returned by causal-learn's ``ges``.
    """
    # Imported under an alias: this wrapper is itself named ``ges``, so a
    # plain ``ges(array_data)`` call would recurse into the wrapper instead
    # of reaching the library function.
    from causallearn.search.ScoreBased.GES import ges as _causallearn_ges

    array_data = dicts_to_numpy_array(history)

    record = _causallearn_ges(array_data)

    return record


def glasso_ges(history):
    """Estimate a graph skeleton from the history with cdt's Glasso.

    The ARACNE-pruned skeleton and the GES model are built but not used:
    the GES prediction is commented out and the raw Glasso skeleton is
    what gets returned.

    :param history: Records accepted by ``pd.DataFrame``.
    :return: The skeleton returned by ``Glasso.predict``.
    """
    estimator = cdt.independence.graph.Glasso()
    print('Graph computation started...')
    frame = pd.DataFrame(history)
    for message in ('Glasso init...', 'Glasso predict...'):
        print(message)
    skeleton = estimator.predict(frame)
    pruned_skeleton = cdt.utils.remove_indirect_links(skeleton, alg='aracne')
    print('GES init...')
    ges_model = cdt.causality.graph.GES()
    print('GES predict...')
    #output_graph = ges_model.predict(frame, pruned_skeleton)
    print('Done.')

    return skeleton #output_graph

def shd_metric(new_graph, graph):
    """Return the structural Hamming distance computed by ``cdt.metrics.SHD``.

    :param new_graph: Passed as the first argument of ``cdt.metrics.SHD``.
    :param graph: Passed as the second argument of ``cdt.metrics.SHD``.
    """
    # Imported locally under an alias: at module level the name ``SHD`` is
    # rebound to the ``causallearn.graph.SHD`` module, which is not callable.
    from cdt.metrics import SHD as _cdt_shd

    return _cdt_shd(new_graph, graph)

def dicts_to_numpy_array(dicts_list):
    """
    Convert a list of dictionaries into a 2-D NumPy array.

    Columns follow the key order of the first dictionary. Every other
    dictionary is read by key, so dictionaries holding the same keys in a
    different insertion order still line up column by column.

    :param dicts_list: Non-empty list of dictionaries with the same keys.
    :return: A NumPy array in which each row holds the values of one dictionary.
    :raises ValueError: If the list is empty or the dictionaries do not all
        have the same keys.
    """
    if len(dicts_list) == 0:
        raise ValueError("dicts_list must contain at least one dictionary")

    reference_keys = dicts_list[0].keys()
    columns = list(reference_keys)

    rows = []
    for data_dict in dicts_list:
        if data_dict.keys() != reference_keys:
            raise ValueError("all dictionaries must have the same keys")
        # Read by key rather than by position so column order is stable.
        rows.append(np.array([data_dict[key] for key in columns]))

    # Stack the per-dictionary rows vertically into one array.
    return np.vstack(rows)





In [None]:
class BaselineAgent(CausalCuriosityQLearningAgent):
    """Random-action baseline: it acts in the environment but never learns.

    ``train`` performs no Q update and no causal discovery, and
    ``choose_action`` returns a uniformly random action in both branches.
    """

    def __init__(self, env, alpha=None, gamma=None, epsilon=1.0, epsilon_decay=0.99, delta=None, beta=None):
        # Initialize the superclass with all the necessary arguments
        super().__init__(env, alpha, gamma, epsilon, epsilon_decay, delta, beta)

    def choose_action(self, state):
        """Return a uniformly random action for ``state``.

        The explore/exploit split is kept, but the greedy argmax is
        disabled, so both branches draw a random action index.
        """
        state_key = tuple(sorted(state.items()))

        if state_key not in self.Q or np.random.rand() < self.epsilon:
            # Explore
            return self.index_to_action(np.random.randint(self.num_actions))
        else:
            # The greedy choice np.argmax(self.Q[state_key]) is deliberately not used.
            action_index = np.random.randint((self.num_actions))
            return self.index_to_action(action_index)

    def train(self, num_episodes, causal_discovery_algo=None, graph_distance_metric=None, flag=False):
        """Run ``num_episodes`` episodes of 100 random steps each.

        :param num_episodes: Number of episodes to run.
        :param causal_discovery_algo: Ignored; accepted for signature
            compatibility with the parent class.
        :param graph_distance_metric: Ignored; accepted for the same reason.
        :param flag: Ignored; accepted so that calls written for the parent
            class (``train(..., flag=...)``) also work on the baseline.
        :return: ``(cumulative_rewards, exploration_behavior)``. The
            exploration count comes from an independent draw against
            ``epsilon`` at every step, not from the action choice.
        """
        cumulative_rewards = []
        exploration_behavior = []

        for episode in tqdm(range(num_episodes)):
            state = self.env.reset()
            done = False
            episode_reward = 0
            exploration_count = 0

            for i in (range(100)):
                action = self.choose_action(state)
                if np.random.rand() < self.epsilon:
                    exploration_count += 1

                next_state, reward, done, _ = self.env.step([action])

                state = next_state
                episode_reward += reward

            cumulative_rewards.append(episode_reward)
            exploration_behavior.append(exploration_count)
            self.epsilon *= self.epsilon_decay

        return cumulative_rewards, exploration_behavior

In [None]:
import matplotlib.pyplot as plt

# Create the environment
env = CausalEnvironment(num_vars=5, num_actions=5, T=10, density=0.5, seed=42)

# "Traditional" agent: same class as the curiosity agent, but trained below
# with flag=False so no intrinsic reward is added.
agent = CausalCuriosityQLearningAgent(env, alpha=0.1, gamma=0.9, epsilon=1.0, epsilon_decay=0.99, 
                                                delta=0.2, beta = 0.1)

#agent = QLearningAgent(env, alpha=0.1, gamma=0.9, epsilon=1.0, epsilon_decay=0.99)
# Curiosity agent: trained below with flag=True.
agent_causal = CausalCuriosityQLearningAgent(env, alpha=0.1, gamma=0.9, epsilon=1.0, epsilon_decay=0.99, 
                                                delta=0.2, beta = 0.1)
# Random-action baseline.
baseline = BaselineAgent(env)           

# Train the agents
num_episodes = 1000
cumulative_rewards_causal, exploration_behavior_causal = agent_causal.train(num_episodes, pc_algo, shd_cl_metric, flag=True)
cumulative_rewards, _ = agent.train(num_episodes, flag=False)
# The random curve must come from the baseline; training `agent` a second
# time would only continue the already trained Q-learning agent.
cumulative_rewards_random, _ = baseline.train(num_episodes)


In [None]:
# Plot cumulative reward
#plt.figure(figsize=(10, 6))
#plt.plot(range(num_episodes), cumulative_rewards_causal)
#plt.xlabel('Episode')
#plt.ylabel('Cumulative Reward')
#plt.title('Cumulative Reward over Episodes')
#plt.show()

# Plot exploration behavior
#plt.figure(figsize=(10, 6))
#plt.plot(range(num_episodes), exploration_behavior_causal)
#plt.xlabel('Episode')
#plt.ylabel('Exploration Count')
#plt.title('Exploration Behavior over Episodes')
#plt.show()

In [None]:
# Plotting cumulative rewards
# Overlay the per-episode rewards of both agents on a single figure
plt.figure(figsize=(12, 8))
for curve, curve_label in (
    (cumulative_rewards, 'Traditional Q-Learning'),
    (cumulative_rewards_causal, 'Causal Curiosity Q-Learning'),
):
    plt.plot(curve, label=curve_label)
plt.title('Comparison of Cumulative Rewards')
plt.xlabel('Episodes')
plt.ylabel('Cumulative Reward')
plt.legend()
plt.show()



In [None]:
from scipy.stats import ttest_ind
# Two-sample t-test (scipy's ttest_ind with its default settings) between the
# per-episode rewards of the two agents.
t_stat, p_value = ttest_ind(cumulative_rewards, cumulative_rewards_causal)

print(f"T-statistic: {t_stat}, P-value: {p_value}")


In [None]:
#%%cython -a
import numpy as np

# Define the range of values for beta and delta to be tested
beta_values = np.linspace(start=0.1, stop=1.0, num=10)  # Example range for beta
delta_values = np.linspace(start=0.1, stop=1.0, num=10)  # Example range for delta

# performance_results[i, j] belongs to (beta_values[i], delta_values[j])
performance_results = np.zeros((len(beta_values), len(delta_values)))

num_episodes = 100

for i, beta in (enumerate(beta_values)):
    for j, delta in enumerate(delta_values):
        # Initialize the agent with current beta and delta
        agent_causal = CausalCuriosityQLearningAgent(env, alpha=0.1, gamma=0.9, epsilon=1.0, 
                                                     epsilon_decay=0.99, delta=delta, beta=beta)
        
        # Train with flag=True: without it the graph-distance bonus is never
        # added and beta has no influence on training.
        # NOTE: delta is only read by update_D, which train() does not call,
        # so differences along the delta axis reflect run-to-run randomness.
        cumulative_reward, _ = agent_causal.train(num_episodes, pc_algo, shd_cl_metric, flag=True)
        
        # Store the reward of the last training episode as the performance metric
        performance_results[i, j] = cumulative_reward[-1]


In [None]:
import matplotlib.pyplot as plt

# Create a meshgrid for plotting. indexing='ij' makes B[i, j] == beta_values[i]
# and D[i, j] == delta_values[j], matching performance_results[i, j]; the
# default 'xy' indexing would plot the results transposed.
B, D = np.meshgrid(beta_values, delta_values, indexing='ij')

# Plot the results
plt.figure(figsize=(10, 8))
cp = plt.contourf(B, D, performance_results, 20, cmap='viridis')  # Adjust colormap as needed
plt.colorbar(cp)
plt.title('Agent Performance Across Beta and Delta Values')
plt.xlabel('Beta (Curiosity Reward Parameter)')
plt.ylabel('Delta (Curiosity Decay Rate)')
plt.show()


In [None]:
#%%cython -a
import numpy as np

# Sweep beta only; every run below uses delta=0.0
beta_values = np.linspace(start=0.0, stop=1.0, num=10)
# delta_values is defined here but not read by this sweep
delta_values = np.linspace(start=0.0, stop=1.0, num=10)

# One slot per beta value
performance_results = np.zeros((len(beta_values),))

num_episodes = 100

for i, beta in enumerate(beta_values):
    # Fresh agent for the current beta, trained with the curiosity bonus on
    agent_causal = CausalCuriosityQLearningAgent(
        env,
        alpha=0.1,
        gamma=0.9,
        epsilon=1.0,
        epsilon_decay=0.99,
        delta=0.0,
        beta=beta,
    )
    cumulative_reward, _ = agent_causal.train(num_episodes, pc_algo, shd_cl_metric, flag=True)

    # Performance metric: mean per-episode reward over the run
    performance_results[i] = np.mean(cumulative_reward)


In [None]:
# Mean per-episode reward obtained for each beta value of the sweep
plt.figure(figsize=(10, 8))
plt.title('Agent Performance Across Beta Values')
plt.plot(beta_values, performance_results)
plt.xlabel('Beta (Curiosity Reward Parameter)')
plt.ylabel('Mean Cumulative Reward')
plt.show()

In [None]:
# Sanity check: distance of the learned graph to itself. The agent's graph is
# produced by pc_algo (causal-learn), so the causal-learn metric is the one
# that applies to it.
shd_cl_metric(agent_causal.causal_graph, agent_causal.causal_graph)

In [None]:
# Graph distances recorded by the most recently trained curiosity agent, in
# the order they were appended during training.
plt.plot(agent_causal.history_distance)

In [None]:
# Rolling window over the recorded graph distances
window_size = 10
distance_series = pd.Series(agent_causal.history_distance)
rolling_window = distance_series.rolling(window=window_size)

# Mean over each window (the first window_size - 1 entries are NaN)
rolling_average = rolling_window.mean()

# Raw distances and their rolling mean on the same axes
plt.figure(figsize=(10, 5))
plt.plot(agent_causal.history_distance, color='blue', label='Original Data')
plt.plot(rolling_average, color='red', label=f'Rolling Average (Window Size: {window_size})')
plt.title('Rolling Average of graph distance between succesive models')
plt.xlabel('Episode')
plt.ylabel('Value')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
class BaselineAgent(CausalCuriosityQLearningAgent):
    """Random-action baseline: it acts in the environment but never learns.

    ``train`` performs no Q update and no causal discovery, and
    ``choose_action`` returns a uniformly random action in both branches.
    """

    def __init__(self, env, alpha=None, gamma=None, epsilon=1.0, epsilon_decay=0.99, delta=None, beta=None):
        # Initialize the superclass with all the necessary arguments
        super().__init__(env, alpha, gamma, epsilon, epsilon_decay, delta, beta)

    def choose_action(self, state):
        """Return a uniformly random action for ``state``.

        The explore/exploit split is kept, but the greedy argmax is
        disabled, so both branches draw a random action index.
        """
        state_key = tuple(sorted(state.items()))

        if state_key not in self.Q or np.random.rand() < self.epsilon:
            # Explore
            return self.index_to_action(np.random.randint(self.num_actions))
        else:
            # The greedy choice np.argmax(self.Q[state_key]) is deliberately not used.
            action_index = np.random.randint((self.num_actions))
            return self.index_to_action(action_index)

    def train(self, num_episodes, causal_discovery_algo=None, graph_distance_metric=None, flag=False):
        """Run ``num_episodes`` episodes of 100 random steps each.

        :param num_episodes: Number of episodes to run.
        :param causal_discovery_algo: Ignored; accepted for signature
            compatibility with the parent class.
        :param graph_distance_metric: Ignored; accepted for the same reason.
        :param flag: Ignored; accepted so that calls written for the parent
            class (``train(..., flag=...)``) also work on the baseline.
        :return: ``(cumulative_rewards, exploration_behavior)``. The
            exploration count comes from an independent draw against
            ``epsilon`` at every step, not from the action choice.
        """
        cumulative_rewards = []
        exploration_behavior = []

        for episode in tqdm(range(num_episodes)):
            state = self.env.reset()
            done = False
            episode_reward = 0
            exploration_count = 0

            for i in (range(100)):
                action = self.choose_action(state)
                if np.random.rand() < self.epsilon:
                    exploration_count += 1

                next_state, reward, done, _ = self.env.step([action])

                state = next_state
                episode_reward += reward

            cumulative_rewards.append(episode_reward)
            exploration_behavior.append(exploration_count)
            self.epsilon *= self.epsilon_decay

        return cumulative_rewards, exploration_behavior

In [None]:
# Train the random-action baseline for the current value of num_episodes.
# NOTE(review): if the cells ran top to bottom, num_episodes was last set to
# 100 by the sweep cells, while the curves this baseline is compared with
# below were produced with 1000 episodes. Confirm the intended length.
random_agent = BaselineAgent(env)
cumulative_rewards_random, _ = random_agent.train(num_episodes)

In [None]:
# Overlay the per-episode rewards of the baseline and both learning agents
plt.figure(figsize=(12, 8))
for curve, curve_label in (
    (cumulative_rewards_random, 'Baseline'),
    (cumulative_rewards, 'Traditional Q-Learning'),
    (cumulative_rewards_causal, 'Causal Curiosity Q-Learning'),
):
    plt.plot(curve, label=curve_label)
plt.title('Comparison of Cumulative Rewards')
plt.xlabel('Episodes')
plt.ylabel('Cumulative Reward')
plt.legend()
plt.show()

In [None]:
from scipy.stats import ttest_ind
# Two-sample t-test (scipy's ttest_ind with its default settings) between the
# per-episode rewards of the random baseline and the curiosity agent.
t_stat, p_value = ttest_ind(cumulative_rewards_random, cumulative_rewards_causal)

print(f"T-statistic: {t_stat}, P-value: {p_value}")


In [None]:
import cdt
import pandas as pd
# Toy run of cdt's Glasso skeleton estimation on three hand-written records
glasso = cdt.independence.graph.Glasso()
data = [
    {'A': 1, 'B': 2, 'C': 3},
    {'A': 4, 'B': 5, 'C': 6},
    {'A': 7, 'B': 8, 'C': 9}
]
# The records live in `data`; the previously referenced name
# `array_of_dicts` is not defined anywhere and raised a NameError.
df_history = pd.DataFrame(data)
skeleton = glasso.predict(df_history)
print(skeleton)
new_skeleton = cdt.utils.remove_indirect_links(skeleton, alg='aracne')
# Path of the R executable used by cdt's R-backed algorithms (machine-specific)
cdt.SETTINGS.rpath = "/usr/local/bin/R"

#model = cdt.causality.graph.GES()



In [None]:
pip install causal-learn

In [None]:
from causallearn.search.ConstraintBased.PC import pc


In [None]:
import numpy as np
# Column order is taken from the keys of the first record
features = list(data[0].keys())

# Dimensions of the resulting array
num_samples, num_features = len(data), len(features)

# One row per record and one column per feature, stored as floats
array_data = np.array(
    [[sample[feature] for feature in features] for sample in data],
    dtype=float,
)

print(array_data)

In [None]:
# Run PC on the toy array with alpha=0.05 and the "chisq" independence test.
cg = pc(array_data, alpha=0.05, indep_test="chisq")


In [None]:
# Presumably builds the networkx form of the PC result; confirm against the causal-learn docs.
cg.to_nx_graph()

In [None]:
# Draw the PC result with skel=False (presumably the oriented graph rather than the skeleton; confirm).
cg.draw_nx_graph(skel=False)

In [None]:
from causallearn.graph import SHD


In [None]:
# SHD object comparing the PC graph with itself (a self-comparison sanity check).
b = SHD.SHD(cg.G, cg.G)

In [None]:
# Display the distance value held by the SHD object built above.
b.get_shd()

In [None]:
import numpy as np

def dicts_to_numpy_array(dicts_list):
    """
    Convert a list of dictionaries into a 2-D NumPy array.

    Columns follow the key order of the first dictionary. Every other
    dictionary is read by key, so dictionaries holding the same keys in a
    different insertion order still line up column by column.

    :param dicts_list: Non-empty list of dictionaries with the same keys.
    :return: A NumPy array in which each row holds the values of one dictionary.
    :raises ValueError: If the list is empty or the dictionaries do not all
        have the same keys.
    """
    if len(dicts_list) == 0:
        raise ValueError("dicts_list must contain at least one dictionary")

    reference_keys = dicts_list[0].keys()
    columns = list(reference_keys)

    rows = []
    for data_dict in dicts_list:
        if data_dict.keys() != reference_keys:
            raise ValueError("all dictionaries must have the same keys")
        # Read by key rather than by position so column order is stable.
        rows.append(np.array([data_dict[key] for key in columns]))

    # Stack the per-dictionary rows vertically into one array.
    return np.vstack(rows)

# Usage example
dicts_list = [
    {'X0': 2, 'X1': 1},  # Replace ... with the remaining keys and values
    {'X0': 3, 'X1': 0},  # Second dictionary, etc.
    # Add further dictionaries as needed
]

result_array = dicts_to_numpy_array(dicts_list)
print(result_array)
