# Causal Discovery & ML - From Assumptions to Applications

### Loading Libraries

In [6]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# IterTools
from itertools import combinations

# Statistical Models
from scipy import stats
import statsmodels.api as sm

# Networkx
import networkx as nx

# JavaScript Object Notation
import json

# Data Visualization
import graphviz
import seaborn as sns
import matplotlib.pyplot as plt

# Scikit-Learn
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression, LassoCV
from sklearn.metrics import mean_absolute_percentage_error

# Castle
import castle
from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, IIDSimulation 

from castle.algorithms import PC, GES
from castle.algorithms import ANMNonlinear, ICALiNGAM, DirectLiNGAM
from castle.algorithms import Notears, NotearsNonlinear, GOLEM

from castle.common.priori_knowledge import PrioriKnowledge

from castle.common.independence_tests import hsic_test

# PyTorch
import torch

# DoWhy Causal Libraries
import dowhy
from dowhy import gcm
from dowhy import CausalModel
from dowhy.causal_model import CausalModel

# Notebook Iteration 
from tqdm import tqdm

# Operating Systems & Environments
import os
# Select the PyTorch backend for gCastle through its environment variable.
# NOTE(review): `castle` has already been imported further up in this cell, so
# if gCastle reads CASTLE_BACKEND at import time this assignment comes too late
# to have any effect — confirm, and consider setting it before importing castle.
os.environ['CASTLE_BACKEND'] = 'pytorch'

# Copy
from copy import deepcopy

# Light Gradient Boosting Models
from lightgbm import LGBMRegressor, LGBMClassifier

#### Setting Viz Standards

In [7]:
# Shared plotting palette (hex codes): index 0 = blue, 1 = red, 2 = lime green
COLORS = ['#00B0F0', '#FF0000', '#B0F000']

#### Setting Seeds

In [8]:
# Fix the seed of NumPy's global random number generator for this notebook
SEED = 18
np.random.seed(seed=SEED)

## Introduction to gCastle

### Synthetic Data in gCastle

In [10]:
# Generate a scale-free adjacency matrix
# (requested with 10 nodes and 17 edges; the generator is seeded with SEED)
adj_matrix = DAG.scale_free(
    n_nodes=10,
    n_edges=17,
    seed=SEED
)

In [None]:
# Show the generated adjacency matrix as the cell output
adj_matrix

In [None]:
# Visualize the adjacency matrix
# Build a directed graph from the matrix, then draw it with labelled nodes
# placed on a circle, using the first palette color for the nodes.
g = nx.DiGraph(adj_matrix)

plt.figure(figsize=(12, 8))
nx.draw(
    G=g,
    node_color=COLORS[0],
    node_size=1200,
    arrowsize=17,
    with_labels=True,
    font_color='white',
    font_size=21,
    pos=nx.circular_layout(g)
)

In [None]:
# Simulate data from the graph in `adj_matrix` with method='linear' and
# sem_type='gauss'.
# NOTE(review): `n` is presumably the number of generated samples (10,000) —
# confirm against the gCastle IIDSimulation documentation.
dataset = IIDSimulation(
    W=adj_matrix, 
    n=10000, 
    method='linear', 
    sem_type='gauss'
)

In [None]:
# Access the generated data (the `X` attribute of the simulation object)
# and show it as the cell output
dataset.X

### Fitting a First Causal Discovery Model

In [None]:
# Instantiate the model (gCastle's PC algorithm, constructed with its defaults)
pc = PC()

In [None]:
# Fit the model on the simulated data
pc.learn(dataset.X)

In [None]:
# Keep the matrix learned by the fitted PC model and show it as the cell output
pred_dag = pc.causal_matrix
pred_dag

#### Visualizing The Model

In [None]:
# Build a directed graph from the predicted matrix and draw it with the same
# styling as the true-graph plot.
g_pred = nx.DiGraph(pred_dag)

plt.figure(figsize=(12, 8))
nx.draw(
    G=g_pred,
    node_color=COLORS[0],
    node_size=1200,
    arrowsize=17,
    with_labels=True,
    font_color='white',
    font_size=21,
    # NOTE(review): the layout is computed from the true graph `g`, not from
    # `g_pred` — presumably so node positions match the earlier plot; this
    # relies on both graphs having the same node labels. Confirm it is intended.
    pos=nx.circular_layout(g)
)

In [None]:
# Compare the estimated matrix with the ground-truth matrix using gCastle's
# GraphDAG plotting helper, then render the figure.
GraphDAG(
    est_dag=pred_dag, 
    true_dag=adj_matrix)

plt.show()

#### Model Evaluation Metrics

In [None]:
# Compute evaluation metrics for the predicted matrix against the true one
metrics = MetricsDAG(
    B_est=pred_dag, 
    B_true=adj_matrix)

In [None]:
# Look up the 'F1' entry of the computed metrics
metrics.metrics['F1']

In [None]:
# Show every computed metric as the cell output
metrics.metrics

In [None]:
	
def get_n_undirected(g):
    """Count the undirected edges encoded in the adjacency matrix ``g``.

    A cell ``(i, j)`` qualifies when ``g[i, j]`` equals 1 and ``g[j, i]`` holds
    the same value. Each qualifying cell adds 0.5, so a symmetric off-diagonal
    pair contributes one full edge; a diagonal cell equal to 1 is visited only
    once and therefore contributes 0.5.

    Args:
        g: Two-dimensional array-like exposing ``shape`` and ``g[i, j]``
            indexing. Only indices below ``g.shape[0]`` are read on both axes.

    Returns:
        The integer ``0`` when no cell qualifies, otherwise the float total.
    """
    node_ids = range(g.shape[0])

    # Summing 0.5 per symmetric cell counts every undirected pair exactly once.
    return sum(
        .5
        for src in node_ids
        for dst in node_ids
        if g[src, dst] == 1 and g[src, dst] == g[dst, src]
    )