# <h1><center>Causal Learning</center></h1>

In [7]:
#importing essential modules
import sys
import os
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
from causalnex.discretiser import Discretiser
from causalnex.structure import DAGRegressor
from causalnex.inference import InferenceEngine
from causalnex.network import BayesianNetwork
from causalnex.network.sklearn import BayesianNetworkClassifier
from causalnex.structure.notears import from_pandas
from causalnex.structure.notears import from_pandas, from_pandas_lasso
from causalnex.discretiser.discretiser_strategy import ( DecisionTreeSupervisedDiscretiserMethod )




In [4]:
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, f1_score, accuracy_score, precision_score

In [5]:
sns.set()
%matplotlib inline
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option("expand_frame_repr", False)
pd.set_option('display.float_format', '{:.2f}'.format)

In [8]:
#importing local modules
sys.path.append(os.path.abspath(os.path.join('../scripts')))
import visualization as vz
from pre_process import Process
from overview import Overview

In [10]:
#creating processing object
process = Process()

## Reading Data 

In [16]:
df = pd.read_csv("../data/data_cleaner.csv")
df.head(5)

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,smoothness_se,compactness_se,concavity_se,concave points_se,symmetry_se,fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.12,0.23,0.28,0.15,0.24,0.08,0.85,0.91,5.98,86.2,0.01,0.05,0.05,0.02,0.03,0.01,25.38,17.33,184.6,1937.05,0.16,0.63,0.71,0.27,0.42,0.12
1,842517,M,20.57,17.77,132.9,1326.0,0.08,0.08,0.09,0.07,0.18,0.06,0.54,0.73,3.4,74.08,0.01,0.01,0.02,0.01,0.01,0.0,24.99,23.41,158.8,1937.05,0.12,0.19,0.24,0.19,0.28,0.09
2,84300903,M,19.69,21.25,130.0,1203.0,0.11,0.16,0.2,0.13,0.21,0.06,0.75,0.79,4.58,86.2,0.01,0.04,0.04,0.02,0.02,0.0,23.57,25.53,152.5,1709.0,0.14,0.42,0.45,0.24,0.36,0.09
3,84348301,M,11.42,20.38,77.58,386.1,0.13,0.23,0.24,0.11,0.25,0.08,0.5,1.16,3.44,27.23,0.01,0.06,0.06,0.02,0.04,0.01,14.91,26.5,98.87,567.7,0.19,0.63,0.69,0.26,0.42,0.12
4,84358402,M,20.29,14.34,135.1,1297.0,0.1,0.13,0.2,0.1,0.18,0.06,0.76,0.78,5.44,86.2,0.01,0.02,0.06,0.02,0.02,0.01,22.54,16.67,152.2,1575.0,0.14,0.2,0.4,0.16,0.24,0.08


In [15]:
df.columns

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst'],
      dtype='object')

In [17]:
features = ['diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst']

In [19]:
#numerate diagnosis for 
x['diagnosis'] = x['diagnosis'].apply(lambda x: 1 if x == "M" else 0)
x.head(5)

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,smoothness_se,compactness_se,concavity_se,concave points_se,symmetry_se,fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst
0,1,17.99,10.38,122.8,1001.0,0.12,0.23,0.28,0.15,0.24,0.08,0.85,0.91,5.98,86.2,0.01,0.05,0.05,0.02,0.03,0.01,25.38,17.33,184.6,1937.05,0.16,0.63,0.71,0.27
1,1,20.57,17.77,132.9,1326.0,0.08,0.08,0.09,0.07,0.18,0.06,0.54,0.73,3.4,74.08,0.01,0.01,0.02,0.01,0.01,0.0,24.99,23.41,158.8,1937.05,0.12,0.19,0.24,0.19
2,1,19.69,21.25,130.0,1203.0,0.11,0.16,0.2,0.13,0.21,0.06,0.75,0.79,4.58,86.2,0.01,0.04,0.04,0.02,0.02,0.0,23.57,25.53,152.5,1709.0,0.14,0.42,0.45,0.24
3,1,11.42,20.38,77.58,386.1,0.13,0.23,0.24,0.11,0.25,0.08,0.5,1.16,3.44,27.23,0.01,0.06,0.06,0.02,0.04,0.01,14.91,26.5,98.87,567.7,0.19,0.63,0.69,0.26
4,1,20.29,14.34,135.1,1297.0,0.1,0.13,0.2,0.1,0.18,0.06,0.76,0.78,5.44,86.2,0.01,0.02,0.06,0.02,0.02,0.01,22.54,16.67,152.2,1575.0,0.14,0.2,0.4,0.16
