In [1]:
## import

import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
from numpy import genfromtxt
import pandas as pd
from scipy.special import expit as sigmoid
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
# import graphviz
import notears.utils as ut
from notears import nonlinear_concept, nonlinear_old
import igraph as ig
# import lingam
# from lingam.utils import make_prior_knowledge, make_dot
import ray
import pickle as pk
from scipy.special import expit as sigmoid
import time
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import math

## environmental setup

# Report the numpy / pandas versions this notebook was executed with.
print([module.__version__ for module in (np, pd)])
# Compact, non-scientific array printing for the matrices printed later on.
np.set_printoptions(suppress=True, precision=3)
# Make torch create float64 tensors/parameters by default.
torch.set_default_dtype(torch.float64)

@ray.remote(num_returns=1)
def get_result(df_x, opt):
    """Fit the concept-level and the flat NOTEARS models for one option tuple.

    Two fits are run on the same data, each inside its own ``try`` block so
    that a failure in one does not prevent the other from running:

    1. ``nonlinear_concept`` -- the returned matrix is saved to
       ``outputs/W_con_<tag>.csv``.
    2. ``nonlinear_old`` -- the returned flat matrix is collapsed to a
       concept-by-concept 0/1 matrix and saved to ``outputs/W_flat_<tag>.csv``.

    ``<tag>`` is ``str(should_std) + str(val_lambda) + str(w_threshold)``.

    Args:
        df_x: DataFrame whose columns are laid out concept by concept; the
            number of columns must equal ``sum(concepts)``.
        opt: Indexable ``(should_std, val_lambda, w_threshold)``.
            ``should_std`` toggles ``StandardScaler`` standardisation,
            ``val_lambda`` is passed as both ``lambda1`` and ``lambda2``, and
            ``w_threshold`` is forwarded to the model / solver.

    Returns:
        Always ``0``. Results are written to disk; exceptions raised inside
        either fit are printed and appended to ``logger.log`` instead of
        being propagated.

    Raises:
        AssertionError: If the column count of ``df_x`` does not match the
            concept layout.
    """

    def _log_error(err):
        # Echo the failure and append it to the shared log. The context
        # manager guarantees the handle is closed even if the write fails.
        print('========================================', err)
        with open('logger.log', 'a+') as log_file:
            log_file.write("Error ==> {}\n".format(err))

    def conv_flat_to_con(A, concepts):
        """Collapse a flat (dflat x dflat) matrix to a concept-level 0/1 matrix.

        Entry (i, j) of the result is 1 when the mean absolute weight between
        the columns of concept i and the columns of concept j is non-zero.
        """
        # The optimisation works on a squared matrix, so it needs no abs();
        # here signs must be removed explicitly before averaging.
        A = np.abs(A)
        dflat = sum(concepts)
        dcon = len(concepts)
        ends = np.cumsum(concepts)
        starts = ends - np.asarray(concepts)

        # Average the rows belonging to each concept ...
        Arow = np.zeros((dcon, dflat))
        for i, (lo, hi) in enumerate(zip(starts, ends)):
            Arow[i, :] = A[lo:hi, :].sum(axis=0) / (hi - lo)

        # ... then the columns belonging to each concept.
        Ad = np.zeros((dcon, dcon))
        for i, (lo, hi) in enumerate(zip(starts, ends)):
            Ad[:, i] = Arow[:, lo:hi].sum(axis=1) / (hi - lo)

        # Binarise: any non-zero averaged weight counts as an edge.
        return (Ad != 0).astype(float)

    ## 1 -- unpack the options and fix the random seeds
    should_std, val_lambda, w_threshold = opt[0], opt[1], opt[2]
    np.random.seed(123)
    ut.set_random_seed(123)

    ## 2 -- concept layout of the flat columns, in order:
    # 'budget',
    # 'w0' .. 'w11',
    # 'd0' .. 'd9',
    # 'p0' .. 'p12',
    # 'c0' .. 'c13',
    # 'g0' .. 'g4',
    # 'imdb_user_rating',
    # 'revenue'
    concepts = [1, 12, 10, 13, 14, 5, 1, 1] ## bud, writer, directors, producers, cast, genre, imdb, rev
    Xflat = df_x.values

    ## 3 -- optional standardisation
    if should_std:
        scalerFlat = StandardScaler().fit(Xflat)
        Xflat = scalerFlat.transform(Xflat)
    # NOTE(review): the data is cast to float32 although the notebook sets
    # torch's default dtype to double at module level -- confirm which dtype
    # the notears models expect inside the Ray worker.
    Xflat = Xflat.astype('float32')
    _, dflat = Xflat.shape
    dcon = len(concepts)

    ## 4 -- sanity checks on the concept layout
    # All-NaN (dcon x dcon) mask handed to the concept model; presumably NaN
    # means "unconstrained" -- confirm against nonlinear_concept.NotearsMLP.
    mask = np.ones((dcon, dcon)) * np.nan
    print(concepts, dcon, dflat)
    assert len(concepts) == dcon
    assert sum(concepts) == dflat
    assert Xflat.shape[1] == dflat

    # Same file-name suffix as before: the three options concatenated.
    run_tag = str(should_std) + str(val_lambda) + str(w_threshold)

    ## concept-level model: initialise and run the optimisation
    try:
        # Create the output directory before the expensive fit so that a
        # missing or unwritable directory is reported up front instead of
        # discarding a finished result at save time.
        os.makedirs('outputs', exist_ok=True)
        metainfo = {
            'dflat': dflat,
            'dcon': dcon,
            'concepts': concepts,
        }
        model = nonlinear_concept.NotearsMLP(
            dims=[dflat, 10, 1], bias=True,
            mask=mask, w_threshold=w_threshold, learned_model=None, ## w_threshold=0.3
            metainfo=metainfo
        )
        # The second returned value is not used here.
        W_notears, _ = nonlinear_concept.notears_nonlinear(
            model, Xflat, lambda1=val_lambda, lambda2=val_lambda,
            h_tol=1e-8, rho_max=1e+18
        ) ## lambda1=0.01, lambda2=0.01, h_tol=1e-8, rho_max=1e+16
        # assert ut.is_dag(W_notears)
        np.savetxt('outputs/W_con_' + run_tag + '.csv', W_notears, delimiter=',')
        print('W_con', W_notears)
    except Exception as e:
        _log_error(e)

    ## flat model: initialise, run the optimisation, collapse to concepts
    try:
        os.makedirs('outputs', exist_ok=True)
        model3 = nonlinear_old.NotearsMLP(dims=[dflat, 10, 1], bias=True)
        W_notears3 = nonlinear_old.notears_nonlinear(
            model3, Xflat, lambda1=val_lambda, lambda2=val_lambda, w_threshold=w_threshold,
            h_tol=1e-8, rho_max=1e+18
        ) ## lambda1=0.01, lambda2=0.01, w_threshold=0.3, h_tol=1e-8, rho_max=1e+16
        W_notears3 = conv_flat_to_con(W_notears3, concepts)
        # assert ut.is_dag(W_notears3)
        np.savetxt('outputs/W_flat_' + run_tag + '.csv', W_notears3, delimiter=',')
        print('W_flat', W_notears3)
    except Exception as e:
        _log_error(e)

    return 0

if __name__=='__main__':
    # Start from a clean Ray runtime; 56 logical CPUs are made available to
    # the scheduler (Ray would otherwise auto-detect the count).
    ray.shutdown()
    ray.init(ignore_reinit_error=True, num_cpus=56) ## detects automatically: num_cpus=64

    # One entry per run: (should_std, val_lambda, w_threshold).
    list_option = [
        (False, 0.01, 0.3),
        (False, 0.01, 0.2),
        (False, 0.001, 0.3),
        (False, 0.001, 0.2),

        (True, 0.01, 0.3),
        (True, 0.01, 0.2),
        (True, 0.001, 0.3),
        (True, 0.001, 0.2),
    ]

    df_x = pd.read_csv('datasets/movie_processed_1.csv')
    # Keep only the modelled columns, in concept order:
    ## bud,
    ## wri, dir, pro, cast, genre,
    ## imdb, rev
    df_x = df_x[[
        'budget',
        'w0', 'w1','w2', 'w3','w4', 'w5','w6', 'w7','w8', 'w9','w10', 'w11',
        'd0','d1','d2', 'd3','d4', 'd5','d6', 'd7','d8', 'd9',
        'p0','p1','p2', 'p3','p4', 'p5','p6', 'p7','p8', 'p9','p10', 'p11', 'p12',
        'c0','c1','c2', 'c3','c4', 'c5','c6', 'c7','c8', 'c9','c10', 'c11', 'c12', 'c13',
        'g0','g1','g2', 'g3','g4',
        'imdb_user_rating',
        'revenue'
    ]]

    # The CSV contains rows with missing values in the selected columns.
    # StandardScaler passes NaN through, so they would reach the optimiser
    # unchanged; drop incomplete rows and report how many were removed.
    n_rows_before = len(df_x)
    df_x = df_x.dropna()
    n_rows_dropped = n_rows_before - len(df_x)
    if n_rows_dropped:
        print('Dropped {} of {} rows containing missing values'.format(
            n_rows_dropped, n_rows_before))

    # Store the DataFrame in Ray's object store once and hand every task the
    # same reference instead of re-serialising it for each remote call. Ray
    # resolves a top-level ObjectRef argument, so get_result still receives
    # the DataFrame itself.
    df_x_ref = ray.put(df_x)
    list_result_id = [get_result.remote(df_x_ref, opt) for opt in list_option]
    # Block until every run has finished (each task returns 0).
    list_result = ray.get(list_result_id)



['1.21.3', '1.0.1']
[2m[36m(pid=41120)[0m [1, 12, 10, 13, 14, 5, 1, 1] 8 57
[2m[36m(pid=41120)[0m -----iteration no:  0
[2m[36m(pid=41115)[0m [1, 12, 10, 13, 14, 5, 1, 1] 8 57
[2m[36m(pid=41125)[0m [1, 12, 10, 13, 14, 5, 1, 1] 8 57
[2m[36m(pid=41125)[0m -----iteration no:  0
[2m[36m(pid=41115)[0m -----iteration no:  0
[2m[36m(pid=41108)[0m [1, 12, 10, 13, 14, 5, 1, 1] 8 57
[2m[36m(pid=41108)[0m -----iteration no:  0
[2m[36m(pid=41093)[0m [1, 12, 10, 13, 14, 5, 1, 1] 8 57
[2m[36m(pid=41093)[0m -----iteration no:  0
[2m[36m(pid=41092)[0m [1, 12, 10, 13, 14, 5, 1, 1] 8 57
[2m[36m(pid=41092)[0m -----iteration no:  0
[2m[36m(pid=41102)[0m [1, 12, 10, 13, 14, 5, 1, 1] 8 57
[2m[36m(pid=41102)[0m -----iteration no:  0
[2m[36m(pid=41086)[0m [1, 12, 10, 13, 14, 5, 1, 1] 8 57
[2m[36m(pid=41086)[0m -----iteration no:  0
[2m[36m(pid=41115)[0m -----iteration no:  0
[2m[36m(pid=41115)[0m --------------------in while loop where rho:  1.0 , rho_max:

In [2]:
# Re-read the processed movie CSV with all of its columns and rebind df_x to it for inspection.
df_x = pd.read_csv('datasets/movie_processed_1.csv')

In [3]:
# Bare expression: the notebook renders the DataFrame as this cell's output.
df_x

Unnamed: 0,budget,w0,w1,w2,w3,w4,w5,w6,w7,w8,...,c11,c12,c13,g0,g1,g2,g3,g4,imdb_user_rating,revenue
0,17000000.0,-0.038540,0.098260,0.081265,-0.008760,0.122639,-0.077397,0.055825,0.076549,0.033185,...,0.005689,0.002576,0.015662,-0.580614,-0.127464,-0.276190,-0.272838,0.210208,6.0,3.033116e+07
1,0.0,-0.042361,0.088928,0.091320,0.006176,0.097711,-0.047093,0.078845,0.097018,0.052463,...,0.009741,-0.006514,0.020250,-0.461639,-0.103872,-0.003902,-0.350894,-0.043026,6.0,0.000000e+00
2,220000000.0,-0.046881,0.105719,0.100069,-0.017983,0.107853,-0.048771,0.075875,0.103300,0.022309,...,0.013242,0.002664,0.016247,-0.474974,-0.720920,0.051662,-0.423692,-0.291247,3.0,1.519558e+09
3,60000000.0,-0.046881,0.105719,0.100069,-0.017983,0.107853,-0.048771,0.075875,0.103300,0.022309,...,0.013242,0.002664,0.016247,-0.474974,-0.720920,0.051662,-0.423692,-0.291247,3.0,4.852283e+07
4,220000000.0,-0.034002,0.119371,0.093465,-0.012530,0.095016,-0.060965,0.056555,0.076235,0.058853,...,0.013422,0.002801,0.020294,-0.591133,-0.575884,-0.148398,-0.355397,-0.115872,8.0,1.519558e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1963,,,,,,,,,,,...,0.017524,0.004100,0.012493,-0.374565,-0.105555,-0.024654,-0.217308,0.134497,,
1964,,,,,,,,,,,...,0.017524,0.004100,0.012493,-0.374565,-0.105555,-0.024654,-0.217308,0.134497,,
1965,,,,,,,,,,,...,0.017524,0.004100,0.012493,-0.374565,-0.105555,-0.024654,-0.217308,0.134497,,
1966,,,,,,,,,,,...,0.017524,0.004100,0.012493,-0.374565,-0.105555,-0.024654,-0.217308,0.134497,,
