In [285]:
import pandas as pd
import numpy
from numpy import matrix
numpy.set_printoptions(formatter={'float': lambda x: "{0:0.6f}".format(x)})
from scipy.stats import norm

class BayesianNetworks:
    """Gaussian statistics and log-likelihoods for the numeric columns of a data file.

    On construction the file is loaded, non-numeric columns and missing data are
    dropped, and per-column statistics plus per-observation normal densities are
    cached as attributes (``mean``, ``median``, ``var``, ``std``, ``cov``,
    ``cor``, ``pdf``).

    NOTE: ``var``/``std`` use numpy's default population normalisation (divide
    by N) while ``cov``/``cor`` come from ``numpy.cov``/``numpy.corrcoef``,
    whose default covariance normalisation is N-1. The univariate and
    multivariate likelihoods therefore rest on slightly different variance
    estimates; this is kept as in the original implementation.
    """

    ##Initialization
    def __init__(self, file_name, handle_missing_data="any"):
        """Load ``file_name`` and precompute statistics and densities.

        Args:
            file_name: Path to a ``.xlsx``, ``.xls`` or ``.csv`` file.
            handle_missing_data: Passed as ``how`` to ``DataFrame.dropna``
                (``"any"`` or ``"all"``), first for columns and then for rows.

        Raises:
            ValueError: If the file extension is not one of the supported ones.
        """
        ##Only takes the columns that have these datatypes
        self.ALLOWED_DTYPES = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

        ##Only takes .xlsx,.xls,.csv files (extension compared case-insensitively)
        self.file_ext = file_name.split(".")[-1].lower()

        if self.file_ext in ('xlsx', 'xls'):
            self.df = pd.read_excel(file_name)
        elif self.file_ext == 'csv':
            self.df = pd.read_csv(file_name)
        else:
            raise ValueError("Only .xlsx, .xls, .csv files supported!")

        # Keep numeric columns only, then drop columns and rows with missing
        # values. axis/how are passed by keyword because recent pandas
        # releases no longer accept them positionally.
        numeric = self.df.select_dtypes(include=self.ALLOWED_DTYPES)
        self.df = numeric.dropna(axis=1, how=handle_missing_data).dropna(axis=0, how=handle_missing_data)
        self.columns = list(self.df.columns)

        ##Basic Stats from the data
        # Rows of df_as_matrix are variables, columns are observations.
        self.df_as_matrix = matrix(self.df.values).T
        column_values = [numpy.asarray(self.df[name], dtype=float) for name in self.columns]
        self.mean = [numpy.mean(values) for values in column_values]
        self.median = [numpy.median(values) for values in column_values]
        self.var = [numpy.var(values) for values in column_values]
        self.std = [numpy.std(values) for values in column_values]
        self.cov = numpy.cov(self.df_as_matrix)
        self.cor = numpy.corrcoef(self.df_as_matrix)
        # Row i of self.pdf holds the normal density of every observation of column i.
        self.pdf = matrix([self._univariate_pdf(name) for name in self.columns])

    #Private Functions
    def _index_of_column(self, column):
        """Return the position of ``column`` in ``self.columns`` (ValueError if absent)."""
        return self.columns.index(column)

    def _univariate_pdf(self, column):
        """Return the normal density of each observation of ``column`` as a list.

        The density uses the cached mean and standard deviation of that column.
        """
        index = self._index_of_column(column)
        mean = self.mean[index]
        stddev = self.std[index]
        values = numpy.asarray(self.df[column], dtype=float)
        coefficient = 1 / (numpy.sqrt(2 * numpy.pi) * stddev)
        return (coefficient * numpy.exp(-0.5 * ((values - mean) / stddev) ** 2)).tolist()

    def _multivariate_pdf(self, columns):
        """Return the joint normal density of each observation over ``columns``.

        Args:
            columns: Iterable of column names; names not present in
                ``self.columns`` are ignored and the order of ``self.columns``
                is used.

        Returns:
            A list with one density per observation.

        Raises:
            ValueError: If none of ``columns`` is a known column.
        """
        wanted = set(columns)
        idx = [i for i, name in enumerate(self.columns) if name in wanted]
        if not idx:
            raise ValueError("None of the requested columns are available")
        k = len(idx)

        mean = numpy.asarray(self.mean, dtype=float)[idx]
        # atleast_2d: numpy.cov returns a 0-d array when there is one variable.
        cov = numpy.atleast_2d(numpy.asarray(self.cov, dtype=float))[idx][:, idx]
        # Use this instance's data (not a module-level object); shape (n_obs, k).
        data = numpy.atleast_2d(numpy.asarray(self.df_as_matrix, dtype=float))[idx].T

        cov_inverse = numpy.linalg.inv(cov)
        # Normalising constant of a k-variate normal: (2*pi)^(k/2) * sqrt(det).
        coefficient = 1.0 / numpy.sqrt((2 * numpy.pi) ** k * numpy.linalg.det(cov))

        centered = data - mean
        # Row-wise quadratic form (x - mu)^T * inv(cov) * (x - mu).
        quadratic = numpy.sum(centered.dot(cov_inverse) * centered, axis=1)
        return (coefficient * numpy.exp(-0.5 * quadratic)).tolist()

    #Public Functions
    def log_likelihood(self, pdf_list):
        """Return the sum of the natural logs of the densities in ``pdf_list``."""
        return float(numpy.sum(numpy.log(numpy.asarray(pdf_list, dtype=float))))

    def univariate_log_likelihood(self):
        """Return the log-likelihood treating every column as an independent normal.

        This is the sum of the logs of all cached per-column densities.
        """
        return self.log_likelihood(self.pdf)

    def multivariate_log_likelihood(self):
        """Return the log-likelihood of a joint normal over all retained columns."""
        return self.log_likelihood(self._multivariate_pdf(self.columns))
    

In [286]:
# Load the spreadsheet; the constructor keeps the numeric columns and caches
# their statistics and per-observation densities on the instance.
bn = BayesianNetworks('./university data.xlsx')

In [287]:
# Log-likelihood with every column modelled as an independent normal
# (the recorded cell output follows).
bn.univariate_log_likelihood()

-1315.0987925607392

In [288]:
# Log-likelihood with all retained columns modelled as one joint normal
# (the recorded cell output follows).
bn.multivariate_log_likelihood()

-1304.7782458098391

In [None]:
# input() returns text on Python 3; convert it so n can be used as a count
# by the cells that follow.
n = int(input())

In [None]:
# A string made of n '1' characters, built by repetition instead of
# appending one character per loop iteration. int() keeps this cell working
# when n is still the raw text returned by input().
string = "1" * int(n)

In [None]:

# Count of the integers below int(string, 2). The original formatted every
# one of them as a binary string only to take the length of the list; the
# length of the range itself is the same number without the allocations.
len(range(int(string, 2)))
    

In [None]:
# Bare expression: shows the current value of `string` as the cell output.
string

In [None]:
def univariate_pdf(df,column):
    """Return the normal density of every value in ``df[column]`` as a list.

    The spread and centre come from the ``stddev`` and ``mean`` helpers, which
    are defined outside this block and are called with ``(df, column)``.
    """
    spread = stddev(df,column)
    centre = mean(df,column)
    norm_const = numpy.sqrt(2*numpy.pi)

    densities = []
    for value in df[column]:
        z_score = (value-centre)/spread
        densities.append(1/(norm_const*spread)*numpy.e**(-1/2.0*z_score**2))
    return densities

# Per-observation normal densities of the four columns under study.
pdf1 = univariate_pdf(df,"CS_Score")
pdf2 = univariate_pdf(df,"Research_Overhead")
pdf3 = univariate_pdf(df,"Admin_Base_Pay")
pdf4 = univariate_pdf(df,"Tuition_Out_State")

# Joint density per observation when the four columns are treated as
# independent. zip() walks however many observations there are instead of
# assuming exactly 49 rows.
pdf_univariate = [a*b*c*d for a, b, c, d in zip(pdf1, pdf2, pdf3, pdf4)]
independent_log_likelihood = sum(numpy.log(pdf_univariate))
print("logLikelihood = " + str(independent_log_likelihood))

def multivariate_pdf(df,covarianceMat,no_of_columns):
    """Return the joint normal density of every row of ``df``.

    Args:
        df: Data frame; the variables are read positionally from columns
            ``2`` to ``2 + no_of_columns - 1`` of each row.
        covarianceMat: Square covariance matrix of those variables.
        no_of_columns: Number of variables (at most 4, because the mean
            vector is taken from ``mu1``..``mu4``).

    Returns:
        A list of floats, one density per row of ``df``.
    """
    covarianceMat = matrix(covarianceMat)
    # Explicit inverse: `** -1` is only a matrix inverse for numpy.matrix inputs.
    inverse_covarianceMat = numpy.linalg.inv(covarianceMat)
    determinant_covarianceMat = numpy.linalg.det(covarianceMat)
    # mu1..mu4 are module-level values defined outside this block --
    # presumably the means of the selected columns; verify against their
    # definition. Only the first no_of_columns of them are used.
    mu = matrix([mu1,mu2,mu3,mu4][:no_of_columns]).T
    # Normalising constant (2*pi)^(k/2) * sqrt(det); equals the previous
    # hard-coded (2*pi)**2 * sqrt(det) when k == 4. Computed once, not per row.
    coefficient = 1/numpy.sqrt((2*numpy.pi)**no_of_columns*determinant_covarianceMat)

    pdf = []
    for row in range(len(df)):  # every row, not a fixed 49
        x = matrix(list(df.iloc[row][2:2+no_of_columns])).T
        deviation = x-mu
        exponent = -1/2.0*(deviation.T*inverse_covarianceMat*deviation).tolist()[0][0]
        pdf.append(float(numpy.exp(exponent)*coefficient))
    return pdf

# Log-likelihood of the data under the joint normal over the four columns:
# the logs of the per-row densities are added up in row order.
multivariate_densities = multivariate_pdf(df,covarianceMat,4)
multivariate_log_likelihood = sum(numpy.log(density) for density in multivariate_densities)
print("multivariatelogLikelihood = " + str(multivariate_log_likelihood))

# 4x4 0/1 matrix describing the network structure.
# NOTE(review): presumably row i marks the variables node i depends on
# (itself plus its parents) -- confirm; nothing visible here reads it.
BNgraph = matrix([
    [1, 0, 0, 0],
    [1, 1, 0, 0],
    [0, 0, 1, 0],
    [1, 0, 0, 1],
])
def BNlogLikelihood(list1,list2):
    """Return the Gaussian log-likelihood of ``list2`` regressed linearly on ``list1``.

    A straight line ``b0 + b1*x`` is fitted by least squares (normal
    equations), the residual variance is estimated as the mean squared
    residual, and the value ``-n/2 * log(2*pi*variance) - n/2`` is returned.

    Args:
        list1: Predictor values.
        list2: Response values, same length as ``list1``.

    Returns:
        The log-likelihood as a float (infinite if the fit is exact, since
        the residual variance is then zero).

    Raises:
        ValueError: If the two sequences differ in length.
        numpy.linalg.LinAlgError: If the normal equations are singular
            (e.g. empty input or all predictor values equal).
    """
    n = len(list1)
    if n != len(list2):
        # zip() would silently truncate while n still counted list1.
        raise ValueError("list1 and list2 must have the same length")

    x = numpy.asarray(list1, dtype=float)
    y = numpy.asarray(list2, dtype=float)

    # Normal equations for [b0, b1]; solved directly rather than via an
    # explicit matrix inverse.
    normal_matrix = numpy.array([[n, x.sum()], [x.sum(), (x**2).sum()]])
    moments = numpy.array([y.sum(), (x*y).sum()])
    b0, b1 = numpy.linalg.solve(normal_matrix, moments)

    variance = ((b0 + b1*x - y)**2).sum()/n
    return float(-n/2.0*numpy.log(2*numpy.pi*variance) - n/2.0)
    
def univariate_loglikelihood(pdf1,pdf2):
    """Return the summed log of the element-wise product of two density lists.

    Args:
        pdf1: Densities of the first variable, one per observation.
        pdf2: Densities of the second variable, aligned with ``pdf1``.

    Returns:
        The sum over observations of ``log(pdf1[i] * pdf2[i])``.
    """
    # Pair the densities directly instead of assuming exactly 49 observations.
    pdf_univariate = [first*second for first, second in zip(pdf1, pdf2)]
    return sum(numpy.log(pdf_univariate))

# Total log-likelihood: two regression terms that share CS_Score as the
# predictor, plus the independent term built from pdf1 and pdf3.
cs_scores = list(df["CS_Score"])
BNLogLikelihoodFinal = (
    BNlogLikelihood(cs_scores, list(df["Research_Overhead"]))
    + BNlogLikelihood(cs_scores, list(df["Tuition_Out_State"]))
    + univariate_loglikelihood(pdf1, pdf3)
)
print("BNlogLikelihood = {:0.3f}".format(BNLogLikelihoodFinal))