In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import sys
import matplotlib.pyplot as plt


In [None]:
def ut_as_list( dframe, diag=1, cols=['Row','Column','Value'] ):
    """
    Flatten the upper triangle of a symmetric dataframe (columns == rows)
    into a long table of row/column pairs.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Matrix to flatten. It is only read; its axis names are not changed.
    diag : int
        Diagonal offset handed to the triangle selection as ``k``:
        1 (default) ignores the diagonal, 0 includes it.
    cols : sequence of three str
        Names of the output columns: row label, column label, value.

    Returns
    -------
    pandas.DataFrame
        One row per selected cell, in row-major order, with a fresh
        0..n-1 index. Cells whose value is missing (NaN) are left out.
    """
    n_rows, n_cols = dframe.shape
    # Positions of the upper-triangle cells, listed row by row.
    row_pos, col_pos = np.triu_indices(n_rows, k=diag, m=n_cols)
    values = dframe.to_numpy()[row_pos, col_pos]
    # Filter missing values explicitly instead of relying on stack() to
    # drop them, which depends on the pandas version in use.
    keep = ~pd.isna(values)
    d = pd.DataFrame({
        0: dframe.index.to_numpy()[row_pos][keep],
        1: dframe.columns.to_numpy()[col_pos][keep],
        2: values[keep],
    })
    d.columns = list(cols)
    return d

def PCA_whitening(X, eps=1e-5):
    """
    PCA-whiten the columns of ``X``.

    The columns are mean-centred, projected onto the eigenvectors of their
    covariance matrix and each projected component is divided by
    ``sqrt(eigenvalue + eps)``.

    Parameters
    ----------
    X : array-like or pandas.DataFrame, shape (n_samples, n_features)
        Rows are observations, columns are the variables to decorrelate.
    eps : float
        Added to every eigenvalue before the square root so that
        (near-)zero-variance components do not cause a division by zero.

    Returns
    -------
    Same container kind as ``X`` (the arithmetic is applied to it directly),
    with one column per principal component, ordered by ascending eigenvalue.
    """
    centered_X = X - np.mean(X, axis = 0)
    # atleast_2d: np.cov returns a 0-d value for a single column.
    cov = np.atleast_2d(np.cov(centered_X.T))
    # The covariance matrix is symmetric, so use eigh: it guarantees real
    # eigenvalues/eigenvectors, whereas eig may return complex ones.
    eigVals, eigVecs = np.linalg.eigh(cov)
    # Round-off can push eigenvalues of a rank-deficient covariance slightly
    # below zero; clip so the square root below stays real.
    eigVals = np.clip(eigVals, 0, None)
    # Rotate the centred data onto the principal axes, then rescale.
    transf_x = centered_X @ eigVecs
    whitened_x = transf_x / np.sqrt(eigVals + eps)
    return whitened_x

def partial(x,y,z,cc):
    """
    First-order partial correlation of x and y, controlling for z.

    x, y, z : gene (row/column) names present in ``cc``
    cc      : dataframe; symmetric matrix of pearson correlations

    Returns (r_xy - r_xz*r_yz) / (sqrt(1 - r_xz^2) * sqrt(1 - r_yz^2)).
    """
    r_xy, r_xz, r_yz = cc.loc[x, y], cc.loc[x, z], cc.loc[y, z]
    numerator = r_xy - r_xz * r_yz
    denominator = np.sqrt(1 - r_xz**2) * np.sqrt(1 - r_yz**2)
    return numerator / denominator

def get_all_partials( g1, g2, cc):
    """
    Partial correlation of g1 and g2 controlling for every other label in cc.

    g1, g2 : labels (strings) present in both axes of ``cc``
    cc     : dataframe; symmetric matrix of pearson correlations

    Returns a dataframe indexed by the remaining labels (g1 and g2 removed)
    with two columns:
      'pc_<g1>_<g2>' : partial correlation of g1, g2 given that label
      'ratio'        : squared partial correlation / squared raw correlation
    Rows are sorted by 'ratio', largest first.
    """
    r_12 = cc.loc[g1][g2]
    r_1z = cc.loc[g1]   # correlations of g1 with every label
    r_2z = cc.loc[g2]   # correlations of g2 with every label

    # The raw correlation repeated once per row of cc.
    r_12_repeated = np.full(cc.shape[0], r_12)
    numerator = r_12_repeated - r_1z * r_2z
    denominator = np.sqrt(1 - r_1z**2) * np.sqrt(1 - r_2z**2)

    column = 'pc_' + g1 + '_' + g2
    # The pair itself cannot act as its own control variable.
    table = (numerator / denominator).to_frame(column).drop([g1, g2], axis=0)
    table['ratio'] = table[column]**2 / r_12**2
    return table.sort_values('ratio', ascending=False)

In [None]:
# Read the input table: the first CSV row supplies the column names and the
# first CSV column supplies the row labels.
data = pd.read_csv(
    'Ceres_dataframe.csv',
    header=0,
    index_col=0,
)
print(data.shape)

In [None]:
# PCA-whiten the columns of `data`; the whitened table feeds the correlation step.
w_data=PCA_whitening(data)

In [None]:
# Pearson correlation between every pair of rows of the whitened table.
ccvals = np.corrcoef(w_data)
# Zero out the self-correlations on the diagonal.
np.fill_diagonal(ccvals, 0)
# Label both axes with the row labels of `data`.
cc = pd.DataFrame(ccvals, index=data.index.values, columns=data.index.values)

# Create PCC pairs: one row per upper-triangle cell, strongest |PCC| first.
PCC_pairs = ut_as_list(cc, cols=['Gene1', 'Gene2', 'G1G2_PCC'])
PCC_pairs = PCC_pairs.sort_values(by='G1G2_PCC', key=abs, ascending=False)

In [None]:
# Keep only pairs whose correlation lies strictly outside [-0.15, 0.15].
# .copy() detaches the result from PCC_pairs so that the columns added to it
# later are written to an independent frame (no SettingWithCopyWarning).
filtered_list = PCC_pairs[~PCC_pairs['G1G2_PCC'].between(-0.15, 0.15)].copy()

In [None]:
# Add the result columns that the per-pair loop fills in.
# Numeric columns start as NaN (float dtype) rather than empty strings, so a
# row that is never filled compares as "not > threshold" instead of raising.
# assign() returns a new frame, so the later writes do not target a slice.
filtered_list = filtered_list.assign(**{
    "Gene3": "",
    "G1G2.G3_Partial": np.nan,
    "G1G2.G3_Ratio": np.nan,

    "G1G3_PCC": np.nan,
    "G1G3.G2_Partial": np.nan,
    "G1G3.G2_Ratio": np.nan,

    "G2G3_PCC": np.nan,
    "G2G3.G1_Partial": np.nan,
    "G2G3.G1_Ratio": np.nan,
})


In [None]:
# For every retained pair (g1, g2): pick the third gene g3 whose control
# gives the largest partial/raw ratio, then record the raw and partial
# correlations of all three pairings within the triplet.
# Rows are collected in a list and attached in one step; element-wise chained
# assignment (df[col].iloc[i] = value) is unreliable and does nothing under
# pandas Copy-on-Write.
triplet_columns = [
    "Gene3", "G1G2.G3_Partial", "G1G2.G3_Ratio",
    "G1G3_PCC", "G1G3.G2_Partial", "G1G3.G2_Ratio",
    "G2G3_PCC", "G2G3.G1_Partial", "G2G3.G1_Ratio",
]
triplet_rows = []
for g1, g2 in zip(filtered_list['Gene1'], filtered_list['Gene2']):
    # get all partials for g1 g2; the table is sorted by 'ratio' descending,
    # so its first row is the top-ratio third gene.
    partials_g1g2 = get_all_partials(g1, g2, cc)
    g3 = partials_g1g2.index[0]

    pcc_g1g3 = cc.loc[g1, g3]
    pcc_g2g3 = cc.loc[g2, g3]
    partial_g1g3_g2 = partial(g1, g3, g2, cc)
    partial_g2g3_g1 = partial(g2, g3, g1, cc)

    triplet_rows.append({
        "Gene3": g3,
        "G1G2.G3_Partial": partials_g1g2.iloc[0, 0],
        # Same quantity as partial(g1, g2, g3)**2 / PCC(g1, g2)**2, already
        # computed by get_all_partials.
        "G1G2.G3_Ratio": partials_g1g2['ratio'].iloc[0],
        "G1G3_PCC": pcc_g1g3,
        "G1G3.G2_Partial": partial_g1g3_g2,
        "G1G3.G2_Ratio": partial_g1g3_g2**2 / pcc_g1g3**2,
        "G2G3_PCC": pcc_g2g3,
        "G2G3.G1_Partial": partial_g2g3_g1,
        "G2G3.G1_Ratio": partial_g2g3_g1**2 / pcc_g2g3**2,
    })

triplet_stats = pd.DataFrame(triplet_rows, columns=triplet_columns)
# Attach positionally (row order of triplet_stats == row order of
# filtered_list); assign() returns a new, independent frame.
filtered_list = filtered_list.assign(
    **{column: triplet_stats[column].to_numpy() for column in triplet_columns}
)
    

In [None]:
# Display the (rows, columns) size of the annotated pair table.
filtered_list.shape

In [None]:
# Keep triplets in which at least one of the three partial/raw ratios exceeds 2.
ratio_columns = ['G1G2.G3_Ratio', 'G1G3.G2_Ratio', 'G2G3.G1_Ratio']
ratio_exceeds = (filtered_list[ratio_columns].astype(float) > 2).any(axis=1)
# Chaining reset_index (instead of inplace=True on the filtered slice) yields
# a fresh frame with a 0..n-1 index that is safe to modify afterwards.
moon = filtered_list[ratio_exceeds].reset_index(drop=True)
moon.shape

In [None]:
# Discard triplets in which two or more of the three pairwise correlations
# are negative (equivalent to the pairwise "both < 0" checks on
# G1G2/G1G3, G1G2/G2G3 and G1G3/G2G3).
# A single vectorised mask replaces the row-by-row in-place drop, which was
# quadratic and only valid while the index was exactly 0..n-1.
pcc_columns = ['G1G2_PCC', 'G1G3_PCC', 'G2G3_PCC']
n_negative = (moon[pcc_columns].astype(float) < 0).sum(axis=1)
moon = moon[n_negative < 2].reset_index(drop=True)
moon.shape

In [None]:
# Label each triplet with the gene that is NOT part of the negatively
# correlated pair. The first matching rule wins, in this order:
#   G1G2_PCC < 0 -> Gene3,  G1G3_PCC < 0 -> Gene2,  G2G3_PCC < 0 -> Gene1;
# rows with no negative correlation get an empty string.
# np.select replaces the element-wise chained assignment (moon[col][i] = v),
# which is unreliable and does nothing under pandas Copy-on-Write.
neg_g1g2 = (moon['G1G2_PCC'].astype(float) < 0).to_numpy()
neg_g1g3 = (moon['G1G3_PCC'].astype(float) < 0).to_numpy()
neg_g2g3 = (moon['G2G3_PCC'].astype(float) < 0).to_numpy()

moon = moon.assign(**{
    'Moonlighter Gene': np.select(
        [neg_g1g2, neg_g1g3, neg_g2g3],
        [
            moon['Gene3'].to_numpy(dtype=object),
            moon['Gene2'].to_numpy(dtype=object),
            moon['Gene1'].to_numpy(dtype=object),
        ],
        default="",
    )
})
        