In [None]:
# Mount Google Drive at /content/drive; the data paths used later in this
# notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import os
import matplotlib.pyplot as plt
import graphviz
from operator import itemgetter
from networkx.algorithms import approximation as approx
from networkx.algorithms import bipartite

In [None]:
# Labels
# Phenotype table with one row per subject. The displayed columns are
# Subject (numeric id), Subject Type (Control / Patient) and Label (0 / 1).
combined_phenotype='/content/drive/MyDrive/Capstone/COMBINED DATA/phenotypic_data.csv'

labels_mappings= pd.read_csv(combined_phenotype)
# NOTE(review): the "Unnamed: 0" column in the output looks like an index that
# was saved into the CSV; reading with index_col=0 would drop it — confirm
# before changing.
labels_mappings

Unnamed: 0,Subject,Subject Type,Label
0,40013,Control,0
1,40014,Control,0
2,40017,Control,0
3,40018,Control,0
4,40019,Control,0
...,...,...,...
416,2168,Patient,1
417,2169,Patient,1
418,2170,Patient,1
419,2171,Patient,1


In [None]:
def row_transform(arr, threshold):
    """Rewrite ``arr`` in place so that no entry is at or below ``threshold``.

    Entries strictly greater than ``threshold`` are kept. Every other entry
    is replaced by its absolute value plus 0.0001.

    Args:
        arr: Mutable, integer-indexable sequence of numbers.
        threshold: Values must exceed this to be left unchanged.

    Returns:
        The same ``arr`` object, after modification.
    """
    for idx in range(len(arr)):
        current = arr[idx]
        if current > threshold:
            replacement = current
        else:
            # Small offset keeps a former zero from staying exactly zero.
            replacement = abs(current) + 0.0001
        arr[idx] = replacement
    return arr

def binarize(df, threshold):
    """Clean an adjacency matrix in place and return it.

    Despite the name, values are not reduced to 0/1. Each entry strictly
    greater than ``threshold`` is kept; every other entry becomes
    ``abs(value) + 0.0001``. Missing values are then replaced by 0.

    The previous version assigned the result to a local name and returned
    nothing, so the ``fillna(0)`` step was lost and any visible effect came
    from mutating the Series handed out by ``DataFrame.transform``, which
    pandas does not support. This version computes the result with
    vectorised operations, writes it back into ``df`` so existing callers
    that ignore the return value still see the change, and returns ``df``.

    Args:
        df: Numeric DataFrame; modified in place.
        threshold: Values must exceed this to be left unchanged.

    Returns:
        The same ``df`` object, after modification.
    """
    cleaned = df.where(df > threshold, df.abs() + 0.0001).fillna(0)
    # Replace whole columns rather than writing into the existing blocks so
    # that a dtype change (e.g. int -> float) is handled cleanly.
    for column in df.columns:
        df[column] = cleaned[column]
    return df

In [None]:
# Input directories of per-subject adjacency CSVs. The loading loop compares
# the active directory against these names to decide which characters of a
# file name hold the subject id.
# cw: COBRE weighted, uw: UCLA weighted (v2), aw: augmented UCLA.
cw = '/content/drive/MyDrive/Capstone/COMBINED DATA/COBRE Weighted/'
uw = '/content/drive/MyDrive/Capstone/COMBINED DATA/UCLA Weighted v2/'
aw = '/content/drive/MyDrive/Capstone/Data Augmentation/UCLA Augmented/'

In [None]:
# Add the directories as per requirement
dirs = [aw]

# graphs[i] is the weighted graph of the subject whose id is labels[i].
graphs = list()
labels = list()
count = 0
for data_dir in dirs:
    # os.listdir gives no ordering guarantee; sort so graphs/labels are built
    # in the same order on every run.
    filenames = sorted(os.listdir(data_dir))
    print(len(filenames))
    for file in filenames:
        # Skip names containing "(1)" (presumably duplicate copies such as
        # "name (1).csv"). The old test `find("(1)") == 1` only matched when
        # "(1)" started at index 1, so it practically never fired.
        if "(1)" in file:
            continue
        if not file.endswith(".csv"):
            continue

        # The subject id sits at a fixed offset that depends on the dataset's
        # file naming scheme.
        if data_dir == cw:
            subject = file[15:20]
        elif data_dir == uw or data_dir == aw:
            subject = file[14:18]
        else:
            # Raise instead of exit(0): exiting tears down the notebook
            # kernel and reports success for what is a configuration error.
            raise ValueError(
                "If you are using a statically binarized folder:\n"
                "Add a condition along with cb condition for COBRE and ub condition for UCLA\n"
                "Example: elif (dir == ub) becomes elif (dir == ub) or (dir == ub0_3)"
            )

        # APPENDING LABEL
        # Note: this stores the subject id, not the diagnosis label. The
        # commented lines show the lookup against labels_mappings.
        labels.append(int(subject))
        print(int(subject))
        # mask = labels_mappings['Subject'] == int(subject)
        #labels.append((labels_mappings[mask]['Label'].values[0], int(subject)))

        # APPENDING CORRESPONDING GRAPH - may change based on implementation
        df = pd.read_csv(os.path.join(data_dir, file), header=None)
        # NOTE(review): the return value of binarize is not used, so this
        # relies on binarize modifying df in place — confirm.
        binarize(df, 0)
        G = nx.from_pandas_adjacency(df)
        graphs.append(G)

172
3000
3010
3002
3009
3001
3006
3005
3003
3004
3007
3008
3011
3014
3013
3017
3016
3018
3012
3015
3019
3020
3021
3024
3023
3026
3025
3027
3028
3022
3030
3029
3031
3039
3038
3032
3036
3034
3035
3033
3037
3040
3041
3043
3042
3045
3046
3044
3047
3048
3050
3049
3051
3055
3054
3052
3053
3056
3057
3058
3059
3060
3061
3062
3063
3064
3069
3068
3070
3065
3067
3066
3071
3072
3073
3076
3074
3075
3079
3078
3081
3077
3080
3082
3084
3085
3083
3086
3091
3089
3088
3090
3087
3093
3096
3092
3095
3094
3099
3100
3097
3098
3101
3104
3103
3102
3105
3106
3110
3107
3109
3108
3111
3112
3113
3116
3115
3114
3119
3118
3121
3120
3117
3122
3123
3125
3126
3124
3127
3129
3128
3130
3131
3133
3132
3135
3134
3136
3138
3137
3140
3139
3141
3142
3144
3143
3145
3146
3148
3149
3147
3150
3151
3152
3155
3154
3153
3156
3158
3161
3160
3159
3157
3162
3163
3164
3165
3166
3171
3168
3170
3167
3169


# Utility class: weighted global graph measures

In [None]:
class global_measures_weighted:
  """Compute weighted global graph measures for one subject and store them.

  Creating an instance runs every measure listed in ``__init__`` straight
  away and writes each result into ``df`` at row ``subject_no``, one column
  per measure. A measure that raises is reported with ``print`` and replaced
  by a neutral default (0, an empty path/matching, or an empty string) so the
  remaining measures still run.

  Args:
      subject_no: Subject id; used as the row label in ``df``.
      schiz: Truthy for a schizophrenic subject, falsy for a control.
      G: Graph whose edges carry a ``'weight'`` attribute.
      df: Results table shared across subjects; modified in place.
      seed: Optional seed forwarded to the randomized partitioning heuristic.
          The default ``None`` keeps the previous unseeded behaviour.
  """

  # Fixed node choices used by the measures below.
  # NOTE(review): these ids assume a particular parcellation size — confirm
  # they are valid for every dataset fed through this class.
  DIJKSTRA_SOURCE = 0
  DIJKSTRA_TARGET = 160
  CUT_NODES = (0, 1, 2, 3)
  CONDUCTANCE_NODES = tuple(range(82))

  def __init__(self, subject_no, schiz, G, df, seed=None):
    self.G = G
    self.subject = subject_no
    if schiz:
      self.schiz = 'Schizophrenic'
    else:
      self.schiz = 'Control'
    self.df = df
    self.seed = seed
    self.df.at[self.subject, "Subject"] = int(self.subject)
    self.df.at[self.subject, "Schizophrenic"] = schiz

    print("\nsubject " + str(subject_no))

    self.average_shortest_path_length()
    self.stoer_wagner()
    self.wiener_index()
    self.dijkstra_path()
    self.max_weight_matching()
    self.cut_size()
    self.conductance()
    self.approx_randomized_partitioning()

  def _attempt(self, measure, compute, default):
    """Return ``compute()``, or ``default`` (after reporting) if it raises."""
    try:
        return compute()
    except Exception as err:
        # Exception (not a bare except) so KeyboardInterrupt still stops the
        # run; the message keeps failures visible instead of silent.
        print("  [warn] " + measure + " failed: " + repr(err))
        return default

  def _record(self, column, value):
    """Write ``value`` into this subject's row of the results table."""
    self.df.at[self.subject, column] = value

  def average_shortest_path_length(self):
    """Record the weighted average shortest path length (0 on failure)."""
    r = self._attempt(
        "Average shortest path",
        lambda: nx.average_shortest_path_length(self.G, weight='weight'),
        0,
    )
    self._record("Average shortest path", r)

  def stoer_wagner(self):
    """Record the Stoer-Wagner minimum cut value (0 on failure).

    The previous fallback assigned ``r = 0`` but then stored the unbound
    ``cut``, so any failure ended in an UnboundLocalError.
    """
    def min_cut_value():
        cut, _partition = nx.stoer_wagner(self.G, weight='weight')
        return cut

    self._record("Stoer Wagner cuts", self._attempt("Stoer Wagner cuts", min_cut_value, 0))

  def wiener_index(self):
    """Record the weighted Wiener index (0 on failure)."""
    r = self._attempt(
        "Wiener Index",
        lambda: nx.wiener_index(self.G, weight='weight'),
        0,
    )
    self._record("Wiener Index", r)

  def dijkstra_path(self):
    """Record the number of nodes on the weighted shortest path between the
    two fixed endpoints (0 when no path can be computed)."""
    r = self._attempt(
        "Dijkstra path",
        lambda: nx.dijkstra_path(
            self.G, self.DIJKSTRA_SOURCE, self.DIJKSTRA_TARGET, weight='weight'
        ),
        list(),
    )
    self._record("Dijkstra path", len(r))

  def max_weight_matching(self):
    """Record the size of a maximum-weight matching (0 on failure)."""
    r = self._attempt(
        "Max weight matching",
        lambda: nx.max_weight_matching(self.G, maxcardinality=False, weight='weight'),
        list(),
    )
    self._record("Max weight matching", len(r))

  def barycenter(self):
    """Record the weighted barycenter (0 on failure).

    NOTE(review): not invoked from ``__init__``. The successful result is
    stored as returned by networkx; confirm it fits in a single table cell
    before enabling this measure.
    """
    r = self._attempt(
        "Barycenter",
        lambda: nx.barycenter(self.G, weight='weight', attr=None, sp=None),
        0,
    )
    self._record("Barycenter", r)

  def cut_size(self):
    """Record the weighted cut size between ``CUT_NODES`` and the rest.

    Stored under its own "Cut size" column. It used to be written to
    "Conductance" and was immediately overwritten by ``conductance``.
    """
    S = set(self.CUT_NODES)
    r = self._attempt(
        "Cut size",
        lambda: nx.cut_size(self.G, S, weight='weight'),
        0,
    )
    self._record("Cut size", r)

  def conductance(self):
    """Record the weighted conductance of ``CONDUCTANCE_NODES`` versus the
    remaining nodes (0 on failure)."""
    S = list(self.CONDUCTANCE_NODES)
    r = self._attempt(
        "Conductance",
        lambda: nx.conductance(self.G, S, T=None, weight='weight'),
        0,
    )
    self._record("Conductance", r)

  def approx_randomized_partitioning(self):
    """Record the randomized partitioning heuristic result as a string
    (empty string on failure). Pass ``seed`` to the constructor to make the
    result repeatable."""
    r = self._attempt(
        "Randomized Partitioning Heuristic",
        lambda: approx.randomized_partitioning(
            self.G, seed=self.seed, p=0.5, weight='weight'
        ),
        "",
    )
    self._record("Randomized Partitioning Heuristic", str(r))


# Driver loop: compute measures for every loaded graph

In [None]:
output_df = pd.DataFrame(columns=['Subject', 'Schizophrenic'])

# Subject ids above this value are flagged as schizophrenic.
# NOTE(review): the cutoff is carried over unchanged; the Label column of the
# phenotype table would be a more direct source — confirm.
PATIENT_ID_CUTOFF = 3122

for subject_id, g in zip(labels, graphs):
    # Decide the flag per subject. It used to be set once before the loop and
    # never reset, so the first id above the cutoff marked every later
    # subject as a patient, whatever its own id.
    schiz = 1 if subject_id > PATIENT_ID_CUTOFF else 0
    global_measures_weighted(subject_id, schiz, g, output_df)


subject 3000

subject 3010

subject 3002

subject 3009

subject 3001

subject 3006

subject 3005

subject 3003

subject 3004

subject 3007

subject 3008

subject 3011

subject 3014

subject 3013

subject 3017

subject 3016

subject 3018

subject 3012

subject 3015

subject 3019

subject 3020

subject 3021

subject 3024

subject 3023

subject 3026

subject 3025

subject 3027

subject 3028

subject 3022

subject 3030

subject 3029

subject 3031

subject 3039

subject 3038

subject 3032

subject 3036

subject 3034

subject 3035

subject 3033

subject 3037

subject 3040

subject 3041

subject 3043

subject 3042

subject 3045

subject 3046

subject 3044

subject 3047

subject 3048

subject 3050

subject 3049

subject 3051

subject 3055

subject 3054

subject 3052

subject 3053

subject 3056

subject 3057

subject 3058

subject 3059

subject 3060

subject 3061

subject 3062

subject 3063

subject 3064

subject 3069

subject 3068

subject 3070

subject 3065

subject 3067

subject 3066

subje

In [None]:
# Save the measures table to the current working directory.
# NOTE(review): with to_csv defaults the row index (the subject id) is written
# as an extra unnamed first column next to "Subject" — confirm this is wanted.
output_df.to_csv('global_measures_weighted_aug.csv')

# Results

In [None]:
# Display the collected measures: one row per subject, one column per measure.
output_df

Unnamed: 0,Subject,Schizophrenic,Average shortest path,Stoer Wagner cuts,Wiener Index,Dijkstra path,Max weight matching,Conductance,Randomized Partitioning Heuristic
3000,3000,0,0.012555,20.479361,167.807115,6.0,82.0,0.495620,"(1648.6063707380802, ({0, 3, 4, 9, 12, 15, 16,..."
3010,3010,0,0.012412,16.898351,165.892491,8.0,82.0,0.498204,"(1539.5217418635789, ({0, 128, 3, 132, 5, 6, 1..."
3002,3002,0,0.010298,20.075623,137.648029,8.0,82.0,0.491145,"(1374.510072904605, ({1, 4, 6, 7, 14, 16, 18, ..."
3009,3009,0,0.010249,22.489461,136.991810,4.0,82.0,0.472525,"(1412.5937324993017, ({2, 3, 4, 5, 6, 8, 14, 1..."
3001,3001,0,0.009900,18.636786,132.329467,7.0,82.0,0.497004,"(1307.7308541961775, ({128, 129, 6, 7, 8, 9, 1..."
...,...,...,...,...,...,...,...,...,...
3171,3171,1,0.011135,18.207614,148.831164,7.0,82.0,0.504037,"(1488.015140906703, ({6, 8, 10, 11, 12, 14, 15..."
3168,3168,1,0.010344,21.208409,138.256592,4.0,82.0,0.485365,"(1401.7140954226836, ({0, 1, 3, 5, 6, 7, 8, 9,..."
3170,3170,1,0.012029,25.750936,160.775121,4.0,82.0,0.496349,"(1642.0291251558797, ({4, 5, 6, 9, 11, 15, 20,..."
3167,3167,1,0.009766,21.030312,130.530521,7.0,82.0,0.508939,"(1363.3718401436267, ({1, 4, 6, 12, 14, 15, 16..."
