In [1]:
import scipy.io as sio
import pandas as pd
import networkx as nx

# Code based on https://github.com/timpostuvan/link-prediction-facebook100/blob/master/all-data/facebook100_parser.py

def get_attribute_partition(matlab_object, attribute):
    """Return a ``{row position: value}`` mapping for one node attribute.

    Parameters
    ----------
    matlab_object : mapping
        Object whose ``"local_info"`` entry iterates over rows of exactly
        7 entries each (presumably the dict returned by
        ``scipy.io.loadmat`` -- confirm against the caller).
    attribute : str
        A key of the module-level ``attribute_dict``, which maps attribute
        names to the position of that attribute inside each row.

    Returns
    -------
    dict
        Maps the 0-based position of each row to ``int(row[index])``.

    Raises
    ------
    KeyError
        If ``attribute`` is not a key of ``attribute_dict`` (or
        ``matlab_object`` has no ``"local_info"`` entry).
    ValueError
        If any row does not contain exactly 7 entries.
    """
    expected_width = 7
    attribute_rows = matlab_object["local_info"]

    try:
        index = attribute_dict[attribute]
    except KeyError:
        # Format with an f-string so a non-string `attribute` still produces
        # this KeyError instead of a TypeError from string concatenation.
        # `from None` hides the redundant lookup traceback.
        raise KeyError(
            f"Given attribute {attribute} is not a valid choice.\n"
            "Valid choices include\n" + str(attribute_dict.keys())
        ) from None

    values = {}
    for current_id, row in enumerate(attribute_rows):
        width = len(row)
        if width != expected_width:
            # The count is of entries in a single row, i.e. columns.
            raise ValueError(
                f"Row {current_id} has {width} columns rather than "
                f"the expected {expected_width}!"
            )
        values[current_id] = int(row[index])

    return values

# Attribute names, listed in the order of their positions within a
# "local_info" row.
node_attributes = [
    "student_fac",
    "gender",
    "major_index",
    "second_major",
    "dorm",
    "year",
    "high_school",
]

# Attribute name -> position within a row, derived from the ordering above.
attribute_dict = {name: position for position, name in enumerate(node_attributes)}

In [2]:
# Turn one .mat file into a nx graph
filename = "./facebook100/American75.mat"
matlab_object = sio.loadmat(filename)
scipy_sparse_graph = matlab_object["A"]
G = nx.from_scipy_sparse_array(scipy_sparse_graph)

# get_attribute_partition keys its result by row position, and those keys are
# used as node ids below. Fail loudly if the attribute matrix and the graph
# disagree in size, rather than leaving some nodes without attributes.
num_attribute_rows = len(matlab_object["local_info"])
if num_attribute_rows != G.number_of_nodes():
    raise ValueError(
        "local_info has " + str(num_attribute_rows) + " rows but the graph has "
        + str(G.number_of_nodes()) + " nodes"
    )

# Attach every known attribute to the nodes, one attribute at a time.
for attribute in attribute_dict:
    values = get_attribute_partition(matlab_object, attribute)
    nx.set_node_attributes(G, values, name=attribute)

In [3]:
# Report the size of the graph, then show what edges and nodes look like.
print("Number of nodes:", G.number_of_nodes())
print("Number of edges", G.number_of_edges())

# Edges incident to node 1.
print("Edge's from node 1", G.edges(1))

# First three (node, attribute dict) pairs, shown as the cell's output.
[(node, attrs) for node, attrs in G.nodes(data=True)][:3]

Number of nodes: 6386
Number of edges 217662
Edge's from node 1 [(1, 453), (1, 1245), (1, 2539), (1, 3043), (1, 3354), (1, 3604), (1, 4742), (1, 5419)]


[(0,
  {'student_fac': 1,
   'gender': 1,
   'major_index': 265,
   'second_major': 247,
   'dorm': 0,
   'year': 2008,
   'high_school': 9367}),
 (1,
  {'student_fac': 5,
   'gender': 2,
   'major_index': 265,
   'second_major': 253,
   'dorm': 0,
   'year': 2006,
   'high_school': 23586}),
 (2,
  {'student_fac': 1,
   'gender': 1,
   'major_index': 0,
   'second_major': 0,
   'dorm': 0,
   'year': 2009,
   'high_school': 50418})]