# Stellargraph

In [57]:
import pandas as pd
import pickle
import os
import sys
import numpy as np
import cv2
import torch
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from shapely.geometry import Polygon
from skimage import transform
from torch.utils.data import DataLoader
import torch.nn.functional as F


from floortrans.models import get_model
from floortrans.loaders import (
    FloorplanSVG,
    DictToTensor,
    Compose,
    RotateNTurns
)
from floortrans.plotting import (
    segmentation_plot,
    polygons_to_image,
    draw_junction_from_dict,
    discrete_cmap
)
from floortrans.post_prosessing import (
    split_prediction,
    get_polygons,
    split_validation
)
from mpl_toolkits.axes_grid1 import AxesGrid

discrete_cmap()

os.environ['PYTHONPATH'] = '/Users/alishakhan/Desktop/Career/Ascent Integrated Tech/task1/CubiCasa5k_git:' + os.environ.get('PYTHONPATH', '')

##### 
# HELPER FUNCTIONS 
#####

# This function takes an image of a floor plan and a target class as input and returns a binary mask with 1's at the locations where the target class is present in the image
def isolate_class(rooms, CLASS: int):

    # Create a zero-filled numpy array with the same shape as the input image
    template = np.zeros_like(rooms)

    # Get the row and column indices of the pixels where the target class is present
    rows, cols = np.where(rooms == CLASS)

    # Set the corresponding pixels in the template to 1
    template[rows, cols] = 1
    
    # Return the binary mask
    return template

# Define a list of room classes considered as bad and good classes
bad=[0, 1, 2, 8, 11]
good=[3,4,5,6,7,9,10,12]

# This function takes an image and a list of significant nodes (i.e., classes) as input and returns the contours of the rooms, the contours of the doors, and the centroid locations of the significant nodes
def vis_nodes(img, significant_nodes):
    #signficant nodes exclude rooms we don't care about
    nodes = {}
    room_contours={}
    #door_contours={}
    for c in significant_nodes:
        nodes[c] = []
        
        # Initialize an empty list to hold the contours of the rooms of the current class
        room_contours[c] = []
        
        # Get a binary mask with 1's at the locations where the current class is present in the image
        t = isolate_class(img, c)
        
        # Find the contours of the connected components in the binary mask
        contours, _ = cv2.findContours(t.astype(np.uint8), mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE)
        
        # Iterate over each contour and add it to the list of room contours and compute the centroid of the contour and add it to the list of node locations
        for s in contours:
            room_contours[c].append(s)
            nodes[c].append(np.squeeze(np.array(s), 1).mean(0))
    template = img.copy()
    
    # Return the room contours, door contours, and node locations
    return(room_contours, room_contours[12], nodes)

# This function takes the contours of the rooms and doors as input and returns the connections between the rooms as a list of pairs of room indices and as a list of pairs of room centroid locations
def get_edges(img, room_contours, door_contours):
    # Initialize empty lists to hold the room connections as indices and as centroid locations
    connections_int = []
    connections_vis = []
    
    # Iterate over each room contour to compare with other room contours
    for i, room1 in enumerate(room_contours):
        # Check that the first room contour has at least 4 points (i.e., is not a line or a point)
        if len(room1) < 4:
            return -1
        
        # Convert room contour to numpy array and create a shapely Polygon object
        room1_arr = np.array(room1).squeeze(1)
        room1_ply = Polygon(room1_arr).buffer(1)
        
        # Iterate over the remaining room contours
        for j, room2 in enumerate(room_contours[i+1:], start=i+1):
            # Check that the second room contour has at least 4 points (i.e., is not a line or a point)
            if len(room2) < 4:
                return -1
            
            # Convert room contour to numpy array and create a shapely Polygon object
            room2_arr = np.array(room2).squeeze(1)
            room2_ply = Polygon(room2_arr).buffer(1)
            
            # Check if the polygons intersect
            if room1_ply.intersects(room2_ply):
                # If the polygons intersect, add the pair of room indices to the list of connections
                connections_int.append([i, j])
                # Add the pair of room centroids to the list of connection locations
                connections_vis.append([room1_arr.mean(0), room2_arr.mean(0)])
                
            else:
                # Iterate over the door contours to check if there is a door between the two rooms
                for door in door_contours:
                    # Convert door contour to numpy array and create a shapely Polygon object
                    door_arr = np.array(door).squeeze(1)
                    door_ply = Polygon(door_arr).buffer(1)
                    
                    # Check if the door intersects both rooms
                    if room1_ply.intersects(door_ply) and room2_ply.intersects(door_ply):
                        # If there is a door between the two rooms, add the pair of room indices to the list of connections
                        connections_int.append([i, j])
                        # Add the pair of room centroids to the list of connection locations
                        connections_vis.append([room1_arr.mean(0), room2_arr.mean(0)])
    
    # Return the list of connections as indices and as centroid locations
    return connections_int, connections_vis


def create_dataframes(A, embeddings, nodes_lst, areas, relative_areas, degree_list, room_classes_names):
    # Create the first DataFrame
    source, target = np.where(A.todense() == 1)
    edges = np.column_stack((source, target))
    edges_df = pd.DataFrame(edges, columns=['source', 'target'])
    
    # Create the second DataFrame
    data = {
        'Area': areas,
        'Relative Area': relative_areas,
        'Number of neighboring rooms': degree_list,
        'Room Type': [room_classes_names[node] for node in nodes_lst]
    }
    attributes_df = pd.DataFrame(data)
    
    return edges_df, attributes_df


rot = RotateNTurns() #
room_classes = ["Background", "Outdoor", "Wall", "Kitchen", "Living Room" ,"Bed Room", "Bath", "Entry", "Railing", "Storage", "Garage", "Undefined"]
icon_classes = ["No Icon", "Window", "Door", "Closet", "Electrical Applience" ,"Toilet", "Sink", "Sauna Bench", "Fire Place", "Bathtub", "Chimney"]
room_classes.append("Door")
data_folder = '../data/cubicasa5k/'
data_file = 'test.txt'
normal_set = FloorplanSVG(data_folder, data_file, format='txt', original_size=True)
data_loader = DataLoader(normal_set, batch_size=1, num_workers=0)
data_iter = iter(data_loader)
# Setup Model
model = get_model('hg_furukawa_original', 51)

n_classes = 44
split = [21, 12, 11]

# This function takes a file path as input and returns embeddings, embeddings2, and Y.
def process_file(file_path):
    edges_df_list=[]
    attributes_df_list=[]
    
    with open(file_path, 'rb') as f:
        file=pickle.load(f)
    embeddings=None
    Y=None
    
    for index, floorplan in file.items():
        if 11 not in set(floorplan.flatten()): #no undefined rooms when training
            icons = normal_set[index]['label'][1].numpy()
            rows, column = np.where(icons == 2)
            try:
                floorplan[rows, column] = 12
            except:
                continue

            # Get the contours of the rooms, doors, and nodes in the floorplan
            rooms, doors, nodes = vis_nodes(floorplan, good)
            
            rc=[]
            areas=[]
            positions=[]
            for k in rooms.keys():
                if k!=12: #not a door
                    rc+=rooms[k]
                    for count, c in enumerate(rooms[k]):
                        area=cv2.contourArea(c)
                        areas.append(area)
                        positions.append(np.array(c).squeeze(1).mean(0).tolist())
            
            try:
                idx, vis = get_edges(floorplan, rc, doors)
            except:
                get_edges(floorplan, rc, doors)==-1
                continue

            if not idx:
                continue

            nodes_lst = []
            
            for k in rooms.keys():
                if k!=12:
                    nodes_lst += ([k] * len(rooms[k]))
            nodes_lst_updated = []
            
            areas_updated=[]
            room_type_areas=[]
            for i in range(len(nodes_lst)):
                edges = set(np.array(idx).flatten())
                if i in edges:
                    nodes_lst_updated.append(nodes_lst[i])
                    areas_updated.append(areas[i])
            nodes_lst=nodes_lst_updated
            areas=areas_updated
            total_area=sum(areas)
            
            #feature #1 relative areas
            relative_areas=np.array(areas)/total_area
            
            node_attrs = {}
            for i, n in enumerate(nodes_lst):
                node_attrs[i] = room_classes[n]
            
            # Create a NetworkX graph object from the edges
            G = nx.Graph(idx)
            
            # Compute the adjacency matrix of the graph
            A = nx.adjacency_matrix(G)
            
            # calculate the degree of each node
            #feature #3 number of adjacent rooms
            degree_list = list(dict(G.degree(G.nodes())).values())
            
            
            # Convert the node classes to one-hot vectors and concatenate them with the relative areas
            X = F.one_hot(torch.tensor(nodes_lst), 11).numpy()
            room_classes_names = ["Background", "Outdoor", "Wall", "Kitchen", "Living Room" ,"Bed Room", "Bath", "Entry", "Railing", "Storage", "Garage"]

            edges_df, attributes_df = create_dataframes(A, embeddings, nodes_lst, areas, relative_areas, degree_list, room_classes_names)

            

            
            if np.isnan(areas).any() or np.isnan(relative_areas).any() or np.isnan(room_type_areas).any():
                continue
                

            X_with_areas=np.hstack(( np.array(relative_areas).reshape(-1, 1),np.array(areas).reshape(-1,1), np.reshape(degree_list, (len(degree_list), 1))))
            
            try:
                X_all=np.concatenate(([X_all, X_with_areas]))
            except:
                X_all=X_with_areas
                
            H=A@X_with_areas
            if embeddings is None:
                embeddings=H
            else:
                embeddings = np.concatenate(([embeddings , H ]), axis=0)
            
            if Y is None:
                Y=X
            else:
                Y=np.concatenate(([Y, X]), axis=0)
            
            edges_df_list.append(edges_df)
            attributes_df_list.append(attributes_df)

    room_classes_names = ["Background", "Outdoor", "Wall", "Kitchen", "Living Room" ,"Bed Room", "Bath", "Entry", "Railing", "Storage", "Garage"]
    column_names = ['Relative Area', 'Area',  'Number of neighboring rooms']
    df = pd.DataFrame(data=embeddings, columns=column_names)
    
    return embeddings, Y, df, X_all, edges_df_list, attributes_df_list #X_all is what can be used for the non-graph model.


In [58]:
embeddings, Y, df, X_all, edges_df_list, attributes_df_list=process_file("/Users/alishakhan/Desktop/Career/Ascent Integrated Tech/task1/dataframes/val_modified_1.pkl")


In [62]:
#non chat gpt

# Define the GCN model
gcn_model = GCN(
    layer_sizes=[16, 16],
    activations=['relu', 'relu'],
    generator=None, # We'll set this later
    dropout=0.5
)

from tensorflow.keras import optimizers, losses, metrics

# Compile the model
gcn_model.compile(
    optimizer=optimizers.Adam(lr=0.01),
    loss=losses.categorical_crossentropy,
    metrics=[metrics.categorical_accuracy]
)

# Train the model on each floorplan graph
SG_list = [SG1, SG2, SG3] # Create a list of the StellarGraph objects for each floorplan
y_list = [get_node_labels1(), get_node_labels2(), get_node_labels3()] # Create a list of the node labels for each floorplan

for i, SG in enumerate(SG_list):
    y = y_list[i] # Get the node labels for the i-th floorplan
    gcn_model.generator = SG # Set the generator to the i-th floorplan
    gcn_model.fit(
        SG.flow(
            node_ids=SG.nodes(),
            targets=y,
            batch_size=32
        ),
        epochs=50,
        verbose=1
    )

# Get the predicted node features for each floorplan graph
node_features1 = gcn_model.predict(SG1)
node_features2 = gcn_model.predict(SG2)
node_features3 = gcn_model.predict(SG3)

# Print the predicted node features for the first floorplan
print(node_features1[:, 0]) # The first column corresponds to the 'x' feature




TypeError: Generator should be a instance of FullBatchNodeGenerator, FullBatchLinkGenerator or ClusterNodeGenerator

In [81]:
import pandas as pd
import os

import stellargraph as sg
from stellargraph.mapper import FullBatchNodeGenerator
from stellargraph.layer import GCN

from tensorflow.keras import layers, optimizers, losses, metrics, Model
from sklearn import preprocessing, model_selection
from IPython.display import display, HTML
import matplotlib.pyplot as plt
%matplotlib inline

dataset = sg.datasets.Cora()
G, node_subjects = dataset.load()

train_subjects, test_subjects = model_selection.train_test_split(
    node_subjects, train_size=140, test_size=None, stratify=node_subjects
)
val_subjects, test_subjects = model_selection.train_test_split(
    test_subjects, train_size=500, test_size=None, stratify=test_subjects
)

target_encoding = preprocessing.LabelBinarizer()

train_targets = target_encoding.fit_transform(train_subjects)
val_targets = target_encoding.transform(val_subjects)
test_targets = target_encoding.transform(test_subjects)

generator = FullBatchNodeGenerator(G, method="gcn")
train_gen = generator.flow(train_subjects.index, train_targets)

gcn = GCN(
    layer_sizes=[16, 16], activations=["relu", "relu"], generator=generator, dropout=0.5
)

x_inp, x_out = gcn.in_out_tensors()

predictions = layers.Dense(units=train_targets.shape[1], activation="softmax")(x_out)

model = Model(inputs=x_inp, outputs=predictions)
model.compile(
    optimizer=optimizers.Adam(lr=0.01),
    loss=losses.categorical_crossentropy,
    metrics=["acc"],
)

val_gen = generator.flow(val_subjects.index, val_targets)

from tensorflow.keras.callbacks import EarlyStopping

es_callback = EarlyStopping(monitor="val_acc", patience=50, restore_best_weights=True)

history = model.fit(
    train_gen,
    epochs=200,
    validation_data=val_gen,
    verbose=2,
    shuffle=False,  # this should be False, since shuffling data means shuffling the whole graph
    callbacks=[es_callback],
)

test_gen = generator.flow(test_subjects.index, test_targets)
test_metrics = model.evaluate(test_gen)
print("\nTest Set Metrics:")
for name, val in zip(model.metrics_names, test_metrics):
    print("\t{}: {:0.4f}".format(name, val))

all_nodes = node_subjects.index
all_gen = generator.flow(all_nodes)
all_predictions = model.predict(all_gen)
node_predictions = target_encoding.inverse_transform(all_predictions.squeeze())

df = pd.DataFrame({"Predicted": node_predictions, "True": node_subjects})
df.head(20)

Using GCN (local pooling) filters...
Epoch 1/200


  super().__init__(name, **kwargs)


1/1 - 2s - loss: 1.9741 - acc: 0.0786 - val_loss: 1.9140 - val_acc: 0.4080 - 2s/epoch - 2s/step
Epoch 2/200
1/1 - 0s - loss: 1.9144 - acc: 0.3857 - val_loss: 1.8690 - val_acc: 0.3560 - 137ms/epoch - 137ms/step
Epoch 3/200
1/1 - 0s - loss: 1.8544 - acc: 0.4000 - val_loss: 1.8159 - val_acc: 0.3420 - 132ms/epoch - 132ms/step
Epoch 4/200
1/1 - 0s - loss: 1.7804 - acc: 0.3857 - val_loss: 1.7533 - val_acc: 0.3600 - 129ms/epoch - 129ms/step
Epoch 5/200
1/1 - 0s - loss: 1.7154 - acc: 0.4786 - val_loss: 1.6840 - val_acc: 0.4080 - 129ms/epoch - 129ms/step
Epoch 6/200
1/1 - 0s - loss: 1.6440 - acc: 0.4571 - val_loss: 1.6125 - val_acc: 0.4640 - 131ms/epoch - 131ms/step
Epoch 7/200
1/1 - 0s - loss: 1.5272 - acc: 0.5143 - val_loss: 1.5405 - val_acc: 0.5200 - 129ms/epoch - 129ms/step
Epoch 8/200
1/1 - 0s - loss: 1.4327 - acc: 0.5929 - val_loss: 1.4671 - val_acc: 0.5500 - 129ms/epoch - 129ms/step
Epoch 9/200
1/1 - 0s - loss: 1.3258 - acc: 0.6214 - val_loss: 1.3904 - val_acc: 0.5740 - 133ms/epoch - 133

Epoch 73/200
1/1 - 0s - loss: 0.0427 - acc: 0.9714 - val_loss: 0.8009 - val_acc: 0.8280 - 128ms/epoch - 128ms/step

Test Set Metrics:
	loss: 0.6370
	acc: 0.8100


Unnamed: 0,Predicted,True
31336,Neural_Networks,Neural_Networks
1061127,Rule_Learning,Rule_Learning
1106406,Reinforcement_Learning,Reinforcement_Learning
13195,Reinforcement_Learning,Reinforcement_Learning
37879,Probabilistic_Methods,Probabilistic_Methods
1126012,Probabilistic_Methods,Probabilistic_Methods
1107140,Reinforcement_Learning,Theory
1102850,Neural_Networks,Neural_Networks
31349,Neural_Networks,Neural_Networks
1106418,Theory,Theory


In [87]:
import torch
import torch_geometric

import torch_geometric.nn as geom_nn
import torch.optim as optim

# Define the GCN architecture
class GCN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN, self).__init__()
        self.conv1 = geom_nn.GCNConv(input_dim, hidden_dim)
        self.conv2 = geom_nn.GCNConv(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        return torch.log_softmax(x, dim=1)

# Set up the model, optimizer, and loss function
input_dim = X_all.shape[1]
hidden_dim = 64
room_classes_names = ["Background", "Outdoor", "Wall", "Kitchen", "Living Room" ,"Bed Room", "Bath", "Entry", "Railing", "Storage", "Garage"]
output_dim = len(room_classes_names)
model = GCN(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

# Convert data to PyTorch tensors and create a torch_geometric.data.Data object
x = torch.tensor(X_all, dtype=torch.float)
y = torch.tensor(np.argmax(Y, axis=1), dtype=torch.long)
edge_index = torch.tensor(np.array([edges_df_list[0]['source'], edges_df_list[0]['target']]), dtype=torch.long)

num_nodes = x.shape[0]
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
val_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask = torch.zeros(num_nodes, dtype=torch.bool)

train_size = 140
val_size = 500
test_size = num_nodes - train_size - val_size

train_indices = torch.randperm(num_nodes)[:train_size]
val_indices = torch.randperm(num_nodes)[train_size:train_size + val_size]
test_indices = torch.randperm(num_nodes)[train_size + val_size:]

train_mask[train_indices] = True
val_mask[val_indices] = True
test_mask[test_indices] = True
data = torch_geometric.data.Data(x=x, y=y, edge_index=edge_index, train_mask=train_mask, val_mask=val_mask, test_mask=test_mask)

# Train the model
num_epochs = 100
model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = loss_fn(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    
# Set the model to evaluation mode
model.eval()

# Compute the accuracy on the test set
with torch.no_grad():
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    correct = (pred[data.test_mask] == data.y[data.test_mask]).sum().item()
    accuracy = correct / data.test_mask.sum().item()

print("Test set accuracy: {:.4f}".format(accuracy))


ZeroDivisionError: division by zero

In [88]:
print("Length of train_indices:", len(train_indices))
print("Length of val_indices:", len(val_indices))
print("Length of test_indices:", len(test_indices))

print("Sum of train_mask:", train_mask.sum().item())
print("Sum of val_mask:", val_mask.sum().item())
print("Sum of test_mask:", test_mask.sum().item())


Length of train_indices: 140
Length of val_indices: 45
Length of test_indices: 0
Sum of train_mask: 140
Sum of val_mask: 45
Sum of test_mask: 0


In [89]:
train_size = 100
val_size = 50
test_size = num_nodes - train_size - val_size

train_indices = torch.randperm(num_nodes)[:train_size]
val_indices = torch.randperm(num_nodes)[train_size:train_size + val_size]
test_indices = torch.randperm(num_nodes)[train_size + val_size:]

train_mask[train_indices] = True
val_mask[val_indices] = True
test_mask[test_indices] = True
data = torch_geometric.data.Data(x=x, y=y, edge_index=edge_index, train_mask=train_mask, val_mask=val_mask, test_mask=test_mask)

# Train the model
num_epochs = 100
model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = loss_fn(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Set the model to evaluation mode
model.eval()

# Compute the accuracy on the test set
with torch.no_grad():
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    correct = (pred[data.test_mask] == data.y[data.test_mask]).sum().item()
    accuracy = correct / data.test_mask.sum().item()

print("Test set accuracy: {:.4f}".format(accuracy))


Test set accuracy: 0.3143
