In [1]:
pip install networkx




In [2]:
pip install tensorflow




In [3]:
pip install pandas




In [4]:
pip install numpy




In [5]:
pip install scikit-learn




In [6]:
pip install matplotlib




In [7]:
pip install seaborn




In [6]:
import networkx as nx
import pandas as pd
import numpy as np
import random

def generate_graph(min_nodes, max_nodes, p, min_clique_size, max_clique_size):
    n = random.randint(min_nodes, max_nodes)  # Randomly choose the number of nodes
    clique_size = random.randint(min_clique_size, min(max_clique_size, n))  # Random clique size that's possible
    G = nx.gnp_random_graph(n, p)

    add_clique = random.choice([True, False])  # Randomly decide whether to add a clique
    if add_clique:
        clique_nodes = random.sample(list(G.nodes()), clique_size)
        for i in range(clique_size):
            for j in range(i + 1, clique_size):
                G.add_edge(clique_nodes[i], clique_nodes[j])

    # Check if a clique of the given size exists
    has_clique = any(len(clique) >= clique_size for clique in nx.find_cliques(G))
    edge_list = G.edges()
    return G, has_clique, n, edge_list, clique_size

def graph_to_adj_matrix(G, n):
    """Convert graph to an adjacency matrix of size n x n."""
    return nx.to_numpy_array(G, nodelist=range(n))

# Parameters
min_nodes = 5
max_nodes = 15
edge_prob = 0.3
min_clique_size = 3
max_clique_size = 6
num_graphs = 1000  # Number of graphs to generate

# Store graph data
data = []

for _ in range(num_graphs):
    G, has_clique, num_nodes, edge_list, clique_size = generate_graph(min_nodes, max_nodes, edge_prob, min_clique_size, max_clique_size)
    adj_matrix = graph_to_adj_matrix(G, num_nodes)
    edge_list_str = ';'.join(f"{u}-{v}" for u, v in edge_list)  # Convert edge list to a semicolon-separated string
    data.append([num_nodes, clique_size, edge_list_str, adj_matrix.flatten(), has_clique])

# Convert to DataFrame and save to CSV
df = pd.DataFrame(data, columns=['Num_Nodes', 'Clique_Size', 'Edge_List', 'Adjacency_Matrix', 'Has_Clique'])
df['Adjacency_Matrix'] = df['Adjacency_Matrix'].apply(lambda x: ','.join(map(str, x.astype(int))))
df.to_csv('graph_data.csv', index=False)

print("Data saved to 'graph_data.csv'.")


Data saved to 'graph_data.csv'.


In [22]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Concatenate
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
df = pd.read_csv('graph_data.csv')

# Data Integrity Checks
df['Adjacency_Matrix'] = df['Adjacency_Matrix'].astype(str)
if 'Edge_List' in df.columns:
    df['Edge_List'] = df['Edge_List'].astype(str)

# Function to convert adjacency matrix string to a padded numpy array
def convert_and_pad(matrix_str, max_nodes):
    matrix = np.array(list(map(int, matrix_str.split(','))))
    size = int(np.sqrt(len(matrix)))  # Infer the size of the matrix
    padded_matrix = np.zeros((max_nodes, max_nodes), dtype=int)
    padded_matrix[:size, :size] = matrix.reshape(size, size)
    return padded_matrix

# Find the maximum size of the graph (i.e., the largest adjacency matrix)
max_nodes = max(int(np.sqrt(len(item))) for item in df['Adjacency_Matrix'])

# Apply conversion and padding
df['Adjacency_Matrix'] = df['Adjacency_Matrix'].apply(lambda x: convert_and_pad(x, max_nodes))

# Extract features and labels
X = np.stack(df['Adjacency_Matrix'].values)  # Features for adjacency matrices
y = df['Has_Clique'].values  # Labels

# Reshape X to include a channel dimension (required for Conv2D)
X = X.reshape(-1, max_nodes, max_nodes, 1)

# Process additional numeric features
numeric_features = df[['Num_Nodes', 'Clique_Size']].values
edge_features = df['Edge_List'].apply(lambda x: len(x.split(';'))).values.reshape(-1, 1)

# Normalize numeric and edge features
scaler = StandardScaler()
numeric_features = scaler.fit_transform(numeric_features)
edge_features = scaler.fit_transform(edge_features)

# Combine all additional features
additional_features = np.hstack([numeric_features, edge_features])

# Split data into training and test sets
X_train, X_test, X_train_af, X_test_af, y_train, y_test = train_test_split(X, additional_features, y, test_size=0.2, random_state=42)

# Define a multi-input model
input_graph = Input(shape=(max_nodes, max_nodes, 1))
x_graph = Conv2D(10, (3, 3), activation='relu')(input_graph)
x_graph = MaxPooling2D((2, 2))(x_graph)
x_graph = Flatten()(x_graph)

input_features = Input(shape=(additional_features.shape[1],))
combined = Concatenate()([x_graph, input_features])
x = Dense(64, activation='relu')(combined)
output = Dense(1, activation='sigmoid')(x)
model = Model(inputs=[input_graph, input_features], outputs=output)

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit([X_train, X_train_af], y_train, epochs=100, validation_split=0.2)

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate([X_test, X_test_af], y_test, verbose=2)
print(f'Test accuracy: {test_acc:.4f}')


Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.5549 - loss: 0.6613 - val_accuracy: 0.6875 - val_loss: 0.5821
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7612 - loss: 0.5832 - val_accuracy: 0.8313 - val_loss: 0.5347
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8578 - loss: 0.4968 - val_accuracy: 0.8375 - val_loss: 0.4077
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8869 - loss: 0.3560 - val_accuracy: 0.8750 - val_loss: 0.3325
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8826 - loss: 0.3062 - val_accuracy: 0.8438 - val_loss: 0.3753
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8939 - loss: 0.2535 - val_accuracy: 0.8625 - val_loss: 0.3031
Epoch 7/100
[1m20/20[0m [32m━━

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 1.0000 - loss: 6.6186e-04 - val_accuracy: 0.8875 - val_loss: 0.7048
Epoch 52/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 1.0000 - loss: 6.9522e-04 - val_accuracy: 0.8875 - val_loss: 0.7121
Epoch 53/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 6.0848e-04 - val_accuracy: 0.8875 - val_loss: 0.7213
Epoch 54/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 1.0000 - loss: 5.0416e-04 - val_accuracy: 0.8875 - val_loss: 0.7200
Epoch 55/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 3.7300e-04 - val_accuracy: 0.8875 - val_loss: 0.7333
Epoch 56/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 5.8234e-04 - val_accuracy: 0.8875 - val_loss: 0.7239
Epoch 57/100
[

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 1.1187e-04 - val_accuracy: 0.8875 - val_loss: 0.8538
7/7 - 0s - 31ms/step - accuracy: 0.8450 - loss: 0.9330
Test accuracy: 0.8450


In [80]:
import numpy as np
import networkx as nx
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

def create_and_prepare_graph(num_nodes, edge_prob, max_nodes, scaler):
    """Generate a random graph, return its padded adjacency matrix and scaled features."""
    G = nx.gnp_random_graph(num_nodes, edge_prob)
    adjacency_matrix = nx.to_numpy_array(G)
    padded_matrix = np.zeros((max_nodes, max_nodes))
    padded_matrix[:num_nodes, :num_nodes] = adjacency_matrix

    # Example features: number of nodes, clique size, and assuming edge count as a third feature
    edge_count = G.number_of_edges()  # Calculate number of edges as a feature
    num_features = np.array([[num_nodes, 4, edge_count]])  # Include all features used in training

    # Scale the features
    num_features_scaled = scaler.transform(num_features)

    return padded_matrix.reshape(1, max_nodes, max_nodes, 1), num_features_scaled

# Correct setting of max_nodes and loading the scaler
max_nodes = 21  # Set this to match the model's expected input size for adjacency matrices
scaler = StandardScaler()  # Assuming the scaler is pre-fitted with the training data setup
# Example fitting for the scaler (replace with your actual fitting from training data)
scaler.fit([[10, 3, 15], [12, 4, 20]])  # Example fitting: Num_Nodes, Clique_Size, Edge_Count

# Create a new graph for testing
test_adj_matrix, test_features_scaled = create_and_prepare_graph(10, 0.5, max_nodes, scaler)



# Predict using the model
prediction = model.predict([test_adj_matrix, test_features_scaled])
predicted_label = (prediction > 0.5).astype(int)

print("Predicted Probability:", prediction.flatten()[0])
print("Predicted Label:", 'Contains a Clique' if predicted_label[0][0] == 1 else 'Does Not Contain a Clique')


Predicted Probability: 1.0
Predicted Label: Contains a Clique
