<a href="https://colab.research.google.com/github/ajaykgautam/PNamePid/blob/main/facebook_graph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import networkx as nx
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt


In [18]:
# Task 1: Graph Representation
def load_graph_from_file(file_path):
    # Read the text file and create a graph
    #file_path = "https://drive.google.com/file/d/1IbpWtIMyYxSUnbdb4-g_Gjb_jAKrYYBt/view?usp=drive_link"
    G = nx.Graph()
    with open(file_path, 'r') as file:
        for line in file:
            if line.strip():  # Skip empty lines
                node1, node2 = line.strip().split()  # Assuming each line contains two node IDs
                G.add_edge(node1, node2)
    return G



In [12]:
# Task 2: Feature Engineering
def calculate_node_features(G):
    # Example: Calculate degree centrality and clustering coefficient as features
    degree_centrality = dict(nx.degree_centrality(G))
    clustering_coefficient = dict(nx.clustering(G))
    node_features = pd.DataFrame({'degree_centrality': degree_centrality, 'clustering_coefficient': clustering_coefficient})
    return node_features.fillna(0)  # Handle missing values if any



In [13]:
# Task 3: Data Preparation
def prepare_data_for_clustering(node_features):
    # Scale features for clustering
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(node_features)
    return scaled_features


In [14]:
# Task 5: Clustering
def apply_clustering(features, num_clusters):
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(features)
    return cluster_labels



In [15]:
# Task 7: Interpretation and Analysis
def visualize_clusters(G, cluster_labels):
    pos = nx.spring_layout(G)  # Position nodes using a spring layout
    plt.figure(figsize=(10, 8))
    nx.draw_networkx_nodes(G, pos, node_color=cluster_labels, cmap=plt.cm.Set1, node_size=300)
    nx.draw_networkx_edges(G, pos, alpha=0.5)
    plt.title('Graph Clustering')
    plt.colorbar(label='Cluster')
    plt.show()



In [33]:
# Example usage:
if __name__ == "__main__":
    # Task 1: Load graph from file
    import os
    from google.colab import drive
    drive.mount("/content/mydata/facebook_combined")
    #file_path = 'https://drive.google.com/file/d/1IbpWtIMyYxSUnbdb4-g_Gjb_jAKrYYBt/view?usp=drive_link'
    file_path = '/content/mydata/facebook_combined.txt'
    graph = load_graph_from_file(file_path)

    # Task 2: Calculate node features
    node_features = calculate_node_features(graph)

    # Task 3: Prepare data for clustering
    features = prepare_data_for_clustering(node_features)

    # Task 5: Clustering
    num_clusters = 3
    cluster_labels = apply_clustering(features, num_clusters)

    # Task 7: Interpretation and Analysis
    visualize_clusters(graph, cluster_labels)

    # Optional: Evaluate clustering performance
    silhouette = silhouette_score(features, cluster_labels)
    print(f"Silhouette Score: {silhouette}")

ValueError: Mountpoint must be in a directory that exists