<a href="https://colab.research.google.com/github/hsabaghpour/Telecom-Churn-GraphGNN-XGBoost/blob/main/telecom_customer_churn_prediction_using_graph_based_solutions_and_XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install spektral xgboost tensorflow

In [None]:
import numpy as np
import tensorflow as tf
from spektral.layers import GraphConv
from spektral.data import Dataset, Graph
from spektral.data.loaders import SingleLoader
from sklearn.model_selection import train_test_split
import xgboost as xgb

# Synthetic stand-in data for the churn pipeline.
# X: one row of 10 random features per customer (1000 customers).
X = np.random.rand(1000, 10)

# A: dense weighted adjacency between customers. Entries are drawn uniformly
# from [0, 1) and only those at or above 0.95 are kept; everything else is
# zeroed, leaving roughly 5% of the possible edges.
A = np.random.rand(X.shape[0], X.shape[0])
A = np.where(A >= 0.95, A, 0.0)

# y: binary churn label per customer (0 or 1).
y = np.random.randint(low=0, high=2, size=X.shape[0])

In [None]:
class TelecomCommunityGNN(tf.keras.Model):
    """Node classifier: three graph convolutions followed by a softmax head.

    The model is called with a pair ``(node_features, adjacency)`` and returns,
    for every node, a length-2 vector produced by the final softmax layer.

    NOTE(review): newer Spektral releases appear to expose this layer under the
    name ``GCNConv`` -- confirm against the installed version.
    """

    def __init__(self):
        super().__init__()
        # Graph-convolution stack (16 -> 16 -> 8 channels), ReLU after each.
        self.graph_conv1 = GraphConv(16, activation='relu')
        self.graph_conv2 = GraphConv(16, activation='relu')
        self.graph_conv3 = GraphConv(8, activation='relu')
        # Two-way softmax output layer.
        self.dense = tf.keras.layers.Dense(units=2, activation='softmax')

    def call(self, inputs):
        """Apply the three convolutions in order, then the softmax head.

        Args:
            inputs: a two-element sequence ``(node_features, adjacency)``.

        Returns:
            The output of the final ``Dense(2, softmax)`` layer.
        """
        node_features, adjacency = inputs
        hidden = node_features
        # Every convolution receives the same adjacency matrix.
        for conv in (self.graph_conv1, self.graph_conv2, self.graph_conv3):
            hidden = conv([hidden, adjacency])
        return self.dense(hidden)

# Prepare adjacency matrix and features
adjacency_matrix = A
features = X
labels = y
n_nodes = features.shape[0]

# Train-test split. The node indices are split together with the features and
# labels so the same partition can be expressed as per-node masks over the
# full graph (train_test_split applies one shared permutation to all arrays).
X_train, X_test, y_train, y_test, train_idx, test_idx = train_test_split(
    features, labels, np.arange(n_nodes), test_size=0.2, random_state=42
)

# Hold out the last 10% of the training nodes for validation; this replaces
# validation_split=0.1, which would slice rows of the feature matrix and the
# adjacency matrix independently.
split_at = len(train_idx) - int(0.1 * len(train_idx))
fit_idx, val_idx = train_idx[:split_at], train_idx[split_at:]

# Per-node loss weights: 1 for nodes that contribute to the loss, 0 otherwise.
# Test nodes are present in the graph but never contribute to training loss.
train_mask = np.zeros(n_nodes, dtype="float32")
train_mask[fit_idx] = 1.0
val_mask = np.zeros(n_nodes, dtype="float32")
val_mask[val_idx] = 1.0

# Initialize GNN model
gnn_model = TelecomCommunityGNN()

# Compile the GNN model
gnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train GNN model.
# The adjacency matrix is (n_nodes, n_nodes), so the graph convolutions need
# all n_nodes feature rows at once. Feeding only X_train, or mini-batches of
# 32 rows, pairs a row-slice of the features with a row-slice of the adjacency
# and the product is ill-formed. Instead the whole graph is passed as a single
# unshuffled batch and the masks select which nodes are scored.
# NOTE(review): the accuracy metric may or may not honour the masks depending
# on the Keras version; pass weighted_metrics=['accuracy'] to compile() if
# masked accuracy is required.
gnn_model.fit(
    [features, adjacency_matrix],
    labels,
    sample_weight=train_mask,
    validation_data=([features, adjacency_matrix], labels, val_mask),
    epochs=20,
    batch_size=n_nodes,
    shuffle=False,
)

In [None]:
# Extract per-node GNN outputs (community representation).
# The whole graph must go through the model as one batch: with Keras' default
# batch_size of 32, X and A would each be cut into 32-row slices and the
# (n, n) adjacency would no longer line up with the node features.
embeddings = gnn_model.predict([X, A], batch_size=X.shape[0])

# NOTE: `embeddings` is the output of the model's final Dense(2, softmax)
# layer, i.e. two class-probability columns per node, not a hidden-layer
# embedding.

# Combine GNN outputs with original features for XGBoost input
X_combined = np.concatenate([X, embeddings], axis=1)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Partition the combined feature matrix: 80% for fitting, 20% held out.
(
    X_train_combined,
    X_test_combined,
    y_train,
    y_test,
) = train_test_split(X_combined, y, test_size=0.2, random_state=42)

# Fit the gradient-boosted classifier on the training partition.
xgb_model = xgb.XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
)
xgb_model.fit(X_train_combined, y_train)

# Predict labels for the held-out partition.
y_pred = xgb_model.predict(X_test_combined)

# Report held-out accuracy and the per-class precision/recall summary.
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {test_accuracy}")
test_report = classification_report(y_test, y_pred)
print(f"Classification Report:\n{test_report}")