# Credit Card Fraud Detection Using the PageRank Algorithm

Step 1: Import the required libraries

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

Step 2: Load and preprocess the dataset

In [None]:
# Load the credit-card transactions from the author's local copy of the CSV.
df = pd.read_csv("C:\\Users\\Administrator\\Desktop\\Wangari Muchai\\UON\\Matrix Computations\\Data Sets\\Credit card Transactions2.csv")

# Handle missing values: drop every row that contains one. The surviving rows
# keep their original index labels (the index is not renumbered).
df = df.dropna()

# Columns to standardise: 'Time', 'V1' ... 'V28' and 'Amount'.
numeric_features = ['Time'] + [f'V{i}' for i in range(1, 29)] + ['Amount']


def _standardize(column):
    """Return *column* shifted to zero mean and scaled to unit sample std.

    A column whose standard deviation is zero (all values equal) or undefined
    (fewer than two rows) is only centred: dividing by it would replace every
    value in the column with NaN.
    """
    centred = column - column.mean()
    spread = column.std()
    if pd.isna(spread) or spread == 0:
        return centred
    return centred / spread


# Scale numerical features column by column.
df[numeric_features] = df[numeric_features].apply(_standardize)

# Make sure the 'Class' label column holds integers.
df['Class'] = df['Class'].astype(int)

# Structure of the preprocessed dataset (column names, dtypes, non-null counts).
df.info()

Step 3: Define the function to check shared features or relationships

In [None]:
def share_common_features(df, transaction_id1, transaction_id2):
    """Return True when the two transactions have the same 'Amount' value.

    This is the hook that decides whether two transactions get linked; the
    current rule compares only the 'Amount' column. Extend the comparison
    here to link transactions on other attributes.

    Parameters
    ----------
    df : pandas.DataFrame
        Transactions, indexed by transaction id, with an 'Amount' column.
    transaction_id1, transaction_id2 : index labels
        Labels of the two rows of ``df`` to compare.

    Returns
    -------
    bool
        True if both rows hold exactly the same 'Amount', False otherwise.
        The comparison is exact equality, so a missing (NaN) amount never
        matches anything.

    Raises
    ------
    KeyError
        If either label is not present in the index of ``df``.
    """
    # Only 'Amount' takes part in the rule, so read just those two scalars
    # instead of materialising both full rows on every call (this function
    # runs once per pair of transactions).
    amount1 = df.at[transaction_id1, 'Amount']
    amount2 = df.at[transaction_id2, 'Amount']
    return bool(amount1 == amount2)


Step 4: Construct the network graph and visualize it

In [None]:
# One node per distinct transaction id (the DataFrame index labels).
G = nx.Graph()
transaction_ids = df.index.unique()
G.add_nodes_from(transaction_ids)

# Every unordered pair of distinct transactions, each pair visited once:
# a transaction is paired only with the ids that come after it.
candidate_pairs = (
    (first_id, transaction_ids[later_pos])
    for first_pos, first_id in enumerate(transaction_ids)
    for later_pos in range(first_pos + 1, len(transaction_ids))
)

# Link the pairs that share common features or relationships.
G.add_edges_from(
    (first_id, second_id)
    for first_id, second_id in candidate_pairs
    if share_common_features(df, first_id, second_id)
)

# Visualize the network graph using a force-directed (spring) layout.
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos=pos, with_labels=True, node_size=100, font_size=8)
plt.title("Network Graph of Credit Card Transactions")
plt.show()


Step 5: Calculate PageRank scores

In [None]:
# Score every transaction node with PageRank. The result maps node -> score.
# alpha=0.85 is the damping factor NetworkX applies by default, spelled out here.
pagerank_scores = nx.pagerank(G, alpha=0.85)


Step 6: Set a threshold to classify transactions as potentially fraudulent or not

In [None]:
# PageRank scores form a probability distribution over the nodes: they sum to
# 1, so the average score is 1 / N. A fixed cut-off such as 0.5 is therefore
# practically unreachable on any graph with more than a couple of nodes and
# would flag nothing. Express the threshold relative to the average instead:
# a transaction is flagged when its score exceeds `threshold_multiplier` times
# the average. The multiplier is a tunable choice, not a derived value.
threshold_multiplier = 2.0
# max(..., 1) keeps the division defined when the graph has no nodes.
threshold = threshold_multiplier / max(len(pagerank_scores), 1)

Step 7: Identify potential fraudulent transactions based on their PageRank scores

In [None]:
# Ids of the transactions whose PageRank score is strictly above the
# threshold, in the order the scores dictionary lists them.
fraudulent_transactions = [
    node for node in pagerank_scores if pagerank_scores[node] > threshold
]

Step 8: Display the potential fraudulent transactions

In [None]:
# Header line followed by one flagged transaction id per line.
report_lines = ["Potential fraudulent transactions:"]
report_lines.extend(str(flagged_id) for flagged_id in fraudulent_transactions)
print("\n".join(report_lines))

Step 9: Evaluate the performance of fraud detection

In [None]:
# Assuming you have ground truth labels for the transactions in the dataset
ground_truth_labels = df['Class']

# Create a binary classification label based on the threshold
predicted_labels = [1 if transaction_id in fraudulent_transactions else 0 for transaction_id in transaction_ids]

# Calculate confusion matrix and classification report
cm = confusion_matrix(ground_truth_labels, predicted_labels)
cr = classification_report(ground_truth_labels, predicted_labels)

print("Confusion Matrix:")
print(cm)
print("\nClassification Report:")
print(cr)
