In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
# Read Purchase.csv and discard the 'Third Party Account No' column, which the
# rest of this notebook section does not use.
df = pd.read_csv("Purchase.csv").drop(columns='Third Party Account No')
df.head(5)

Unnamed: 0,Date,Timestamp,Account No,Balance,Amount,Third Party Name
0,01/01/2023,00:00,355733816.0,526.0,-664.0,Halifax
1,01/01/2023,00:00,472213568.0,2412.0,-600.0,LBG
2,01/01/2023,00:00,624500124.0,2572.0,-654.0,LBG
3,01/01/2023,00:00,203466392.0,3935.66,-672.0,Halifax
4,01/01/2023,00:00,768271776.0,2208.0,-632.0,LBG


In [5]:
# --- Per-account aggregates -------------------------------------------------
# Sum of Amount for every (account, third party) pair.
total_spent_per_account = (
    df.groupby(['Account No', 'Third Party Name'])['Amount']
      .sum()
      .reset_index()
)
# Balance on the last row of each account in file order.
# NOTE(review): this is the "final" balance only if the CSV is in chronological
# order -- confirm against the data source.
final_balance_per_account = (
    df.drop_duplicates(subset='Account No', keep='last')
      .loc[:, ['Account No', 'Balance']]
)
# Sum of Amount over all rows of each account.
total_consumption_per_account = (
    df.groupby('Account No')['Amount']
      .sum()
      .reset_index()
)

# --- Join the aggregates ----------------------------------------------------
# Both sides of the second merge carry an 'Amount' column, so pandas suffixes
# them: Amount_x is the per-third-party sum, Amount_y the per-account sum.
merged_data = total_spent_per_account.merge(final_balance_per_account, on='Account No')
new = merged_data.merge(total_consumption_per_account, on='Account No')

# --- Derived columns --------------------------------------------------------
# assign() evaluates the entries in order, so each lambda sees the columns
# produced by the entries above it.
new = new.assign(**{
    'Amount_y': lambda frame: frame['Amount_y'].abs(),
    'Total Balance': lambda frame: frame['Balance'] + frame['Amount_y'],
    'Spent Ratio': lambda frame: frame['Amount_x'].abs() / frame['Total Balance'],
    'Purchase Percentage': lambda frame: frame['Spent Ratio'],
})

# Keep only the columns needed downstream and restore the plain 'Amount' name.
cleaned = (
    new.drop(columns=['Balance', 'Amount_y', 'Spent Ratio'])
       .rename(columns={'Amount_x': 'Amount'})
)

# --- Customer labels --------------------------------------------------------
# Label accounts c1, c2, ... in order of first appearance in df.
account_mapping = {
    account: f"c{position}"
    for position, account in enumerate(df['Account No'].unique(), start=1)
}
cleaned['Customer'] = cleaned['Account No'].map(account_mapping)

unique_c = cleaned['Customer'].unique()
number_of_c = len(unique_c)

print(number_of_c)

cleaned.head(15)


976


Unnamed: 0,Account No,Third Party Name,Amount,Total Balance,Purchase Percentage,Customer
0,101531259.0,Brilliant Brushes,-76.45,17855.5521,0.004282,c417
1,101531259.0,Deliveroo,-32.69,17855.5521,0.001831,c417
2,101531259.0,Disney,-95.88,17855.5521,0.00537,c417
3,101531259.0,Fat Face,-47.5,17855.5521,0.00266,c417
4,101531259.0,Five Senses Art,-398.24,17855.5521,0.022303,c417
5,101531259.0,Grand Union BJJ,-1709.89,17855.5521,0.095762,c417
6,101531259.0,Head,-200.0,17855.5521,0.011201,c417
7,101531259.0,JustEat,-54.58,17855.5521,0.003057,c417
8,101531259.0,LBG,-10308.0,17855.5521,0.577299,c417
9,101531259.0,Matalan,-1595.5,17855.5521,0.089356,c417


In [7]:
# Keep the first 100 distinct customer ids in the order they appear in
# `cleaned`. No ranking is applied here, despite the "top" in the name.
top_100_customers = cleaned['Customer'].unique()[:100]
top_100_customers_data = cleaned[cleaned['Customer'].isin(top_100_customers)]

# One directed edge per row: customer -> third party, with the row's
# 'Purchase Percentage' stored as the edge's 'weight' attribute.
G = nx.DiGraph()
G.add_weighted_edges_from(
    zip(
        top_100_customers_data['Customer'],
        top_100_customers_data['Third Party Name'],
        top_100_customers_data['Purchase Percentage'],
    )
)

# Classify nodes by membership in the known customer-id set. A substring test
# such as `'c' in node` would also match merchant names that merely contain
# the letter "c" (e.g. "Fat Face") and place them on the customer ring.
customer_ids = set(top_100_customers)
merchant_nodes = [node for node in G.nodes() if node not in customer_ids]
customer_nodes = [node for node in G.nodes() if node in customer_ids]


def _ring_positions(nodes, ring_center, ring_radius):
    """Return {node: xy} with `nodes` spaced evenly on a circle.

    The circle has radius `ring_radius` and is centred on `ring_center`.
    An empty `nodes` list yields an empty dict.
    """
    count = len(nodes)
    positions = {}
    for i, node in enumerate(nodes):
        angle = 2 * np.pi * i / count
        positions[node] = ring_center + np.array([np.cos(angle), np.sin(angle)]) * ring_radius
    return positions


center = np.array([0.5, 0.5])  # centre of the drawing
radius = 0.4                   # radius of the outer (customer) ring
merchant_radius = 0.1          # merchants sit on a tighter inner ring

pos = {}
pos.update(_ring_positions(merchant_nodes, center, merchant_radius))
pos.update(_ring_positions(customer_nodes, center, radius))

# Edge weights keyed by (source, target); collected here but not drawn below.
edge_labels = nx.get_edge_attributes(G, 'weight')

plt.figure(figsize=(24, 16))

# Colour with the same customer/merchant rule used for the layout so that
# position and colour always agree.
node_colors = ['skyblue' if node in customer_ids else 'lightgreen' for node in G.nodes()]

nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=300, font_size=5, edge_color='gray')

# Write the figure to disk instead of displaying it inline.
plt.axis('off')  # hide the axes
plt.savefig('top_100_customers.png')
plt.close()
