In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Distance-to-centroid anomaly detection on the transaction data:
#   1. load the CSV and drop incomplete rows,
#   2. standardize the numeric columns,
#   3. cluster the standardized rows with KMeans,
#   4. score each row by its Euclidean distance to its own cluster centroid,
#   5. report the rows with the largest scores.

data = pd.read_csv("financial_anomaly_data.csv")
data = data.dropna()
numeric_data = data.select_dtypes(include=['number'])

# KMeans is distance based, so put every feature on a comparable scale first.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(numeric_data)

k = 4
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(scaled_data)

data['Cluster'] = kmeans.labels_

# Centroids mapped back to the original feature units. These are for
# inspection/reporting only and must NOT be used in the distance computation,
# because the points being compared live in the standardized space.
cluster_centers = scaler.inverse_transform(kmeans.cluster_centers_)

# Distance from each row to the centroid of its assigned cluster, with both
# operands in the standardized space KMeans was fitted on. Indexing the
# centroid array by the label vector yields one centroid row per sample, so
# the whole computation is a single vectorized expression.
assigned_centers = kmeans.cluster_centers_[kmeans.labels_]
distances = ((scaled_data - assigned_centers) ** 2).sum(axis=1) ** 0.5

data['Distance'] = distances

# The rows farthest from their centroid are reported as anomalies.
top_n = 10
anomalies = data.sort_values(by='Distance', ascending=False).head(top_n)
print("Anomalies Detected",len(anomalies))
print(anomalies)




Anomalies Detected 10
               Timestamp TransactionID AccountID    Amount   Merchant  \
194474  16-05-2023 09:14         TXN52      ACC5  74772.64  MerchantD   
88262   03-03-2023 15:02        TXN797     ACC12  74772.89  MerchantJ   
46171   02-02-2023 09:31       TXN1502      ACC2  74773.12  MerchantB   
53276   07-02-2023 07:56       TXN1887      ACC4  74773.63  MerchantI   
215903  31-05-2023 06:23       TXN1473      ACC4  74773.66  MerchantB   
190925  13-05-2023 22:05        TXN195     ACC12  74773.73  MerchantE   
121187  26-03-2023 11:47       TXN1360     ACC13  74775.08  MerchantE   
201579  21-05-2023 07:39       TXN1257      ACC7  74775.19  MerchantI   
66609   16-02-2023 14:09        TXN118     ACC13  74775.68  MerchantG   
103240  14-03-2023 00:40        TXN939     ACC11  74775.97  MerchantF   

       TransactionType       Location  Cluster      Distance  
194474      Withdrawal  San Francisco        2  87430.387111  
88262       Withdrawal       New York        2  