# Financial Data Anonymization

In [1]:
!pip install anonypy
!pip install python-dp
!pip install flwr
!pip install Pyfhel

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting anonypy
  Downloading anonypy-0.1.7-py3-none-any.whl (10 kB)
Installing collected packages: anonypy
Successfully installed anonypy-0.1.7
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting python-dp
  Downloading python_dp-1.1.1-cp39-cp39-manylinux1_x86_64.whl (3.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.5/3.5 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-dp
Successfully installed python-dp-1.1.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting flwr
  Downloading flwr-1.3.0-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting iterators<0.0.3,>=0.0.2
  Downloading iterators-0.0.2-py3-n

In [3]:
!pip install opacus

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting opacus
  Downloading opacus-1.4.0-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.8/224.8 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: opacus
Successfully installed opacus-1.4.0


In [4]:
import pandas as pd
import anonypy
import opacus
import flwr
# import pyfhel
import numpy as np

In [None]:
def k_anonymize(df, feature_columns, sensitive_column, k=2):
    """Return a k-anonymized table built from ``df`` by anonypy.

    Args:
        df: Source pandas DataFrame.
        feature_columns: Column names handed to ``anonypy.Preserver`` as the
            feature columns.
        sensitive_column: Column name handed to ``anonypy.Preserver`` as the
            sensitive column.
        k: Value forwarded to ``anonymize_k_anonymity``. Defaults to 2, the
            value this function previously hard-coded.

    Returns:
        A new DataFrame constructed from the rows anonypy returns.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    preserver = anonypy.Preserver(df, feature_columns, sensitive_column)
    rows = preserver.anonymize_k_anonymity(k=k)
    return pd.DataFrame(rows)

In [None]:
def l_diversify(df, feature_column, sensitive_column, k=2, l=2):
    """Return an l-diverse table built from ``df`` by anonypy.

    The previous body returned a name that was never assigned inside the
    function, so every call raised ``NameError``.

    Args:
        df: Source pandas DataFrame.
        feature_column: Column name(s) handed to ``anonypy.Preserver`` as the
            feature columns. The caller in this notebook passes a list.
        sensitive_column: Column name handed to ``anonypy.Preserver`` as the
            sensitive column.
        k: Value forwarded as ``k`` to ``anonymize_l_diversity``.
        l: Value forwarded as ``l`` to ``anonymize_l_diversity``.

    Returns:
        A new DataFrame constructed from the rows anonypy returns.
    """
    preserver = anonypy.Preserver(df, feature_column, sensitive_column)
    rows = preserver.anonymize_l_diversity(k=k, l=l)
    return pd.DataFrame(rows)

In [None]:
# Load the financial data from CSV (path is relative to the working directory)
DATA_PATH = 'financial_data.csv'
df = pd.read_csv(DATA_PATH)
feature_columns = ['Age', 'Income', 'Expenses', 'Balance']
sensitive_column = 'Account Number'

# Fail fast with a readable message if the CSV lacks a column that the
# anonymization calls below are about to request.
missing_columns = [
    column
    for column in [*feature_columns, sensitive_column]
    if column not in df.columns
]
if missing_columns:
    raise KeyError(f"{DATA_PATH} is missing required columns: {missing_columns}")

# Perform K-anonymity on the data
dfn = k_anonymize(df, feature_columns, sensitive_column)

# Perform L-diversity on the data
df_diverse = l_diversify(df, feature_columns, sensitive_column)

## Differential Privacy

In [None]:
# Perform differential privacy on the data
epsilon = 1.0
delta = 1e-5

# (min, max) of each numeric column, in the same order as before.
bounds = [
    (df_diverse[column].min(), df_diverse[column].max())
    for column in ('Age', 'Income', 'Expenses', 'Balance')
]

# Opacus 1.x: PrivacyEngine() does not accept epsilon/delta and has no
# attach() method, so the previous constructor call raised TypeError. The
# budget is applied when a PyTorch module, optimizer and DataLoader are
# wrapped, e.g.
#   privacy_engine.make_private_with_epsilon(
#       module=..., optimizer=..., data_loader=...,
#       target_epsilon=epsilon, target_delta=delta,
#       epochs=..., max_grad_norm=...)
# NOTE(review): no `optimizer` (or torch model / DataLoader) is defined in the
# visible cells, so the former `privacy_engine.attach(optimizer)` call had
# nothing to attach to and is dropped here. Wire the wrap call in where the
# training objects are created.
privacy_engine = opacus.PrivacyEngine()

In [None]:
# Define the client model
class FinancialClient(flwr.client.NumPyClient):
    """Flower NumPy client that trains the module-level ``client_model``.

    The class reads and writes the globals ``client_model``,
    ``client_optimizer``, ``client_loss`` and ``privacy_engine``; they must
    exist before any method is called.
    NOTE(review): none of those model/optimizer/loss globals are defined in
    the visible cells — confirm where they are created.

    Args:
        X: Local training features, passed straight to ``client_model.fit``.
        y: Local training targets, passed straight to ``client_model.fit``.
    """

    def __init__(self, X, y):
        self.X = X
        self.y = y

    def get_parameters(self, config=None):
        """Return the current weights of ``client_model``.

        ``config`` is accepted (and ignored) because flwr 1.x calls
        ``get_parameters(config=...)``; the default keeps existing
        zero-argument callers working.
        """
        return client_model.get_weights()

    def set_parameters(self, parameters):
        """Load ``parameters`` into ``client_model``."""
        client_model.set_weights(parameters)

    def fit(self, parameters, config):
        """Train on the local data and return the updated weights.

        Returns:
            A tuple ``(weights, number_of_local_examples, {})``.
        """
        self.set_parameters(parameters)

        # Train the client model with differential privacy
        # NOTE(review): Opacus 1.x has no PrivacyEngine.attach(), and the
        # calls below (compile / fit / get_weights) look like a Keras-style
        # model while Opacus wraps PyTorch modules — confirm which DP
        # mechanism is intended before relying on this line.
        privacy_engine.attach(client_optimizer)

        # Train the model
        client_model.compile(optimizer=client_optimizer, loss=client_loss)
        client_model.fit(self.X, self.y, epochs=10, batch_size=32, verbose=0)

        # Return the trained weights
        return client_model.get_weights(), len(self.X), {}

# Define the FLWR server
class FinancialServer(flwr.server.Server):
    """Flower server that can score a set of weights on held-out data.

    Reads the module-level ``server_model``, ``server_optimizer`` and
    ``server_loss`` at construction time.
    NOTE(review): those globals are not defined in the visible cells —
    confirm where they are created.

    Args:
        X: Optional evaluation features used by ``evaluate``.
        y: Optional evaluation targets used by ``evaluate``.
    """

    def __init__(self, X=None, y=None):
        # flwr's Server requires a client manager; calling the base
        # constructor with no arguments raises TypeError.
        super().__init__(
            client_manager=flwr.server.client_manager.SimpleClientManager()
        )
        self.server_model = server_model
        self.server_optimizer = server_optimizer
        self.server_loss = server_loss
        # evaluate() reads these; previously they were never assigned.
        self.X = X
        self.y = y

    def evaluate(self, parameters):
        """Load ``parameters`` into the server model and evaluate it.

        Returns:
            A tuple ``(loss, number_of_evaluation_examples)``.

        Raises:
            ValueError: If no evaluation data was given to the constructor.
        """
        if self.X is None or self.y is None:
            raise ValueError(
                "FinancialServer.evaluate needs evaluation data; "
                "pass X and y to the constructor"
            )
        self.server_model.set_weights(parameters)
        loss = self.server_model.evaluate(self.X, self.y)
        return loss, len(self.X)

In [None]:
# Start the FLWR server
# flwr 1.x takes keyword-only arguments and a ServerConfig object instead of
# a positional address and a plain dict.
# NOTE(review): start_server runs the federated rounds in the calling thread,
# so the client call below is presumably never reached from the same cell —
# consider running the server in a separate process or notebook.
flwr.server.start_server(
    server_address='[::]:8080',
    config=flwr.server.ServerConfig(num_rounds=10),
)

# Connect to the FLWR server
# NOTE(review): `X` and `y` are not defined in the visible cells.
flwr.client.start_numpy_client(
    server_address="[::]:8080",
    client=FinancialClient(X, y),
)

# Decrypt the model coefficients
model_weights = client_model.get_weights()
# NOTE(review): `decrypt` is not defined in the visible cells and nothing
# visible encrypts the weights (the pyfhel import is commented out) —
# confirm whether this step is still needed.
model_weights_decrypted = [decrypt(ctxt) for ctxt in model_weights]

# Save the model coefficients to a file
with open('model_weights.txt', 'w') as f:
    for weight in model_weights_decrypted:
        f.write(str(weight) + '\n')

## Display the Model Statistics

In [None]:
import matplotlib.pyplot as plt

# Display statistics results
# The names `data` and `quasi_identifiers` used here before are never defined
# in this notebook; the frames and column list created in the anonymization
# cell are used instead.
print('K-anonymity:')
print(dfn[feature_columns].value_counts())
print()

print('L-diversity:')
print(df_diverse.groupby(feature_columns)[sensitive_column].nunique())
print()

print('Differential privacy:')
# NOTE(review): these are plain means of the loaded data; no noise is added
# anywhere in the visible cells, so they are not differentially private.
print('Mean income:', df['Income'].mean())
print('Mean expenses:', df['Expenses'].mean())
print()

# Display model coefficients graph
# Flatten every weight array into one 1-D vector so it can be drawn as bars.
# The previous call went through client.fit(), which returns a 3-tuple and
# referenced the undefined names `client` and `public_key`.
flat_weights = [np.ravel(weight) for weight in model_weights]
model_coef = np.concatenate(flat_weights) if flat_weights else np.array([])

fig, ax = plt.subplots()
ax.bar(range(len(model_coef)), model_coef)
ax.set_xlabel('Coefficient')
ax.set_ylabel('Value')
ax.set_title('Model coefficients')
plt.show()