<a href="https://colab.research.google.com/github/desstaw/PrivacyPreservingTechniques/blob/main/Differential_Privacy_sepsis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

warnings.simplefilter('ignore')

**Explanation**

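epsilon: the privacy budget. A smaller value of epsilon implies a higher level of privacy protection, at the cost of more noise (the Laplace noise scale used below is sensitivity / epsilon).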

sensitivity: the maximum amount that the query output can change if one record is added to or removed from the dataset.

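Laplace noise is zero-mean, so the original statistical distribution of the data is approximately maintained when noise is added (for example, column means are unchanged in expectation), although individual values are perturbed and the variance increases.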

In [None]:
# Load the original sepsis dataset and keep only the columns used in this notebook
url = "https://raw.githubusercontent.com/desstaw/PrivacyPreservingTechniques/main/datasets/sepsis.csv"
selected_columns = [
    'Age', 'Gender', 'HR', 'O2Sat', 'Temp', 'SBP', 'MAP',
    'DBP', 'Resp', 'Hgb', 'WBC', 'Glucose', 'SepsisLabel',
]
df = pd.read_csv(url).loc[:, selected_columns]

In [None]:
# Preview the first 20 rows as loaded, before any noise is added
df.head(n=20)

Unnamed: 0,Age,Gender,HR,O2Sat,Temp,SBP,MAP,DBP,Resp,Hgb,WBC,Glucose,SepsisLabel
0,50,1.0,72.115385,98.153846,36.607692,105.846154,76.807692,63.461538,20.416667,8.0,14.3,121.25,0.0
1,51,1.0,77.283333,96.758065,36.92,145.396552,102.275862,73.741379,15.071429,6.4,3.0,97.5,0.0
2,71,1.0,100.403846,94.788462,37.086538,130.4,87.16,65.88,19.384615,12.3,22.4,137.266667,0.0
3,51,0.0,76.315789,96.5,36.161538,153.763158,104.716216,74.236842,19.171053,9.75,7.15,103.944444,0.0
4,73,1.0,51.395349,95.469697,36.122222,133.131212,85.112903,69.474848,17.545455,12.4,7.85,96.0,0.0
5,61,0.0,65.954545,94.906977,37.157143,113.295455,81.090909,62.477273,14.068182,8.95,6.7,78.75,0.0
6,66,1.0,80.583333,97.166667,36.541667,101.194444,74.222222,59.527778,15.611111,10.85,19.3,133.0,0.0
7,43,1.0,112.033333,95.6,36.166667,117.733333,93.266667,81.766667,15.9,8.9,5.8,90.0,0.0
8,75,1.0,90.985714,96.4,37.484091,128.357143,88.114286,66.385714,19.382353,10.85,11.8,137.722222,0.0
9,85,0.0,81.22093,98.5,36.327273,106.697674,72.27907,52.755814,15.05,,,,0.0


In [None]:
# Privacy parameters: the Laplace noise scale is sensitivity / epsilon,
# so a smaller epsilon (or a larger sensitivity) means more noise.
# NOTE(review): one sensitivity value is shared by every column even though the
# columns are on different scales - confirm that 3.0 is appropriate for each.
epsilon = 1.0
sensitivity = 3.0

# Numeric measurement columns that receive noise; 'Gender' and 'SepsisLabel' are left unchanged.
dp_columns = ['Age', 'HR', 'O2Sat', 'Temp', 'SBP', 'MAP', 'DBP', 'Resp', 'Hgb', 'WBC', 'Glucose']

# The scale does not depend on the column, so compute it once outside the loop.
noise_scale = sensitivity / epsilon

# Add an independent zero-mean Laplace draw to every value of each selected column.
# Missing values stay NaN (NaN + noise is still NaN).
# WARNING: this modifies df in place, so re-running this cell stacks another layer of
# noise on top; re-run the data-loading cell first to start again from the original values.
# NOTE(review): no random seed is set, so every run produces different noisy values.
for col in dp_columns:
    df[col] += np.random.laplace(loc=0, scale=noise_scale, size=len(df))

# Show the first 20 rows of the dataset after applying differential privacy
df.head(20)

Unnamed: 0,Age,Gender,HR,O2Sat,Temp,SBP,MAP,DBP,Resp,Hgb,WBC,Glucose,SepsisLabel
0,56.197503,1.0,71.300669,106.806292,38.931659,107.181496,82.258447,64.75911,18.465508,14.885463,15.240731,130.278351,0.0
1,42.927833,1.0,77.356698,102.266719,40.425315,150.83255,103.599446,76.822468,9.21669,10.116294,4.654785,87.285464,0.0
2,70.876236,1.0,97.785296,95.995317,35.048311,125.941732,87.964145,67.718917,22.912978,13.574468,19.095773,129.852487,0.0
3,51.283425,0.0,81.467028,93.936542,34.338317,158.599318,105.675119,74.287765,15.050281,13.550529,7.0427,104.859353,0.0
4,73.13171,1.0,52.311809,95.336009,28.531191,137.282839,82.286591,70.744421,19.316056,11.608908,9.308719,94.494017,0.0
5,60.922652,0.0,69.405389,94.351298,40.268559,113.895151,88.324655,65.579065,12.053903,4.602403,9.3412,74.945166,0.0
6,67.801603,1.0,76.338369,98.543128,37.611621,101.542125,88.592295,64.77219,5.406476,12.868404,12.142114,132.029101,0.0
7,44.151069,1.0,110.24145,89.663514,37.729719,117.168522,101.78724,81.694571,17.251053,8.5073,8.257638,89.307099,0.0
8,75.290339,1.0,89.394463,103.234984,38.03325,119.89727,90.161507,64.51217,14.957278,4.908374,14.003864,131.55668,0.0
9,90.664913,0.0,85.988536,98.353124,36.524976,106.057536,74.620794,52.387365,14.087969,,,,0.0


In [None]:
# Colab-only step: mount Google Drive at /content/drive so that the export cell
# can write its CSV under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Write the current contents of df to Google Drive as CSV, without the index column.
# Requires Drive to be mounted at /content/drive.
output_path = Path('/content/drive/MyDrive/Colab Notebooks/Sepsis/v3_sepsis.csv')

# to_csv does not create directories and fails when the target folder is missing,
# so create the 'Sepsis' folder if needed. parents=True is deliberately not used:
# if Drive is not mounted this should still fail loudly instead of silently
# creating a local /content/drive tree.
output_path.parent.mkdir(exist_ok=True)

df.to_csv(output_path, index=False)