In [1]:
from ollama import chat
from ollama import ChatResponse

In [None]:
# Shell escape that invokes the Ollama CLI for the "deepseek-r1:1.5b" tag.
# NOTE(review): this tag is models[2], while the notebook selects models[3]
# ("phi4-mini") for the chat calls — confirm which model should be prepared here.
# NOTE(review): `ollama run` presumably starts a chat session rather than only
# downloading the model; `ollama pull` may be what is intended — TODO confirm.
!ollama run deepseek-r1:1.5b

In [69]:
# Model tags this notebook can be pointed at, one per line for easy editing.
models = [
    "llama3.2",
    "phi4",
    "deepseek-r1:1.5b",
    "phi4-mini",
]
# Active model: the last entry of the list above.
model = models[-1]

In [70]:
# Instruction prefix for the extraction request. The passage to analyse is
# appended straight after the trailing space of the last fragment.
sensitive_data_prompt_template = (
    'Given a text containing sensitive information, '
    'return a comma-separated list of the sensitive information, '
    'the list should only contain single words, no phrases. '
    'This is the text: '
)

In [61]:
# Input passage that the notebook scans and rewrites, one sentence per
# fragment. Adjacent literals are joined by the parser, so the value is a
# single string.
text = (
    "Elias Strauss, a 24-year-old software engineer from Springfield, IL, recently purchased a one-year web hosting plan along with a domain registration and an SSL certificate for his new online portfolio. "
    "Using his Visa card ending in 3456, he made a payment of $233.79 on March 11, 2025. "
    "The invoice, numbered INV-2025-00458, was issued by WebServices Inc., confirming the transaction. "
    "Elias received a confirmation email at elias.strauss@gmail.com and was advised that his services would be active until March 2026. "
    "If he has any concerns, he can contact customer support at (800) 555-0199."
)

In [71]:
# Complete extraction prompt: the instruction prefix immediately followed by
# the passage (no separator is inserted between the two).
sensitive_data_prompt = "".join((sensitive_data_prompt_template, text))
sensitive_data_prompt

'Given a text containing sensitive information, return a comma-separated list of the sensitive information, the list should only contain single words, no phrases. This is the text: Elias Strauss, a 24-year-old software engineer from Springfield, IL, recently purchased a one-year web hosting plan along with a domain registration and an SSL certificate for his new online portfolio. Using his Visa card ending in 3456, he made a payment of $233.79 on March 11, 2025. The invoice, numbered INV-2025-00458, was issued by WebServices Inc., confirming the transaction. Elias received a confirmation email at elias.strauss@gmail.com and was advised that his services would be active until March 2026. If he has any concerns, he can contact customer support at (800) 555-0199.'

In [73]:
# Echo the selected model tag so the printed reply can be attributed to it.
print(model)
# Single-turn request: one user message carrying the extraction prompt.
sensitive_data_response: ChatResponse = chat(
  model=model,
  messages=[dict(role='user', content=sensitive_data_prompt)],
)
print(sensitive_data_response.message.content)

phi4-mini
Elias Strauss, Visa card ending in 3456, $233.79, INV-2025-00458, elias.strauss@gmail.com, WebServices Inc., March 11, 2025, March 2026, (800) 555-0199


In [74]:
# Keep the text of the model's reply; it is appended verbatim to the
# placeholder prompt in a later cell.
sensitive_data = sensitive_data_response.message.content
# Bare expression so the notebook displays the captured value.
sensitive_data

'Elias Strauss, Visa card ending in 3456, $233.79, INV-2025-00458, elias.strauss@gmail.com, WebServices Inc., March 11, 2025, March 2026, (800) 555-0199'

In [81]:
# Second-stage prompt: asks for a JSON mapping from each extracted item to a
# generic placeholder, with the extracted list appended after the trailing space.
place_holder_prompt = (
    "You get a comma-separated list of attributes and your task is to find a generic, descriptive, short place holder for each element. "
    "Return a JSON formatted key-value pairs, where the key is the original element. "
    "This is the list: "
) + sensitive_data

In [83]:
# Single-turn request: one user message carrying the placeholder prompt.
place_holder_response: ChatResponse = chat(
  model=model,
  messages=[dict(role='user', content=place_holder_prompt)],
)
print(place_holder_response.message.content)

```json
{
    "Elias Strauss": "Owner/Employee",
    "Visa card ending in 3456": "Payment Method/Credit Card Info",
    "$233.79": "Transaction Amount/Price Paid",
    "INV-2025-00458": "Invoice ID/SKU Number",
    "elias.strauss@gmail.com": "Email Address/UserID@Domain.Com",
    "WebServices Inc.": "Company/Organization Name",
    "March 11, 2025": "Date of Event/Event Date",
    "March 2026": "Future Scheduled Date/Future Plan/Schedule",
    "(800) 555-0199": "Phone Number/TelNumber"
}
```


In [104]:
def _strip_code_fence(raw: str) -> str:
  """Return ``raw`` without a surrounding Markdown code fence.

  Tolerates whitespace before the opening fence and after the closing
  fence, a bare fence with no language tag, and a ``json`` tag in any
  letter case. Text that is not fenced is returned with its surrounding
  whitespace removed.

  Args:
    raw: The reply text, possibly wrapped in a fenced code block.

  Returns:
    The payload between the fences (or the whole text when unfenced),
    stripped of leading and trailing whitespace.
  """
  fence = '```'
  stripped = raw.strip()
  # The length check stops a lone run of backticks from being read as both
  # the opening and the closing fence.
  is_fenced = (
    len(stripped) >= 2 * len(fence)
    and stripped.startswith(fence)
    and stripped.endswith(fence)
  )
  if not is_fenced:
    return stripped
  inner = stripped[len(fence):-len(fence)].strip()
  # A JSON document cannot begin with the letters "json", so a leading
  # "json" here can only be the fence's language tag.
  if inner[:4].lower() == 'json':
    inner = inner[4:]
  return inner.strip()


# str() keeps the original coercion of the reply content before cleaning.
place_holders = _strip_code_fence(str(place_holder_response.message.content))
place_holders

'\n{\n    "Elias Strauss": "Owner/Employee",\n    "Visa card ending in 3456": "Payment Method/Credit Card Info",\n    "$233.79": "Transaction Amount/Price Paid",\n    "INV-2025-00458": "Invoice ID/SKU Number",\n    "elias.strauss@gmail.com": "Email Address/UserID@Domain.Com",\n    "WebServices Inc.": "Company/Organization Name",\n    "March 11, 2025": "Date of Event/Event Date",\n    "March 2026": "Future Scheduled Date/Future Plan/Schedule",\n    "(800) 555-0199": "Phone Number/TelNumber"\n}\n'

In [108]:
import json
import re

In [109]:
# Parse the cleaned reply as JSON. The later substitution cell uses the result
# as a mapping from original value to placeholder.
# NOTE(review): nothing here checks that the parsed value is actually a dict
# of strings — the shape depends entirely on what the model returned.
place_holders_dict = json.loads(place_holders)
# Bare expression so the notebook displays the parsed mapping.
place_holders_dict

{'Elias Strauss': 'Owner/Employee',
 'Visa card ending in 3456': 'Payment Method/Credit Card Info',
 '$233.79': 'Transaction Amount/Price Paid',
 'INV-2025-00458': 'Invoice ID/SKU Number',
 'elias.strauss@gmail.com': 'Email Address/UserID@Domain.Com',
 'WebServices Inc.': 'Company/Organization Name',
 'March 11, 2025': 'Date of Event/Event Date',
 'March 2026': 'Future Scheduled Date/Future Plan/Schedule',
 '(800) 555-0199': 'Phone Number/TelNumber'}

In [110]:
# Build one alternation over every non-empty key. Regex alternation takes the
# first alternative that matches, so longer keys are listed first; otherwise a
# short key that is a prefix of a longer one would win and leave the rest of
# the longer value in the text.
_ordered_keys = sorted(
  (key for key in place_holders_dict if key),
  key=len,
  reverse=True,
)
# With no usable keys the joined pattern would be empty and match at every
# position (then fail the lookup with KeyError('')); "(?!)" never matches, so
# the text is returned unchanged instead.
pattern = re.compile(
  "|".join(map(re.escape, _ordered_keys)) if _ordered_keys else r"(?!)"
)
# str() guards against non-string JSON values (numbers, null), which re.sub
# would otherwise reject as a replacement.
new_text = pattern.sub(lambda match: str(place_holders_dict[match.group(0)]), text)

In [111]:
# Display the passage after every matched key has been swapped for its placeholder.
new_text

'Owner/Employee, a 24-year-old software engineer from Springfield, IL, recently purchased a one-year web hosting plan along with a domain registration and an SSL certificate for his new online portfolio. Using his Payment Method/Credit Card Info, he made a payment of Transaction Amount/Price Paid on Date of Event/Event Date. The invoice, numbered Invoice ID/SKU Number, was issued by Company/Organization Name, confirming the transaction. Elias received a confirmation email at Email Address/UserID@Domain.Com and was advised that his services would be active until Future Scheduled Date/Future Plan/Schedule. If he has any concerns, he can contact customer support at Phone Number/TelNumber.'

In [103]:
# Quick check: does the current value of place_holders open with a ```json fence?
place_holders.startswith('```json')

True