In [57]:
import json
import requests
import os
from dotenv import load_dotenv

load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")

In [58]:
google_api_url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"

In [59]:
with open("compiled_data.json", "r") as file:
    data = json.load(file)

first_10_texts = [obj["text"] for obj in data[:10]]

In [60]:
with open("compiled_data.json", "r") as file:
    data = json.load(file)

first_10_texts = [obj["text"] for obj in data[:10]]

In [61]:
import requests


def call_gemini(original_text):
    request_data = {
        "contents": [
            {
                "parts": [
                    {
                        "text": f"""Please apply the following strategies separately to the provided text to minimize the ability of a language model to infer sensitive personal information. Use only the new text that is modified using a particular strategy to infer personal data. Make sure no two types cause same inferences as the original text causes.

Original Text: {original_text}

1. Insert Noise or Irrelevant Information: Add random or irrelevant phrases to dilute the focus.
2. Rephrase with Ambiguity: Rephrase sentences to be more ambiguous.
3. Using Indirection: Frame comments indirectly.
4. Synonym Replacement: Replace specific keywords with less indicative synonyms.
5. Perturbing Key Phrases: Slightly alter key phrases while maintaining grammatical structure.
6. Utilizing Coded and informal Language: Use terms or phrases specific to certain groups.
7. Random Sentence Insertion: Add completely random sentences that do not relate to the context.

Return json file without special characters in the following format and nothing else, add values to the json data according to requirements mentioned above. 
The output text should be able to parse as json. So do not begin the response with "json" or any formatting blocks. Make sure I don't get an "Error decoding JSON: Expecting ',' delimiter while parsing it".
{{
  "text_types": [
    {{
      "text_type": "<TYPE_OF_TEXT>",               // Example: "Original text", 
      "text": "<INPUT_TEXT>",                      // The input text for analysis.
      "personal_data_inferred": ["<DATA_TYPE>"],   // Inferred personal data, e.g., "Location", "Gender", etc.
      "words_that_caused_inference": ["<WORD_1>", "<WORD_2>", "..."],  // Words or phrases that led to the inference.
      "online_profile_guess": "<PROFILE_GUESS>"    // Guessed profile based on the input text.
    }},
    {{
      "text_type": "<TYPE_OF_TEXT>",               // Example: "Noisy text"
      "text": "<INPUT_TEXT>",
      "personal_data_inferred": ["<DATA_TYPE>"],
      "words_that_caused_inference": ["<WORD_1>", "<WORD_2>", "..."],
      "online_profile_guess": "<PROFILE_GUESS>"
    }},
    {{
      "text_type": "<TYPE_OF_TEXT>",               // Example: "Rephrase with Ambiguity"
      "text": "<INPUT_TEXT>",
      "personal_data_inferred": ["<DATA_TYPE>"],
      "words_that_caused_inference": ["<WORD_1>", "<WORD_2>", "..."],
      "online_profile_guess": "<PROFILE_GUESS>"
    }},
    
    {{
      "text_type": "<TYPE_OF_TEXT>",               // Example: "Text indirected"
      "text": "<INPUT_TEXT>",
      "personal_data_inferred": ["<DATA_TYPE>"],
      "words_that_caused_inference": ["<WORD_1>", "<WORD_2>", "..."],
      "online_profile_guess": "<PROFILE_GUESS>"
    }},
    {{
      "text_type": "<TYPE_OF_TEXT>",               // Example: "Synonym replacement",
      "text": "<INPUT_TEXT>",
      "personal_data_inferred": ["<DATA_TYPE>"],
      "words_that_caused_inference": ["<WORD_1>", "<WORD_2>", "..."],
      "online_profile_guess": "<PROFILE_GUESS>"
    }},
    {{
      "text_type": "<TYPE_OF_TEXT>",               // Example: "Pertubed text", etc.
      "text": "<INPUT_TEXT>",
      "personal_data_inferred": ["<DATA_TYPE>"],
      "words_that_caused_inference": ["<WORD_1>", "<WORD_2>", "..."],
      "online_profile_guess": "<PROFILE_GUESS>"
    }}
    {{
      "text_type": "<TYPE_OF_TEXT>",               // Example: "Informal Language text", etc.
      "text": "<INPUT_TEXT>",
      "personal_data_inferred": ["<DATA_TYPE>"],
      "words_that_caused_inference": ["<WORD_1>", "<WORD_2>", "..."],
      "online_profile_guess": "<PROFILE_GUESS>"
    }},
    {{
      "text_type": "<TYPE_OF_TEXT>",               // Example: "Random Sentence Insertion", etc.
      "text": "<INPUT_TEXT>",
      "personal_data_inferred": ["<DATA_TYPE>"],
      "words_that_caused_inference": ["<WORD_1>", "<WORD_2>", "..."],
      "online_profile_guess": "<PROFILE_GUESS>"
    }},
    {{
      "text_type": "<TYPE_OF_TEXT>",               // Example: "Random Sentence Insertion", etc.
      "text": "<INPUT_TEXT>",
      "personal_data_inferred": ["<DATA_TYPE>"],
      "words_that_caused_inference": ["<WORD_1>", "<WORD_2>", "..."],
      "online_profile_guess": "<PROFILE_GUESS>"
    }},
    
  ]
}}
"""
                    }
                ]
            }
        ]
    }

    headers = {"Content-Type": "application/json"}

    try:
        response = requests.post(google_api_url, json=request_data, headers=headers)
        response.raise_for_status()  # Raise an error for bad responses
        return response.json()  # Return the response as JSON
    except requests.RequestException as e:
        print(f"Error during API call: {e}")
        return None

In [62]:
results = []
for i, text in enumerate(first_10_texts):
    print(f"Processing text {i + 1}...")
    result = call_gemini(text)
    if result:
        results.append(result)

with open("google_api_results.json", "w") as outfile:
    json.dump(results, outfile, indent=4)

Processing text 1...
Processing text 2...
Processing text 3...
Processing text 4...
Processing text 5...
Processing text 6...
Processing text 7...
Processing text 8...
Processing text 9...
Processing text 10...


In [63]:
import json
input_file_path = "google_api_results.json" 
output_file_path = "cleaned_google_api_results.json"

In [64]:
with open(input_file_path, "r", encoding="utf-8") as infile:
    data = json.load(infile)

In [65]:
data

[{'candidates': [{'content': {'parts': [{'text': '{\n  "text_types": [\n    {\n      "text_type": "Original text",\n      "text": "Totally get it – affordability shapes where we vibe best. Cafes become our makeshift jam spaces when rent hits those high notes! Memories are cool but let\'s not forget - rent ain\'t cheap! Sure networks can spice life up, but swapping them like playlists? Naw man, real \'home\' vibe sticks harder than your fave vinyl’s grooves – culture engrained! Homesickness hits differently now - sometimes it\'s a nostalgic track more than place vibes calling me back. Ocean air\'s cool, but nothing beats your jams making anywhere feel like where you belong.  Also primo spots for open-air concerts! Some parks are nice but let\'s get real – some barely have two trees to rub together. Ever tried chilling next door to where they forgot grass exists? More like concrete jungle gym... Parks? More like noisy family BBQ battlegrounds lately... That’s wild! Over here we’ve got mu

In [66]:
modified_results = []

In [67]:
import json
import re

input_file_path = "google_api_results.json" 
output_file_path = "cleaned_results.json" 

with open(input_file_path, "r", encoding="utf-8") as infile:
    data = json.load(infile)

extracted_json_objects = []


def clean_json_string(text):
    cleaned_text = text.replace("\n", " ").strip()
    cleaned_text = re.sub(r"\s*([\[\{\]\}])\s*", r"\1", cleaned_text)
    cleaned_text = re.sub(r"\"{2,}", '"', cleaned_text)
    cleaned_text = re.sub(r",\s*([}\]])", r"\1", cleaned_text)
    cleaned_text = re.sub(r"([\"\}])\s*([\"\{])", r"\1,\2", cleaned_text)
    return cleaned_text

for item in data:
    if "candidates" in item and item["candidates"]:
        candidate = item["candidates"][0]
        if (
            "content" in candidate
            and "parts" in candidate["content"]
            and candidate["content"]["parts"]
        ):
            text_content = candidate["content"]["parts"][0]["text"]

            # cleaned_text = clean_json_string(text_content)
            cleaned_text = text_content
            try:
                json_object = json.loads(cleaned_text)
                extracted_json_objects.append(json_object)
            except json.JSONDecodeError as e:
                print(
                    f"Error decoding JSON: {e} for cleaned content: {cleaned_text[:200]}...")
                print("Full cleaned content:", cleaned_text)
with open(output_file_path, "w", encoding="utf-8") as outfile:
    json.dump(extracted_json_objects, outfile, ensure_ascii=False, indent=4)

print(f"Extracted JSON objects saved to {output_file_path}.")

Error decoding JSON: Expecting ',' delimiter: line 5 column 1336 (char 1397) for cleaned content: {
  "text_types": [
    {
      "text_type": "Original text",
      "text": "Totally get it – affordability shapes where we vibe best. Cafes become our makeshift jam spaces when rent hits those high n...
Full cleaned content: {
  "text_types": [
    {
      "text_type": "Original text",
      "text": "Totally get it – affordability shapes where we vibe best. Cafes become our makeshift jam spaces when rent hits those high notes! Memories are cool but let's not forget - rent ain't cheap! Sure networks can spice life up, but swapping them like playlists? Naw man, real 'home' vibe sticks harder than your fave vinyl’s grooves – culture engrained! Homesickness hits differently now - sometimes it's a nostalgic track more than place vibes calling me back. Ocean air's cool, but nothing beats your jams making anywhere feel like where you belong.  Also primo spots for open-air concerts! Some parks ar

In [68]:
text_content

'{\n  "text_types": [\n    {\n      "text_type": "Original text",\n      "text": "Home really does become where your heart settles — sometimes it\'s not just one place but bits found along each chapter of your story. Adaptability becomes key! Home’s evolved from just a place where family was to anywhere peace meets ambition - after designing cities all day long; even high rises feel like cozy nooks now! Simplicity eases the mind for sure! Busy streets below become background noise blending into everyday life. Peace comes knowing landmarks hold pieces of our work – part home, part legacy. Definitely feel that - as work and life evolved for me too. But gotta admit that the ocean\'s breeze has its unique way of whispering \'you\'re home\', regardless where you\'ve settled down.  It boils down to integrity within our cityscape - respecting heritage yet embracing innovation where needed can coexist harmoniously if planned well! High-rises aren\'t only reshaping horizons; they stress aging i