### Dependencies

In [None]:
# Load IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2

In [None]:
import os

# Absolute location of the project checkout on this machine.
root_path = r'C:\Users\jesus\Coding-Projects\gemini_table_enricher'

# Switch the working directory to the project root. The relative `data\...`
# paths used in later cells (and presumably the `table_enrichment_tool`
# imports that follow) are resolved against it.
os.chdir(root_path)

from table_enrichment_tool.enrichment import enrich_table
from table_enrichment_tool.gemini_api import available_models
from table_enrichment_tool.preprocessing import anonymize_rows, de_anonymize_rows

### Available Models on Gemini API

In [None]:
# Call the project's Gemini API helper. As the last expression of the cell,
# any value it returns is displayed by the notebook.
# NOTE(review): presumably lists the model names usable as `model_name` below — confirm in gemini_api.
available_models()

### Anonymize Personal Data

It is good practice to not submit any personal information to a large language model (LLM) online. Ensure you anonymize your data before sending it to the Gemini API. Use the provided functions to generate unique IDs and remove personal information fields from your data.

In [None]:
# Seed string handed to anonymize_rows alongside the ID fields.
# NOTE(review): presumably mixed into the generated row IDs — confirm in preprocessing.
SEED = "4hf7ekdl*djkf$"

# Columns holding personal information, and the subset used for ID generation.
PERSONAL_INFO_FIELDS = ['First Name', 'Last Name', 'Email', 'LinkedIn']
FIELDS_FOR_ID = ['First Name', 'Email']

# Source table and the destination of its anonymized copy
# (both relative to the current working directory).
input_file_path, output_file_path = (
    r'data\destinations.csv',
    r'data\anonymized_destinations.csv',
)

# Anonymize the contacts table, passing "CON" as the ID prefix.
anonymize_rows(
    input_file_path,
    output_file_path,
    SEED,
    PERSONAL_INFO_FIELDS,
    FIELDS_FOR_ID,
    prefix="CON",
)

### Define Task

In [None]:
from table_enrichment_tool.steps import scrape_url_content, read_csv

# Steps applied to each row to enrich the external data fed to the LLM.
# Every step names a callable ('function') and the parameters it receives.
# NOTE(review): the 'urls' value is a string expression over `row`,
# presumably evaluated per row by the tool — confirm in enrichment.
scrape_step = {
    'function': scrape_url_content,
    'params': {'urls': 'row["Travel Website"]'},
}
steps = [scrape_step]

# Input table and output destination handed to enrich_table
# (both relative to the current working directory).
csv_path, output_path = (
    r'data\anonymized_destinations.csv',
    r'data\anonymized_destinations_enriched.csv',
)

# Columns the LLM is asked to fill in. Each key is a column name and each
# value is the instruction for it: a "Type:..." hint followed by the request.
fields_dict = {
    "Weather": (
        "Type:String under 400 characcters, "
        "Provide the typical weather for the contacts specified country "
        "and time of year they are visiting"
    ),
    "Cost of Living": (
        "Type:String - max 1 sentence, "
        "Provide an estimate of the cost of living in the specified country"
    ),
    "Recommended District": (
        "Type:String, "
        "Suggest a popular district or area to stay in the specified country "
        "based on travel preferences"
    ),
    "Activities": (
        "Type:List, "
        "Depending on the contacts preference: "
        "Suggest 3 popular activities to do in the specified country"
    ),
    "Local Cuisine": (
        "Type:List, "
        "Suggest maximum 3 local foods and dishes to try in the specified country"
    ),
    "Travel Tips": (
        "Type:List, "
        "Provide maximum 3 useful travel tips for visiting the specified country"
    ),
}


# Overall context of the table
# Free-text description of what the table holds and what the enrichment is
# meant to add.
# NOTE(review): in the cells shown in this notebook, `table_context` is
# defined but never passed to `enrich_table` — confirm whether the call
# should receive it.
table_context = """
This table contains information about customers' travel plans, including countries to visit, duration of stay, and time of year.
The goal is to enrich these profiles with personalized travel information such as weather, cost of living, recommended districts, activities, local cuisine, and travel tips.
"""


# Reference text describing what each kind of traveler prefers and enjoys.
traveler_preferences = """
    Travel Preferences and Activities:

    Solo Travelers:
        * Prefer quiet, off-the-beaten-path locations.
        * Enjoy cultural experiences and historical sites.

    Couples:
        * Prefer romantic destinations with scenic views.
        * Enjoy dining at fine restaurants and relaxing at spas.

    Families:
        * Prefer family-friendly locations with activities for children.
        * Enjoy theme parks, zoos, and outdoor activities.

    Adventure Seekers:
        * Prefer locations with opportunities for adventure sports.
        * Enjoy hiking, mountain climbing, and water sports.

    Food Enthusiasts:
        * Prefer locations known for their culinary experiences.
        * Enjoy food tours, cooking classes, and local markets.
    """

# External data fed to the LLM alongside each row's populated fields, so it
# can generate the row's empty fields. Keys label the individual sources.
external_data = {"Preferences": traveler_preferences}

### Run the Enrichment Tool

In [None]:
# Gemini model that generates the missing fields.
model_name = 'models/gemini-1.5-pro-latest'

# Run enrich_table on the anonymized table. The positional arguments are the
# input/output CSV paths, the field instructions, the external data, the
# model name and the per-row steps; batch_size and max_workers are keywords.
enrich_table(
    csv_path,
    output_path,
    fields_dict,
    external_data,
    model_name,
    steps,
    batch_size=10,
    max_workers=4,
)

### De-Anonymize the Data for Local Use

In [None]:
# Personal-information columns handed to de_anonymize_rows.
PERSONAL_INFO_FIELDS = ['First Name', 'Last Name', 'Email', 'LinkedIn']
# Fields to be used for ID generation (not passed to the call in this cell).
FIELDS_FOR_ID = ['First Name', 'Email']

# The original table, the enriched anonymized table, and the destination of
# the combined result (all relative to the current working directory).
original_path, anonymized_path, output_path = (
    r'data\destinations.csv',
    r'data\anonymized_destinations_enriched.csv',
    r'data\destinations_enriched.csv',
)

# De-anonymize the enriched table, using the 'ID' column as the id field.
de_anonymize_rows(
    original_path,
    anonymized_path,
    output_path,
    PERSONAL_INFO_FIELDS,
    id_field='ID',
)