# This Python script generates a dataset of fictional individuals with various attributes, such as:

- Unique ID (UUID)
- Salutation (Herr/Frau)
- First name and last name (based on common German names)
- Address information (including street name, city, state, and ZIP code)
- German telephone number and email address (with common German email providers)
- Geographical coordinates (latitude and longitude) of the individuals' addresses
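- Date of birth (randomly generated within a specified age range)
- Purchase data (item type, unit price, quantity, sales tax rate, and the resulting tax and total amounts)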

## The script uses the `Faker` library for generating realistic, random data and includes functionality to:
- Randomly assign a German city from a built-in list of cities
- Generate plausible ZIP codes based on the selected city
- Randomly generate birthday dates for each individual
- Generate plausible email addresses from a list of about 50 common German email providers
- Save the generated dataset to an Excel file and a CSV file for easy access and usage

## Additionally, an interactive map with the generated Unique IDs and their geographical locations is created using the `folium` library. Markers are clustered for efficient visualization of large datasets.

Modules used:
- `Faker` (for generating random data)
- `pandas` (for managing the dataset)
- `folium` (for map visualization)
- `openpyxl` (for reading and writing Excel files)


In [1]:
import random
import pandas as pd
from faker import Faker
from datetime import datetime
import folium
from folium.plugins import MarkerCluster, HeatMap



In [2]:
# Shared Faker instance for generating random data. The generator functions
# in this file use it for UUIDs, street names, building numbers, phone numbers
# and dates of birth.
fake = Faker("de_DE")  # Use the German locale

In [3]:
# Set seeds for reproducibility. Both sources are seeded because the
# generators in this file draw from Faker (`fake.*` calls) as well as from
# the standard-library `random` module (`random.*` calls).
Faker.seed(0)
random.seed(0)

In [4]:
def generate_zip_code(city):
    """Build a ZIP code string for ``city``.

    The leading part is the city's ``zip_prefix`` entry from ``city_coords``
    (``"10"`` when the city is not listed there); it is followed by three
    random digits in the range 000-999.
    """
    city_entry = city_coords.get(city, {})
    leading_digits = city_entry.get("zip_prefix", "10")
    trailing_number = random.randint(0, 999)
    # :03d left-pads with zeros so the random part is always three digits wide.
    return f"{leading_digits}{trailing_number:03d}"

In [5]:
# Name pools (subsets of common German names) sampled with random.choice when
# a profile is built. Entries that appear more than once in a list are drawn
# proportionally more often.

# Male first names, used when the salutation is "Herr".
# Note: "Julian", "Matthias" and "Jakob" each appear twice.
first_names_male = [
    "Lukas", "Max", "Paul", "Jonas", "Leon", "Felix", "Finn", "Ben", "Moritz", 
    "Noah", "Johannes", "Tim", "Julian", "David", "Matthias", "Niklas", "Elias", 
    "Alexander", "Tobias", "Samuel", "Lucas", "Jakob", "Fabian", "Andreas", 
    "Markus", "Christian", "Stefan", "Simon", "Benjamin", "Daniel", "Michael", 
    "Johann", "Mark", "Kai", "Martin", "Jakob", "Julian", "Tom", "Nico", 
    "Patrick", "Sebastian", "Bastian", "Hannes", "Matthias", "Rafael", "Georg", 
    "Arthur", "Lennard", "Oskar", "Jan", "Maurice", "Timothy"
]

# Female first names, used when the salutation is "Frau".
# Note: "Sophie" appears twice.
first_names_female = [
    "Anna", "Sophie", "Marie", "Emma", "Lena", "Laura", "Mia", "Hannah", "Lina", 
    "Sophie", "Lea", "Sarah", "Charlotte", "Clara", "Amelie", "Lilli", "Emily", 
    "Nina", "Ella", "Katharina", "Isabella", "Julia", "Lisa", "Franziska", 
    "Marlene", "Greta", "Eva", "Luisa", "Paula", "Johanna", "Carla", "Leonie", 
    "Lara", "Alina", "Klara", "Victoria", "Elena", "Sina", "Merle", "Maja", 
    "Selina", "Antonia", "Tessa", "Nadine", "Isabel", "Vanessa", "Daniela", 
    "Verena", "Bettina", "Jana", "Maike", "Melanie"
]

# Last names shared by all profiles. Several contain umlauts (ü, ö, ä).
# Note: "Weber" appears twice.
last_names = [
    "Müller", "Schmidt", "Schneider", "Fischer", "Weber", "Meyer", "Wagner", "Becker", 
    "Hoffmann", "Schulz", "Bauer", "Koch", "Richter", "Klein", "Wolf", "Schröder", 
    "Neumann", "Schwarz", "Zimmermann", "Braun", "Schmitt", "Hartmann", "Lange", "Werner", 
    "Krause", "Peters", "Jung", "Roth", "Voigt", "Berger", "Mayer", "Fuchs", "Schulte", 
    "Böhm", "Weiss", "Bergmann", "Kraus", "Vogel", "Lang", "Ziegler", "Sauer", 
    "Weidner", "Meyerhoff", "Weigel", "Weber", "Wirth", "Krämer", "Röder", "Heinrich", 
    "Hahn", "Böttcher", "Schulze"
]

In [6]:
# Pool of email domains used for generated addresses. The list holds 52
# entries ("gmx.us" appears twice, so it is drawn twice as often as the
# others). Its contents and order are left untouched so that seeded runs keep
# picking the same provider for each profile.
email_providers = [
    "gmx.de", "web.de", "t-online.de", "yahoo.de", "freenet.de", "aol.de", "mail.de",
    "tutanota.de", "hotmail.de", "outlook.de", "1und1.de", "posteo.de", "googlemail.com",
    "mailbox.org", "arcor.de", "ziggo.de", "gmx.net", "freemail.de", "scholar.de",
    "mymail.de", "bluewin.ch", "studiemail.de", "uni-mail.de", "gmx.at", "gmx.ch",
    "email.de", "deutschlandemail.de", "planet-interkom.de", "test.de", "versatel.de",
    "gmx.us", "gmx.co.uk", "gmx.fr", "mailplus.de", "citymail.de", "iserv.de",
    "gmx.org", "sapo.de", "mail.ru", "scout24.de", "onlinedeutsch.de", "blitzmail.de",
    "earthlink.net", "easy-mail.de", "eclipso.de", "freenetmail.de", "mailzilla.de",
    "surfmail.de", "gmx.us", "altavista.com", "dawnmail.de", "posteo.net"
]

# Conventional German transliteration for characters that must not appear in
# the local part of a plain-ASCII email address.
_GERMAN_TRANSLITERATION = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})


def _email_safe(text):
    """Lower-case ``text`` and reduce it to ASCII for use in an email username."""
    # Imported here so this cell does not rely on a file-level import.
    import unicodedata

    lowered = text.lower().translate(_GERMAN_TRANSLITERATION)
    # Any other accented letter is decomposed and its combining marks dropped
    # (e.g. "é" -> "e"); characters with no ASCII form are removed entirely.
    decomposed = unicodedata.normalize("NFKD", lowered)
    return decomposed.encode("ascii", "ignore").decode("ascii")


# Function to generate a random email address using the provider pool above
def generate_email(first_name, last_name, unique_id):
    """Return a random, ASCII-only email address for the given person.

    Args:
        first_name: Given name; may contain umlauts or other non-ASCII letters.
        last_name: Family name; same rules as ``first_name``.
        unique_id: ID string whose first 8 characters are used as a suffix in
            some username patterns.

    Returns:
        ``"<username>@<provider>"`` where the provider is drawn from
        ``email_providers`` and the username follows one of five patterns
        built from the transliterated names.
    """
    first = _email_safe(first_name)
    last = _email_safe(last_name)
    id_part = unique_id[:8]

    email_provider = random.choice(email_providers)
    # All five candidates are built eagerly, so every call consumes exactly
    # two randint draws no matter which pattern is picked. This keeps the
    # random sequence (and therefore all other seeded columns) unchanged.
    username_patterns = [
        f"{first}.{last}{id_part}",
        f"{last}.{first}{random.randint(1000, 9999)}",
        f"{first}{id_part}",
        # [:1] instead of [0] so an empty name yields "" rather than IndexError.
        f"{first[:1]}{last}{random.randint(1000, 9999)}",
        f"{first}{last[:1]}{id_part}",
    ]
    username = random.choice(username_patterns)
    return f"{username}@{email_provider}"


In [7]:
# Lookup table of the cities a profile can be assigned to (simple example
# data, not 100% accurate). Each city name maps to a dict with:
#   "coords":     (latitude, longitude) tuple used as the base position
#   "zip_prefix": two-digit string used as the start of generated ZIP codes
# Some cities are keyed by their English names (e.g. "Munich", "Cologne").
city_coords = {
    "Berlin": {"coords": (52.5200, 13.4050), "zip_prefix": "10"},
    "Munich": {"coords": (48.1351, 11.5820), "zip_prefix": "80"},
    "Hamburg": {"coords": (53.5511, 9.9937), "zip_prefix": "20"},
    "Cologne": {"coords": (50.9375, 6.9603), "zip_prefix": "50"},
    "Frankfurt": {"coords": (50.1109, 8.6821), "zip_prefix": "60"},
    "Stuttgart": {"coords": (48.7758, 9.1829), "zip_prefix": "70"},
    "Düsseldorf": {"coords": (51.2217, 6.7762), "zip_prefix": "40"},
    "Dortmund": {"coords": (51.5145, 7.4660), "zip_prefix": "44"},
    "Essen": {"coords": (51.4556, 7.0116), "zip_prefix": "45"},
    "Leipzig": {"coords": (51.3397, 12.3731), "zip_prefix": "04"},
    "Bremen": {"coords": (53.0793, 8.8017), "zip_prefix": "28"},
    "Dresden": {"coords": (51.0504, 13.7373), "zip_prefix": "01"},
    "Hanover": {"coords": (52.3759, 9.7320), "zip_prefix": "30"},
    "Nuremberg": {"coords": (49.4521, 11.0767), "zip_prefix": "90"},
    "Duisburg": {"coords": (51.4344, 6.7623), "zip_prefix": "47"},
    "Bochum": {"coords": (51.4818, 7.2162), "zip_prefix": "44"},
    "Wuppertal": {"coords": (51.2562, 7.1508), "zip_prefix": "42"},
    "Bielefeld": {"coords": (52.0302, 8.5325), "zip_prefix": "33"},
    "Bonn": {"coords": (50.7374, 7.0982), "zip_prefix": "53"},
    "Münster": {"coords": (51.9607, 7.6261), "zip_prefix": "48"},
    "Karlsruhe": {"coords": (49.0069, 8.4037), "zip_prefix": "76"},
    "Mannheim": {"coords": (49.4875, 8.4660), "zip_prefix": "68"},
    "Augsburg": {"coords": (48.3705, 10.8978), "zip_prefix": "86"},
    "Wiesbaden": {"coords": (50.0782, 8.2398), "zip_prefix": "65"},
    "Gelsenkirchen": {"coords": (51.5177, 7.0857), "zip_prefix": "45"},
    "Aachen": {"coords": (50.7753, 6.0839), "zip_prefix": "52"},
    "Kiel": {"coords": (54.3233, 10.1228), "zip_prefix": "24"},
    "Magdeburg": {"coords": (52.1205, 11.6276), "zip_prefix": "39"},
    "Freiburg": {"coords": (47.9990, 7.8421), "zip_prefix": "79"},
    "Erfurt": {"coords": (50.9848, 11.0299), "zip_prefix": "99"},
    "Mainz": {"coords": (49.9929, 8.2473), "zip_prefix": "55"},
    "Rostock": {"coords": (54.0924, 12.0991), "zip_prefix": "18"},
    "Saarbrücken": {"coords": (49.2402, 6.9969), "zip_prefix": "66"},
    "Potsdam": {"coords": (52.3906, 13.0645), "zip_prefix": "14"},
    "Bamberg": {"coords": (49.8988, 10.9007), "zip_prefix": "96"},
    "Halle": {"coords": (51.4967, 11.9689), "zip_prefix": "06"},
    "Würzburg": {"coords": (49.7913, 9.9534), "zip_prefix": "97"},
    "Regensburg": {"coords": (49.0134, 12.1016), "zip_prefix": "93"},
    "Göttingen": {"coords": (51.5413, 9.9158), "zip_prefix": "37"},
    "Koblenz": {"coords": (50.3564, 7.5886), "zip_prefix": "56"},
    "Braunschweig": {"coords": (52.2689, 10.5268), "zip_prefix": "38"},
    "Chemnitz": {"coords": (50.8278, 12.9214), "zip_prefix": "09"},
    "Darmstadt": {"coords": (49.8728, 8.6512), "zip_prefix": "64"},
    "Heidelberg": {"coords": (49.3988, 8.6724), "zip_prefix": "69"},
    "Ingolstadt": {"coords": (48.7667, 11.4333), "zip_prefix": "85"},
    "Kassel": {"coords": (51.3127, 9.4797), "zip_prefix": "34"},
    "Krefeld": {"coords": (51.3388, 6.5857), "zip_prefix": "47"},
    "Leverkusen": {"coords": (51.0459, 7.0192), "zip_prefix": "51"},
    "Lübeck": {"coords": (53.8655, 10.6866), "zip_prefix": "23"},
    "Oberhausen": {"coords": (51.4963, 6.8636), "zip_prefix": "46"},
    "Osnabrück": {"coords": (52.2799, 8.0472), "zip_prefix": "49"},
    "Paderborn": {"coords": (51.7189, 8.7575), "zip_prefix": "33"},
    "Schwerin": {"coords": (53.6355, 11.4012), "zip_prefix": "19"},
    "Solingen": {"coords": (51.1652, 7.0066), "zip_prefix": "42"},
    "Trier": {"coords": (49.7499, 6.6371), "zip_prefix": "54"},
    "Ulm": {"coords": (48.4011, 9.9876), "zip_prefix": "89"},
    "Flensburg": {"coords": (54.7937, 9.4470), "zip_prefix": "24"},
    "Jena": {"coords": (50.9272, 11.5899), "zip_prefix": "07"},
    "Kaiserslautern": {"coords": (49.4401, 7.7491), "zip_prefix": "67"},
    "Lüneburg": {"coords": (53.2464, 10.4115), "zip_prefix": "21"},
    "Marburg": {"coords": (50.8058, 8.7707), "zip_prefix": "35"},
    "Bayreuth": {"coords": (49.9456, 11.5713), "zip_prefix": "95"},
    "Cottbus": {"coords": (51.7563, 14.3329), "zip_prefix": "03"},
    "Dessau": {"coords": (51.8386, 12.2456), "zip_prefix": "06"},
    "Gera": {"coords": (50.8750, 12.0828), "zip_prefix": "07"},
    "Hof": {"coords": (50.3135, 11.9128), "zip_prefix": "95"},
    "Neumünster": {"coords": (54.0733, 9.9861), "zip_prefix": "24"},
    "Oldenburg": {"coords": (53.1435, 8.2146), "zip_prefix": "26"},
    "Passau": {"coords": (48.5667, 13.4333), "zip_prefix": "94"},
    "Pforzheim": {"coords": (48.8842, 8.6988), "zip_prefix": "75"},
    "Recklinghausen": {"coords": (51.6143, 7.1978), "zip_prefix": "45"},
    "Reutlingen": {"coords": (48.4914, 9.2041), "zip_prefix": "72"},
    "Siegen": {"coords": (50.8751, 8.0277), "zip_prefix": "57"},
    "Wolfsburg": {"coords": (52.4231, 10.7865), "zip_prefix": "38"},
    "Zwickau": {"coords": (50.7190, 12.4882), "zip_prefix": "08"},
    "Aschaffenburg": {"coords": (49.9792, 9.1486), "zip_prefix": "63"},
    "Bad Homburg": {"coords": (50.2154, 8.6189), "zip_prefix": "61"},
    "Bottrop": {"coords": (51.5230, 6.9388), "zip_prefix": "46"},
    "Bremerhaven": {"coords": (53.5486, 8.5767), "zip_prefix": "27"},
    "Celle": {"coords": (52.6180, 9.8332), "zip_prefix": "29"},
    "Fulda": {"coords": (50.5534, 9.6755), "zip_prefix": "36"},
    "Greifswald": {"coords": (54.0960, 13.3815), "zip_prefix": "17"},
    "Hagen": {"coords": (51.3671, 7.4730), "zip_prefix": "58"},
    "Hamm": {"coords": (51.6739, 7.8158), "zip_prefix": "59"},
    "Heilbronn": {"coords": (49.1427, 9.2109), "zip_prefix": "74"},
    "Herne": {"coords": (51.5369, 7.2009), "zip_prefix": "44"},
    "Hildesheim": {"coords": (52.1511, 9.9527), "zip_prefix": "31"},
    "Iserlohn": {"coords": (51.3751, 7.6991), "zip_prefix": "58"},
    "Landshut": {"coords": (48.5442, 12.1508), "zip_prefix": "84"},
    "Ludwigshafen": {"coords": (49.4774, 8.4452), "zip_prefix": "67"},
    "Mönchengladbach": {"coords": (51.1805, 6.4420), "zip_prefix": "41"},
    "Neuss": {"coords": (51.1983, 6.6887), "zip_prefix": "41"},
    "Offenbach": {"coords": (50.0977, 8.7760), "zip_prefix": "63"},
    "Ravensburg": {"coords": (47.7800, 9.6100), "zip_prefix": "88"},
    "Remscheid": {"coords": (51.1814, 7.1949), "zip_prefix": "42"},
    "Rosenheim": {"coords": (47.8561, 12.1225), "zip_prefix": "83"},
    "Salzgitter": {"coords": (52.1534, 10.3261), "zip_prefix": "38"},
    "Schwäbisch Gmünd": {"coords": (48.7995, 9.8049), "zip_prefix": "73"},
    "Singen": {"coords": (47.7622, 8.8403), "zip_prefix": "78"},
    "Stralsund": {"coords": (54.3159, 13.0898), "zip_prefix": "18"},
    "Tübingen": {"coords": (48.5200, 9.0600), "zip_prefix": "72"},
    "Villingen-Schwenningen": {"coords": (48.0597, 8.4579), "zip_prefix": "78"},
    "Weimar": {"coords": (50.9790, 11.3296), "zip_prefix": "99"},
    "Witten": {"coords": (51.4439, 7.3530), "zip_prefix": "58"},
    "Worms": {"coords": (49.6347, 8.3543), "zip_prefix": "67"},
    "Buxtehude": {"coords": (53.4700, 9.0200), "zip_prefix": "21"},
    "Görlitz": {"coords": (51.1500, 14.9900), "zip_prefix": "02"},
    "Kempten": {"coords": (47.7275, 10.3125), "zip_prefix": "87"},
    "Lörrach": {"coords": (47.6156, 7.6744), "zip_prefix": "79"},
    "Neubrandenburg": {"coords": (53.5586, 13.2608), "zip_prefix": "17"},
    "Rüsselsheim": {"coords": (49.9929, 8.4137), "zip_prefix": "65"},
    "Schweinfurt": {"coords": (50.0492, 10.2194), "zip_prefix": "97"},
    "Speyer": {"coords": (49.3173, 8.4311), "zip_prefix": "67"}
    # Add more cities as needed
}

# Jittered coordinate lookup: base position of the city plus a small random offset
def generate_geo_coordinates(city):
    """Return a ``(latitude, longitude)`` tuple near ``city``.

    A city listed in ``city_coords`` gets its stored position shifted by a
    random offset of up to 0.02 degrees on each axis. For any other city a
    warning is printed and the result is a point within 0.5 degrees of
    (51.1657, 10.4515), the fallback used as the centre of Germany.
    """
    entry = city_coords.get(city)

    if entry is None:
        # Unknown city: warn, then scatter around the fallback centre point.
        print(f"Warning: City '{city}' not found in city_coords. Using default coordinates.")
        fallback_lat = 51.1657 + random.uniform(-0.5, 0.5)
        fallback_lon = 10.4515 + random.uniform(-0.5, 0.5)
        return (fallback_lat, fallback_lon)

    city_lat, city_lon = entry["coords"]
    # Latitude offset is drawn before longitude; the order matters for seeded runs.
    jittered_lat = city_lat + random.uniform(-0.02, 0.02)
    jittered_lon = city_lon + random.uniform(-0.02, 0.02)
    return (jittered_lat, jittered_lon)

In [8]:
# Function to generate a random birthdate for a person aged 18 to 80
def generate_birthday():
    """Return a random date of birth formatted as ``YYYY-MM-DD``.

    The date is produced by Faker's ``date_of_birth`` with ``minimum_age=18``
    and ``maximum_age=80``. The age window is the only constraint; no fixed
    calendar bounds are applied.
    """
    return fake.date_of_birth(minimum_age=18, maximum_age=80).strftime('%Y-%m-%d')

In [9]:
# Draw a random whole-number price between 1 and 500 (both inclusive).
def generate_price():
    """Return a random integer price in the closed range 1..500."""
    lowest, highest = 1, 500
    # randrange excludes its stop value, hence the +1 to keep 500 reachable.
    return random.randrange(lowest, highest + 1)

In [10]:
# Draw a random Stuckzahl (number of pieces purchased) between 1 and 50 (both inclusive).
def generate_stuckzahl():
    """Return a random integer quantity in the closed range 1..50."""
    fewest, most = 1, 50
    # randrange excludes its stop value, hence the +1 to keep 50 reachable.
    return random.randrange(fewest, most + 1)

In [11]:
# Pick one of the two supported sales-tax rates at random
def generate_sales_tax():
    """Return either ``0.19`` or ``0.07``, each with equal probability.

    NOTE(review): these look like the German standard and reduced VAT
    rates expressed as fractions -- confirm before reusing elsewhere.
    """
    available_rates = (0.19, 0.07)
    return random.choice(available_rates)

In [12]:
# Catalogue of purchasable clothing and footwear items (88 German product
# names). Order matters for seeded runs, so keep it stable.
_PURCHASE_ITEMS = (
    "Hose", "T-Shirt", "Socken", "Jacke", "Schuhe", "Kleid", "Bluse", "Rock", "Pullover",
    "Jeans", "Shorts", "Mantel", "Anzug", "Mütze", "Schal", "Handschuhe", "Unterwäsche",
    "Badeanzug", "Jogginghose", "Hemd", "Polo-Shirt", "Top", "Pyjama", "Bikini", "Weste",
    "Leggings", "Poncho", "Strickjacke", "Overall", "Trainingsanzug", "Stirnband", "Strumpfhose",
    "Sandalen", "Stiefel", "Sneaker", "Pumps", "Slipper", "Cargohose", "Blazer", "Cardigan",
    "Gürtel", "Krawatte", "Fliege", "Latzhose", "Trachten", "Dirndl", "Halstuch", "Regenjacke",
    "Regenhose", "Wanderstiefel", "Kapuzenpullover", "Chinos", "Cargo-Shorts", "Pufferjacke",
    "Desert Boots", "Loafers", "Espadrilles", "Flip-Flops", "Hausschuhe", "Boxershorts",
    "Tanktop", "Badehose", "Radlerhose", "Sonnenhut", "Haarband", "Klettschuhe", "Schnürschuhe",
    "Abendkleid", "Ballkleid", "Ballerinas", "Mokassins", "Zehensandalen", "Bastschuhe", "Segelschuhe",
    "Wedges", "Plateauschuhe", "Stoffschuhe", "Clogs", "Römersandalen", "Kampfstiefel", "Chelseaboots",
    "Brogues", "Halbschuhe", "Oxfordschuhe", "Laufschuhe", "Kletterhosen", "Sport-BH", "Funktionsshirt",
)


# Function to randomly select a purchase_type from the catalogue above
def generate_purchase_type():
    """Return one item name drawn uniformly from the purchase catalogue."""
    return random.choice(_PURCHASE_ITEMS)

In [13]:
def generate_profiles(num_profiles=100):
    """Build ``num_profiles`` synthetic customer rows, printing progress.

    Each row is a list with, in order: unique ID, salutation, first name,
    last name, "ZIP city", "street number", phone number, email, latitude,
    longitude, birthday, unit price, quantity (Stuckzahl), sales-tax rate,
    purchase type, tax amount per unit, gross price per unit, and the gross
    total for the whole quantity.

    Progress is printed roughly every 10% of the rows (every row when fewer
    than 10 are requested), never for the very first row.
    """
    print(f"Generating {num_profiles} profiles...")
    available_cities = list(city_coords)
    first_names_by_salutation = {"Herr": first_names_male, "Frau": first_names_female}
    # "or 1" keeps the modulo below valid when num_profiles is under 10.
    progress_step = num_profiles // 10 or 1

    data = []
    for index in range(num_profiles):
        if index > 0 and index % progress_step == 0:
            print(f"Progress: {index / num_profiles * 100:.0f}% ({index}/{num_profiles})")

        # NOTE: the order of the random/Faker calls below determines the
        # values produced by a seeded run; do not reorder them.
        unique_id = fake.uuid4()
        salutation = random.choice(["Herr", "Frau"])
        first_name = random.choice(first_names_by_salutation[salutation])
        last_name = random.choice(last_names)

        # Address: the ZIP code is derived from the randomly chosen city.
        city = random.choice(available_cities)
        zip_code = generate_zip_code(city)
        address_zip_city = f"{zip_code} {city}"
        address_street = fake.street_name() + " " + str(fake.building_number())

        phone_number = fake.phone_number()
        email = generate_email(first_name, last_name, unique_id)
        latitude, longitude = generate_geo_coordinates(city)
        birthday = generate_birthday()

        # Purchase attributes.
        price = generate_price()
        stuckzahl = generate_stuckzahl()
        sales_tax = generate_sales_tax()
        purchase_type = generate_purchase_type()

        # The generated price is treated as the net unit price.
        tax_amount = price * sales_tax
        total_price = price + tax_amount
        total_amount = total_price * stuckzahl

        data.append([
            unique_id, salutation, first_name, last_name,
            address_zip_city, address_street,
            phone_number, email, latitude, longitude, birthday,
            price, stuckzahl, sales_tax, purchase_type, tax_amount, total_price, total_amount
        ])

    print("Generation complete.")
    return data

In [14]:
def create_interactive_map(df, output_file="geo_profiles_map.html"):
    """Plot every profile in ``df`` on a clustered folium map and save it.

    Profiles are split into two toggleable layers by salutation ("Herr" and
    "Frau"); everything that is not "Herr" goes to the "Frau" layer. Each
    marker carries a tooltip with the person's name and a popup with contact
    and purchase details.

    Args:
        df: DataFrame with the columns GeoLat, GeoLon, UniqueID, Salutation,
            FirstName, LastName, Address, AddressZipCity, Email, Telephone,
            Birthday, PurchaseType, Stuckzahl, SalesTax and TotalAmount.
        output_file: Path of the HTML file the map is written to.

    Returns:
        The ``folium.Map`` object that was saved.
    """
    def cluster_icon_js(fill_color):
        # JavaScript callback that draws a cluster as a round badge showing
        # the number of markers it contains, filled with the given colour.
        return """
        function(cluster) {
            return L.divIcon({
                html: '<div style="background-color: """ + fill_color + """; color: white; border-radius: 50%; width: 30px; height: 30px; display: flex; align-items: center; justify-content: center; font-weight: bold;">' + cluster.getChildCount() + '</div>',
                className: 'marker-cluster',
                iconSize: L.point(30, 30)
            });
        }
        """

    # Base map centred on Germany.
    germany_center = [51.1657, 10.4515]
    mymap = folium.Map(location=germany_center, zoom_start=6, tiles="CartoDB positron")

    # One layer per salutation, both visible by default.
    male_group = folium.FeatureGroup(name="Herr", show=True)
    female_group = folium.FeatureGroup(name="Frau", show=True)

    # Each layer owns its cluster; control=False keeps the clusters out of
    # the layer switcher because the surrounding FeatureGroup is toggled instead.
    male_cluster = MarkerCluster(
        name="Male Clusters",
        overlay=True,
        control=False,
        icon_create_function=cluster_icon_js("#3186cc"),
    ).add_to(male_group)
    female_cluster = MarkerCluster(
        name="Female Clusters",
        overlay=True,
        control=False,
        icon_create_function=cluster_icon_js("#ff69b4"),
    ).add_to(female_group)

    for _, profile in df.iterrows():
        latitude = profile['GeoLat']
        longitude = profile['GeoLon']
        profile_id = str(profile['UniqueID'])  # str() so the ID can be sliced for the popup
        full_name = f"{profile['Salutation']} {profile['FirstName']} {profile['LastName']}"
        street_and_city = f"{profile['Address']}, {profile['AddressZipCity']}"

        # Popup body: contact data plus purchase summary.
        popup_html = f"""
        <div style="font-family: Arial; min-width: 200px;">
            <h4 style="margin-bottom: 5px;">{full_name}</h4>
            <p style="margin: 2px 0;"><strong>ID:</strong> {profile_id[:8]}...</p>
            <p style="margin: 2px 0;"><strong>Address:</strong> {street_and_city}</p>
            <p style="margin: 2px 0;"><strong>Email:</strong> {profile['Email']}</p>
            <p style="margin: 2px 0;"><strong>Phone:</strong> {profile['Telephone']}</p>
            <p style="margin: 2px 0;"><strong>Birthday:</strong> {profile['Birthday']}</p>
            <p style="margin: 2px 0;"><strong>Purchase:</strong> {profile['PurchaseType']}</p>
            <p style="margin: 2px 0;"><strong>Stuckzahl:</strong> {profile['Stuckzahl']}</p>
            <p style="margin: 2px 0;"><strong>Sales Tax:</strong> {profile['SalesTax'] * 100:.0f}%</p>
            <p style="margin: 2px 0;"><strong>Total:</strong> €{profile['TotalAmount']:.2f}</p>
        </div>
        """

        # Styling and target layer depend on the salutation.
        if profile['Salutation'] == "Herr":
            icon_color, icon_glyph, target_cluster = "blue", "male", male_cluster
        else:  # Frau
            icon_color, icon_glyph, target_cluster = "pink", "female", female_cluster

        folium.Marker(
            location=[latitude, longitude],
            popup=folium.Popup(popup_html, max_width=300),
            tooltip=full_name,
            icon=folium.Icon(color=icon_color, icon=icon_glyph, prefix='fa'),
        ).add_to(target_cluster)

    # Attach both layers, then the switcher that toggles them.
    mymap.add_child(male_group)
    mymap.add_child(female_group)
    folium.LayerControl().add_to(mymap)

    # Heading shown on the rendered page.
    title_html = '''
    <h3 align="center" style="font-size:16px"><b>Geographic Distribution of Profiles</b></h3>
    '''
    mymap.get_root().html.add_child(folium.Element(title_html))

    mymap.save(output_file)
    print(f"Map has been created and saved as '{output_file}'")
    return mymap

In [15]:
# Entry point: generate the data, export it, and optionally draw the map
def main(num_profiles=1000, save_excel=True, save_csv=True, create_map=True):
    """Generate profiles and write the requested outputs.

    Args:
        num_profiles: Number of rows to generate; also part of the export
            file names (``random_data_<num_profiles>.xlsx`` / ``.csv``).
        save_excel: Write the data to an Excel workbook when true.
        save_csv: Write the data to a CSV file when true.
        create_map: Build and save the interactive map when true.

    Returns:
        The DataFrame holding all generated profiles.
    """
    # Column order must match the row layout produced by generate_profiles.
    column_names = [
        "UniqueID", "Salutation", "FirstName", "LastName",
        "AddressZipCity", "Address",
        "Telephone", "Email", "GeoLat", "GeoLon", "Birthday",
        "Price", "Stuckzahl", "SalesTax", "PurchaseType",
        "TaxAmount", "TotalPrice", "TotalAmount",
    ]

    rows = generate_profiles(num_profiles)
    df = pd.DataFrame(rows, columns=column_names)

    if save_excel:
        excel_name = f"random_data_{num_profiles}.xlsx"
        df.to_excel(excel_name, index=False)
        print(f"Data saved to Excel file: {excel_name}")

    if save_csv:
        csv_name = f"random_data_{num_profiles}.csv"
        df.to_csv(csv_name, index=False)
        print(f"Data saved to CSV file: {csv_name}")

    if create_map:
        create_interactive_map(df)

    return df

In [16]:
# Run the full pipeline for 1000 profiles: write random_data_1000.xlsx and
# random_data_1000.csv, create the interactive HTML map, and keep the
# resulting DataFrame in `df` for further inspection.
df = main(num_profiles=1000, save_excel=True, save_csv=True, create_map=True)
print('Done.')

Generating 1000 profiles...
Progress: 10% (100/1000)
Progress: 20% (200/1000)
Progress: 30% (300/1000)
Progress: 40% (400/1000)
Progress: 50% (500/1000)
Progress: 60% (600/1000)
Progress: 70% (700/1000)
Progress: 80% (800/1000)
Progress: 90% (900/1000)
Generation complete.
Data saved to Excel file: random_data_1000.xlsx
Data saved to CSV file: random_data_1000.csv
Map has been created and saved as 'geo_profiles_map.html'
Done.
