# Batch KML Generation: 30 Users
## FIXED: R90 calculated from Merged Neighborhood

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

In [2]:
# All 30 user IDs
USER_IDS = [
    "003", "017", "031", "044", "051", "052", "054", "063", "065", "068",
    "074", "079", "083", "088", "095", "100", "101", "102", "114", "120",
    "121", "122", "125", "126", "134", "145", "155", "172", "180", "147"
]

print(f"Total users: {len(USER_IDS)}")

Total users: 30


In [3]:
def add_laplace_noise_to_coords(df, scale_deg, seed=None):
    rng = np.random.default_rng(seed)
    df_noisy = df.copy()
    lat_noise = rng.laplace(0, scale_deg, size=len(df))
    lon_noise = rng.laplace(0, scale_deg, size=len(df))
    df_noisy['lat'] = df['lat'] + lat_noise
    df_noisy['lon'] = df['lon'] + lon_noise
    return df_noisy


def haversine_m(lat1, lon1, lat2, lon2):
    """Calculate distance in meters between two points."""
    R = 6371000.0
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    dphi = np.radians(lat2 - lat1)
    dl = np.radians(lon2 - lon1)
    a = np.sin(dphi / 2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(dl / 2) ** 2
    return 2 * R * np.arcsin(np.sqrt(a))

In [4]:
def permutation_test_cluster(all_df, home_lat_bin, home_lon_bin, 
                              cluster_lat_bin, cluster_lon_bin,
                              n_permutations=1000, seed=42):
    rng = np.random.default_rng(seed)
    
    home_mask = (all_df["lat_bin"] == home_lat_bin) & (all_df["lon_bin"] == home_lon_bin)
    cluster_mask = (all_df["lat_bin"] == cluster_lat_bin) & (all_df["lon_bin"] == cluster_lon_bin)
    
    home_points = all_df[home_mask].copy()
    cluster_points = all_df[cluster_mask].copy()
    
    if len(home_points) == 0 or len(cluster_points) == 0:
        return 0.0
    
    home_p_night = home_points["is_night"].mean()
    cluster_p_night = cluster_points["is_night"].mean()
    observed_diff = abs(home_p_night - cluster_p_night)
    
    combined = pd.concat([home_points, cluster_points], ignore_index=True)
    n_home = len(home_points)
    
    count_extreme = 0
    is_night_values = combined["is_night"].values.copy()
    
    for _ in range(n_permutations):
        rng.shuffle(is_night_values)
        perm_home_p = is_night_values[:n_home].mean()
        perm_cluster_p = is_night_values[n_home:].mean()
        perm_diff = abs(perm_home_p - perm_cluster_p)
        
        if perm_diff >= observed_diff:
            count_extreme += 1
    
    return count_extreme / n_permutations

In [5]:
def analyze_user(
    user_id: str,
    base_dir: str,
    grid_deg: float = 0.002,
    night_start: int = 21,
    night_end: int = 6,
    merge_steps: int = 1,
    noise_scale_deg: float = None,
    n_permutations: int = 1000,
    significance_level: float = 0.05,
    max_distance_m: float = 500.0,
    seed: int = 42,
):
    """Analyze a single user - FIXED to use merged neighborhood for r90."""
    
    base = Path(base_dir)
    traj_dir = base / user_id / "Trajectory"
    files = sorted(traj_dir.glob("*.plt"))
    
    if len(files) == 0:
        return None
    
    def load_plt(path):
        df = pd.read_csv(
            path, skiprows=6, header=None,
            names=["lat", "lon", "unused", "alt_ft", "date_num", "date", "time"]
        )
        df["datetime"] = pd.to_datetime(df["date"] + " " + df["time"], errors="coerce")
        df = df.drop(columns=["unused", "date", "time", "date_num"])
        df = df.dropna(subset=["datetime", "lat", "lon"]).copy()
        return df
    
    df = pd.concat([load_plt(p) for p in files], ignore_index=True)
    df.sort_values("datetime", inplace=True)
    
    if noise_scale_deg is not None:
        df = add_laplace_noise_to_coords(df, noise_scale_deg, seed=seed)
    
    hours = df["datetime"].dt.hour
    df["is_night"] = (hours >= night_start) | (hours <= night_end)
    
    df["lat_bin"] = (df["lat"] / grid_deg).round().astype(int)
    df["lon_bin"] = (df["lon"] / grid_deg).round().astype(int)
    
    night_df = df[df["is_night"]].copy()
    
    if len(night_df) == 0:
        return None
    
    night_cell_counts = (
        night_df.groupby(["lat_bin", "lon_bin"])
        .size()
        .reset_index(name="night_count")
        .sort_values("night_count", ascending=False)
        .reset_index(drop=True)
    )
    
    top_cell = night_cell_counts.iloc[0]
    home_lat_bin = int(top_cell["lat_bin"])
    home_lon_bin = int(top_cell["lon_bin"])
    
    top_cell_bounds = {
        "lat_min": (home_lat_bin - 0.5) * grid_deg,
        "lat_max": (home_lat_bin + 0.5) * grid_deg,
        "lon_min": (home_lon_bin - 0.5) * grid_deg,
        "lon_max": (home_lon_bin + 0.5) * grid_deg,
    }
    
    # ===== FIXED: Calculate merged neighborhood =====
    dlat = (night_df["lat_bin"] - home_lat_bin).abs()
    dlon = (night_df["lon_bin"] - home_lon_bin).abs()
    cheb_dist = np.maximum(dlat, dlon)
    merged_mask = cheb_dist <= merge_steps
    merged_points = night_df[merged_mask].copy()
    
    merged_bounds = {
        "lat_min": float(merged_points["lat"].min()),
        "lat_max": float(merged_points["lat"].max()),
        "lon_min": float(merged_points["lon"].min()),
        "lon_max": float(merged_points["lon"].max()),
    }
    
    # ===== FIXED: Use MERGED POINTS for home centroid and r90 =====
    if len(merged_points) > 0:
        home_lat = float(merged_points["lat"].mean())
        home_lon = float(merged_points["lon"].mean())
        
        # Distance from each merged point to centroid
        home_distances = haversine_m(
            merged_points["lat"].values, 
            merged_points["lon"].values,
            home_lat, 
            home_lon
        )
        home_r90 = float(np.percentile(home_distances, 90))
    else:
        # Fallback to top cell only
        top_cell_points = night_df[
            (night_df["lat_bin"] == home_lat_bin) & 
            (night_df["lon_bin"] == home_lon_bin)
        ].copy()
        home_lat = float(top_cell_points["lat"].mean())
        home_lon = float(top_cell_points["lon"].mean())
        home_distances = haversine_m(
            top_cell_points["lat"].values, 
            top_cell_points["lon"].values,
            home_lat, 
            home_lon
        )
        home_r90 = float(np.percentile(home_distances, 90)) if len(home_distances) > 0 else 50.0
    
    # ===== Analyze top clusters =====
    top_cells = night_cell_counts.head(15).copy()
    
    cluster_data = []
    home_like_clusters = []
    
    for i, (_, cluster) in enumerate(top_cells.iterrows()):
        lat_bin = int(cluster["lat_bin"])
        lon_bin = int(cluster["lon_bin"])
        night_count = int(cluster["night_count"])
        
        # ===== FIXED: Use merged neighborhood for EACH cluster's r90 =====
        cluster_dlat = (night_df["lat_bin"] - lat_bin).abs()
        cluster_dlon = (night_df["lon_bin"] - lon_bin).abs()
        cluster_cheb = np.maximum(cluster_dlat, cluster_dlon)
        cluster_merged_mask = cluster_cheb <= merge_steps
        cluster_merged_points = night_df[cluster_merged_mask].copy()
        
        if len(cluster_merged_points) > 1:
            cluster_lat = float(cluster_merged_points["lat"].mean())
            cluster_lon = float(cluster_merged_points["lon"].mean())
            distances = haversine_m(
                cluster_merged_points["lat"].values, 
                cluster_merged_points["lon"].values,
                cluster_lat, 
                cluster_lon
            )
            cluster_r90 = float(np.percentile(distances, 90))
        else:
            cluster_lat = lat_bin * grid_deg
            cluster_lon = lon_bin * grid_deg
            cluster_r90 = 50.0
        
        # Calculate distance from home
        dist_from_home = float(haversine_m(home_lat, home_lon, cluster_lat, cluster_lon))
        
        if i == 0:
            # Home itself
            p_value = 1.0
            is_home_like = True
        else:
            # STRICT DISTANCE CHECK FIRST
            if dist_from_home > max_distance_m:
                p_value = 0.0
                is_home_like = False
            else:
                # Within distance - run permutation test
                p_value = permutation_test_cluster(
                    df, home_lat_bin, home_lon_bin,
                    lat_bin, lon_bin,
                    n_permutations=n_permutations,
                    seed=seed + i
                )
                is_home_like = (p_value > significance_level) and (dist_from_home <= max_distance_m)
        
        cluster_info = {
            "index": i,
            "lat_bin": lat_bin,
            "lon_bin": lon_bin,
            "lat_center": cluster_lat,
            "lon_center": cluster_lon,
            "night_count": night_count,
            "r90": cluster_r90,
            "dist_from_home": dist_from_home,
            "p_value": p_value,
            "is_home_like": is_home_like,
        }
        
        cluster_data.append(cluster_info)
        if is_home_like:
            home_like_clusters.append(cluster_info)
    
    clusters_df = pd.DataFrame(cluster_data)
    
    total_area_km2 = sum([
        3.14159 * (c["r90"]/1000)**2 for c in home_like_clusters
    ])
    
    return {
        "user_id": user_id,
        "home_lat": home_lat,
        "home_lon": home_lon,
        "home_r90": home_r90,
        "top_cell_bounds": top_cell_bounds,
        "merged_bounds": merged_bounds,
        "clusters": clusters_df,
        "home_like_count": len(home_like_clusters),
        "total_search_area_km2": total_area_km2,
        "noise_scale_deg": noise_scale_deg,
    }

In [6]:
def create_shaded_kml(res, output_path, title):
    """Create KML with shaded r90 circles."""
    
    home_lat = res["home_lat"]
    home_lon = res["home_lon"]
    home_r90 = res["home_r90"]
    top_cell = res["top_cell_bounds"]
    merged = res["merged_bounds"]
    clusters = res["clusters"]
    
    COLOR_TOP_CELL = "ff0000ff"      # Red
    COLOR_MERGED = "ff00ff00"        # Green  
    COLOR_HOME_R90 = "ff00a5ff"      # Orange
    COLOR_HOME_STAR = "ff00d7ff"     # Gold
    COLOR_CLUSTER_DOT = "ffff00ff"   # Magenta
    COLOR_SHADED_FILL = "440000ff"   # Semi-transparent red
    COLOR_SHADED_LINE = "ff0000ff"   # Red outline
    
    def kml_point(lat, lon, name, color, size=1.5, icon="placemark_circle"):
        return f"""
    <Placemark>
      <n>{name}</n>
      <Style>
        <IconStyle>
          <color>{color}</color>
          <scale>{size}</scale>
          <Icon><href>http://maps.google.com/mapfiles/kml/shapes/{icon}.png</href></Icon>
        </IconStyle>
      </Style>
      <Point><coordinates>{lon:.7f},{lat:.7f},0</coordinates></Point>
    </Placemark>
"""
    
    def kml_square(lat_min, lat_max, lon_min, lon_max, name, color, width=4):
        coords = [
            (lon_min, lat_min), (lon_max, lat_min),
            (lon_max, lat_max), (lon_min, lat_max),
            (lon_min, lat_min),
        ]
        coord_str = " ".join([f"{lon:.7f},{lat:.7f},0" for lon, lat in coords])
        return f"""
    <Placemark>
      <n>{name}</n>
      <Style>
        <LineStyle><color>{color}</color><width>{width}</width></LineStyle>
        <PolyStyle><color>00000000</color></PolyStyle>
      </Style>
      <Polygon>
        <outerBoundaryIs><LinearRing><coordinates>
          {coord_str}
        </coordinates></LinearRing></outerBoundaryIs>
      </Polygon>
    </Placemark>
"""
    
    def kml_circle(lat, lon, radius_m, name, line_color, width=3, fill_color="00000000"):
        if radius_m <= 0:
            return ""
        meters_per_deg_lat = 111320
        meters_per_deg_lon = 111320 * np.cos(np.radians(lat))
        r_lat = radius_m / meters_per_deg_lat
        r_lon = radius_m / meters_per_deg_lon
        theta = np.linspace(0, 2*np.pi, 100, endpoint=True)
        coords = [(lon + r_lon*np.cos(t), lat + r_lat*np.sin(t)) for t in theta]
        coords.append(coords[0])
        coord_str = " ".join([f"{x:.7f},{y:.7f},0" for x, y in coords])
        return f"""
    <Placemark>
      <n>{name}</n>
      <Style>
        <LineStyle><color>{line_color}</color><width>{width}</width></LineStyle>
        <PolyStyle><color>{fill_color}</color></PolyStyle>
      </Style>
      <Polygon>
        <outerBoundaryIs><LinearRing><coordinates>
          {coord_str}
        </coordinates></LinearRing></outerBoundaryIs>
      </Polygon>
    </Placemark>
"""
    
    home_like_count = int(clusters["is_home_like"].sum())
    
    kml = f"""<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
  <Document>
    <n>{title}</n>
"""
    
    # Shaded r90 regions (ONLY for home-like clusters)
    kml += f"    <Folder><n>Search Regions ({home_like_count} locations)</n>\n"
    
    for _, cluster in clusters.iterrows():
        if not cluster["is_home_like"]:
            continue
        
        lat = cluster["lat_center"]
        lon = cluster["lon_center"]
        r90 = cluster["r90"]
        idx = int(cluster["index"])
        
        if idx == 0:
            # Home's r90 - orange
            kml += kml_circle(
                lat, lon, r90, 
                f"Home r90 ({r90:.0f}m)",
                COLOR_HOME_R90, width=3, 
                fill_color="4400a5ff"
            )
        else:
            # Other home-like clusters - red shaded
            kml += kml_circle(
                lat, lon, r90,
                f"Cluster {idx+1} r90 ({r90:.0f}m) - dist={cluster['dist_from_home']:.0f}m",
                COLOR_SHADED_LINE, width=3,
                fill_color=COLOR_SHADED_FILL
            )
    
    kml += "    </Folder>\n"
    
    # Grid squares
    kml += "    <Folder><n>Home Grid Cells</n>\n"
    kml += kml_square(
        merged["lat_min"], merged["lat_max"],
        merged["lon_min"], merged["lon_max"],
        "Merged neighborhood",
        COLOR_MERGED, width=4
    )
    kml += kml_square(
        top_cell["lat_min"], top_cell["lat_max"],
        top_cell["lon_min"], top_cell["lon_max"],
        "Top cell",
        COLOR_TOP_CELL, width=4
    )
    kml += "    </Folder>\n"
    
    # Home star
    kml += "    <Folder><n>Home Location</n>\n"
    kml += kml_point(home_lat, home_lon, "HOME", COLOR_HOME_STAR, size=2.5, icon="star")
    kml += "    </Folder>\n"
    
    # Other clusters (dots only)
    kml += "    <Folder><n>Other Clusters</n>\n"
    
    for _, cluster in clusters.iterrows():
        idx = int(cluster["index"])
        if idx == 0:
            continue
        
        lat = cluster["lat_center"]
        lon = cluster["lon_center"]
        night_count = int(cluster["night_count"])
        is_home_like = cluster["is_home_like"]
        p_value = cluster["p_value"]
        dist = cluster["dist_from_home"]
        
        if is_home_like:
            label = f"Cluster {idx+1}: COULD BE HOME (p={p_value:.3f}, d={dist:.0f}m) | {night_count} pts"
            size = 1.5
        else:
            label = f"Cluster {idx+1}: Not home (p={p_value:.3f}, d={dist:.0f}m) | {night_count} pts"
            size = 1.0
        
        kml += kml_point(lat, lon, label, COLOR_CLUSTER_DOT, size=size)
    
    kml += "    </Folder>\n"
    
    # Summary
    total_area = res["total_search_area_km2"]
    kml += f"""
    <Folder>
      <n>Summary</n>
      <description><![CDATA[
        <h3>User {res['user_id']}</h3>
        <p><b>Possible home locations:</b> {home_like_count}</p>
        <p><b>Total search area:</b> {total_area:.3f} km¬≤</p>
        <p><b>Home r90:</b> {home_r90:.0f}m</p>
      ]]></description>
    </Folder>
"""
    
    kml += """  </Document>
</kml>
"""
    
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(kml, encoding="utf-8")
    
    return home_like_count, total_area

---
## Configuration

In [7]:
BASE_DIR = "Geolife Trajectories 1.3/Data"
OUTPUT_DIR = Path("demo_outputs")
NOISE_SCALE_DEG = 0.00135  # 150m
N_PERMUTATIONS = 1000
SIGNIFICANCE_LEVEL = 0.05
MAX_DISTANCE_M = 500  # Strict 500m limit

noise_meters = int(NOISE_SCALE_DEG * 111000)

# Create output directories
no_noise_dir = OUTPUT_DIR / "no_noise"
with_noise_dir = OUTPUT_DIR / f"with_{noise_meters}m_noise"
no_noise_dir.mkdir(parents=True, exist_ok=True)
with_noise_dir.mkdir(parents=True, exist_ok=True)

print(f"Settings:")
print(f"  Noise: {noise_meters}m Laplace")
print(f"  Œ±: {SIGNIFICANCE_LEVEL}")
print(f"  Max distance: {MAX_DISTANCE_M}m (STRICT)")
print(f"  Permutations: {N_PERMUTATIONS}")
print(f"  R90 calculation: MERGED NEIGHBORHOOD (FIXED)")

Settings:
  Noise: 149m Laplace
  Œ±: 0.05
  Max distance: 500m (STRICT)
  Permutations: 1000
  R90 calculation: MERGED NEIGHBORHOOD (FIXED)


---
## Generate KMLs for All 30 Users

In [8]:
results_summary = []

print("=" * 80)
print("GENERATING KMLs FOR 30 USERS")
print(f"Max distance limit: {MAX_DISTANCE_M}m")
print("R90 now calculated from MERGED NEIGHBORHOOD")
print("=" * 80)

for i, user_id in enumerate(USER_IDS):
    print(f"\n[{i+1}/30] Processing User {user_id}...")
    
    try:
        # ===== NO NOISE =====
        res_no_noise = analyze_user(
            user_id=user_id,
            base_dir=BASE_DIR,
            noise_scale_deg=None,
            n_permutations=N_PERMUTATIONS,
            significance_level=SIGNIFICANCE_LEVEL,
            max_distance_m=MAX_DISTANCE_M,
            seed=42
        )
        
        if res_no_noise is None:
            print(f"  ‚ö†Ô∏è Skipping user {user_id} - no data found")
            continue
        
        no_noise_path = no_noise_dir / f"user_{user_id}_NO_NOISE.kml"
        hl_no, area_no = create_shaded_kml(
            res_no_noise,
            no_noise_path,
            f"User {user_id} - No Privacy Protection"
        )
        
        # ===== WITH NOISE =====
        res_with_noise = analyze_user(
            user_id=user_id,
            base_dir=BASE_DIR,
            noise_scale_deg=NOISE_SCALE_DEG,
            n_permutations=N_PERMUTATIONS,
            significance_level=SIGNIFICANCE_LEVEL,
            max_distance_m=MAX_DISTANCE_M,
            seed=42
        )
        
        with_noise_path = with_noise_dir / f"user_{user_id}_WITH_{noise_meters}m_NOISE.kml"
        hl_with, area_with = create_shaded_kml(
            res_with_noise,
            with_noise_path,
            f"User {user_id} - With {noise_meters}m Laplace Noise"
        )
        
        results_summary.append({
            "user_id": user_id,
            "home_r90_no_noise": res_no_noise["home_r90"],
            "home_r90_with_noise": res_with_noise["home_r90"],
            "home_like_no_noise": hl_no,
            "home_like_with_noise": hl_with,
            "area_no_noise_km2": area_no,
            "area_with_noise_km2": area_with,
        })
        
        print(f"  ‚úì No noise: r90={res_no_noise['home_r90']:.0f}m, {hl_no} locations, {area_no:.4f} km¬≤")
        print(f"  ‚úì With noise: r90={res_with_noise['home_r90']:.0f}m, {hl_with} locations, {area_with:.4f} km¬≤")
        
    except Exception as e:
        print(f"  ‚ùå Error processing user {user_id}: {e}")
        import traceback
        traceback.print_exc()
        continue

print("\n" + "=" * 80)
print("DONE!")
print("=" * 80)

GENERATING KMLs FOR 30 USERS
Max distance limit: 500m
R90 now calculated from MERGED NEIGHBORHOOD

[1/30] Processing User 003...
  ‚úì No noise: r90=307m, 1 locations, 0.2968 km¬≤
  ‚úì With noise: r90=307m, 2 locations, 0.6042 km¬≤

[2/30] Processing User 017...
  ‚úì No noise: r90=223m, 1 locations, 0.1562 km¬≤
  ‚úì With noise: r90=291m, 3 locations, 0.8159 km¬≤

[3/30] Processing User 031...
  ‚úì No noise: r90=38m, 1 locations, 0.0046 km¬≤
  ‚úì With noise: r90=289m, 6 locations, 1.5911 km¬≤

[4/30] Processing User 044...
  ‚úì No noise: r90=217m, 1 locations, 0.1485 km¬≤
  ‚úì With noise: r90=291m, 5 locations, 1.3172 km¬≤

[5/30] Processing User 051...
  ‚úì No noise: r90=63m, 1 locations, 0.0125 km¬≤
  ‚úì With noise: r90=262m, 2 locations, 0.4643 km¬≤

[6/30] Processing User 052...
  ‚úì No noise: r90=44m, 1 locations, 0.0061 km¬≤
  ‚úì With noise: r90=267m, 3 locations, 0.6439 km¬≤

[7/30] Processing User 054...
  ‚úì No noise: r90=86m, 2 locations, 0.0463 km¬≤
  ‚úì With noi

---
## Summary Statistics

In [9]:
summary_df = pd.DataFrame(results_summary)
print(f"\nüìä SUMMARY FOR {len(summary_df)} USERS:")
print("=" * 60)

print(f"\nüìç WITHOUT NOISE:")
print(f"  Avg home-like locations: {summary_df['home_like_no_noise'].mean():.2f}")
print(f"  Avg search area: {summary_df['area_no_noise_km2'].mean():.4f} km¬≤")
print(f"  Avg home r90: {summary_df['home_r90_no_noise'].mean():.0f}m")

print(f"\nüìç WITH {noise_meters}m NOISE:")
print(f"  Avg home-like locations: {summary_df['home_like_with_noise'].mean():.2f}")
print(f"  Avg search area: {summary_df['area_with_noise_km2'].mean():.4f} km¬≤")
print(f"  Avg home r90: {summary_df['home_r90_with_noise'].mean():.0f}m")

print(f"\nüìà PRIVACY IMPROVEMENT:")
avg_area_increase = summary_df['area_with_noise_km2'].mean() / summary_df['area_no_noise_km2'].mean()
avg_loc_increase = summary_df['home_like_with_noise'].mean() / summary_df['home_like_no_noise'].mean()
avg_r90_increase = summary_df['home_r90_with_noise'].mean() / summary_df['home_r90_no_noise'].mean()
print(f"  Avg search area increase: {avg_area_increase:.1f}x")
print(f"  Avg possible locations increase: {avg_loc_increase:.1f}x")
print(f"  Avg r90 increase: {avg_r90_increase:.1f}x")


üìä SUMMARY FOR 30 USERS:

üìç WITHOUT NOISE:
  Avg home-like locations: 1.60
  Avg search area: 0.2290 km¬≤
  Avg home r90: 185m

üìç WITH 149m NOISE:
  Avg home-like locations: 5.07
  Avg search area: 1.3195 km¬≤
  Avg home r90: 292m

üìà PRIVACY IMPROVEMENT:
  Avg search area increase: 5.8x
  Avg possible locations increase: 3.2x
  Avg r90 increase: 1.6x


In [10]:
# Display full table
summary_df

Unnamed: 0,user_id,home_r90_no_noise,home_r90_with_noise,home_like_no_noise,home_like_with_noise,area_no_noise_km2,area_with_noise_km2
0,3,307.349264,306.805699,1,2,0.296766,0.604158
1,17,223.00586,290.56748,1,3,0.156236,0.815913
2,31,38.239129,289.471605,1,6,0.004594,1.591131
3,44,217.397299,290.722022,1,5,0.148477,1.317205
4,51,63.150479,261.883552,1,2,0.012529,0.464342
5,52,44.127783,266.846411,1,3,0.006117,0.643906
6,54,85.874865,276.265001,2,13,0.046335,2.775775
7,63,151.328324,298.731216,1,7,0.071943,1.693177
8,65,139.519968,293.81159,2,1,0.290827,0.271199
9,68,277.337658,309.194601,2,5,0.58558,1.561744


In [11]:
# Save summary
summary_csv_path = OUTPUT_DIR / "summary_30_users.csv"
summary_df.to_csv(summary_csv_path, index=False)
print(f"\n‚úì Summary saved to: {summary_csv_path}")


‚úì Summary saved to: demo_outputs/summary_30_users.csv


In [12]:
# List files
print(f"\nüìÇ GENERATED FILES:")
print(f"\nNo noise ({len(list(no_noise_dir.glob('*.kml')))} files):")
for f in sorted(no_noise_dir.glob('*.kml'))[:5]:
    print(f"  {f.name}")
if len(list(no_noise_dir.glob('*.kml'))) > 5:
    print(f"  ... and {len(list(no_noise_dir.glob('*.kml')))-5} more")

print(f"\nWith {noise_meters}m noise ({len(list(with_noise_dir.glob('*.kml')))} files):")
for f in sorted(with_noise_dir.glob('*.kml'))[:5]:
    print(f"  {f.name}")
if len(list(with_noise_dir.glob('*.kml'))) > 5:
    print(f"  ... and {len(list(with_noise_dir.glob('*.kml')))-5} more")


üìÇ GENERATED FILES:

No noise (30 files):
  user_003_NO_NOISE.kml
  user_017_NO_NOISE.kml
  user_031_NO_NOISE.kml
  user_044_NO_NOISE.kml
  user_051_NO_NOISE.kml
  ... and 25 more

With 149m noise (30 files):
  user_003_WITH_149m_NOISE.kml
  user_017_WITH_149m_NOISE.kml
  user_031_WITH_149m_NOISE.kml
  user_044_WITH_149m_NOISE.kml
  user_051_WITH_149m_NOISE.kml
  ... and 25 more
