In [1]:
import pandas as pd
import folium
from folium.plugins import HeatMap, MarkerCluster
from geopandas import GeoDataFrame
import geopandas as gpd
from shapely.geometry import Point

In [3]:
# Step 1: Load the large dataset (purchase data)
df = pd.read_csv("datasets/Tabelle_Purchase_Value.csv")  # Replace with your CSV file path

# Step 2: Randomly sample 350,000 rows from the dataset
sample_size = 350000
sampled_df = df.sample(n=sample_size, random_state=42)  # Ensures reproducibility

# Step 3: Check your column names for D_CUS_DeliveryLatitude and D_CUS_DeliveryLongitude
D_CUS_DeliveryLatitude_column = 'D_CUS_DeliveryLatitude'  # Replace with your actual column name
D_CUS_DeliveryLongitude_column = 'D_CUS_DeliveryLongitude'  # Replace with your actual column name

# Step 4: Create a map centered around the average location
center_lat = sampled_df[D_CUS_DeliveryLatitude_column].mean()
center_lon = sampled_df[D_CUS_DeliveryLongitude_column].mean()
heatmap_map = folium.Map(location=[center_lat, center_lon], zoom_start=10)

# Step 5: Prepare data for the HeatMap (purchase data)
heat_data = sampled_df[[D_CUS_DeliveryLatitude_column, D_CUS_DeliveryLongitude_column]].dropna().values.tolist()
HeatMap(heat_data, radius=12, name="Purchase Activity Heatmap").add_to(heatmap_map)

# Step 6: Add legends
legend_html = '''
<div style="
    position: fixed; 
    bottom: 550px; left: 390px; width: 150px; height: 100px; 
    background-color: white; z-index:1000; font-size:14px;
    border:1px solid black; padding: 10px;">
    <b>Heatmap Intensity</b><br>
    <i style="background:yellow; width:10px; height:10px; display:inline-block;"></i> Low<br>
    <i style="background:orange; width:10px; height:10px; display:inline-block;"></i> Medium<br>
    <i style="background:red; width:10px; height:10px; display:inline-block;"></i> High<br>
</div>
'''
heatmap_map.get_root().html.add_child(folium.Element(legend_html))

# Step 7: Load GeoJSON data for amenities
geojson_files = {
    "Supermarkets": "supermarkets.geojson",  # Replace with your file path
    "Bars": "bars.geojson",                # Replace with your file path
    "Restaurants": "restaurants.geojson"  # Replace with your file path
}

# Initialize amenity layers
amenity_marker_clusters = {}
amenity_heat_data = []

for amenity, file_path in geojson_files.items():
    # Load GeoJSON data
    amenity_gdf = gpd.read_file(file_path)
    # Filter only Point geometries
    amenity_gdf = amenity_gdf[amenity_gdf.geometry.type == 'Point']
    
    # Add markers for this amenity
    marker_cluster = MarkerCluster(name=f"{amenity} Locations").add_to(heatmap_map)
    for _, row in amenity_gdf.iterrows():
        folium.Marker(
            location=[row.geometry.y, row.geometry.x],
            popup=f"{amenity}: {row.get('name', 'N/A')}",  # Replace 'name' with a relevant property in your GeoJSON
            icon=folium.Icon(color="blue" if amenity == "Bars" else "green" if amenity == "Supermarkets" else "orange")
        ).add_to(marker_cluster)
    amenity_marker_clusters[amenity] = marker_cluster
    
    # Collect heatmap data for this amenity
    amenity_heat_data.extend([[point.y, point.x] for point in amenity_gdf.geometry if point is not None])

# Add combined amenity heatmap
HeatMap(amenity_heat_data, radius=15, gradient={0.4: 'blue', 0.65: 'lime', 1: 'red'}, name="Amenity Density Heatmap").add_to(heatmap_map)

# Step 8: Analyze spatial overlap
# Convert purchase data to GeoDataFrame
purchase_gdf = GeoDataFrame(sampled_df, geometry=gpd.points_from_xy(sampled_df[D_CUS_DeliveryLongitude_column], sampled_df[D_CUS_DeliveryLatitude_column]))

# Create buffer zones around amenities (e.g., 500 meters)
buffers = []
for amenity, file_path in geojson_files.items():
    amenity_gdf = gpd.read_file(file_path)
    amenity_gdf['buffer'] = amenity_gdf.geometry.buffer(0.005)  # Buffer size in degrees (~500m)
    buffers.extend(amenity_gdf['buffer'])

# Check for overlap
overlap = purchase_gdf[purchase_gdf.geometry.apply(lambda x: any(x.within(b) for b in buffers))]
print(f"Number of overlapping purchase points: {len(overlap)}")

# Step 9: Save the map with all layers
folium.LayerControl().add_to(heatmap_map)  # Add layer control for toggling layers
heatmap_map.save('overlay_map_multiple_geojson.html')
print("Overlay map saved as 'overlay_map_multiple_geojson.html'")


Skipping field contact:phone:description: unsupported OGR type: 10
Skipping field opening_hours:checkin: unsupported OGR type: 10

  amenity_gdf['buffer'] = amenity_gdf.geometry.buffer(0.005)  # Buffer size in degrees (~500m)
Skipping field contact:phone:description: unsupported OGR type: 10

  amenity_gdf['buffer'] = amenity_gdf.geometry.buffer(0.005)  # Buffer size in degrees (~500m)
Skipping field opening_hours:checkin: unsupported OGR type: 10

  amenity_gdf['buffer'] = amenity_gdf.geometry.buffer(0.005)  # Buffer size in degrees (~500m)


KeyboardInterrupt: 

The warning indicates that your geometries are in a geographic coordinate reference system (CRS), which uses latitude and longitude. Performing operations like `buffer` on geographic CRS can lead to inaccurate results because distances in degrees are not consistent across the globe.

To resolve this, you need to re-project the geometries to a projected CRS (e.g., UTM or a local projection suitable for your data) before applying the buffer operation. After the operation, you can convert the geometries back to the original geographic CRS if needed.

### Updated Code
Here’s how you can fix the issue:



In [None]:

# Step 1: Re-project to a projected CRS for buffering
for amenity, file_path in geojson_files.items():
    # Load GeoJSON data
    amenity_gdf = gpd.read_file(file_path)
    
    # Re-project to a suitable projected CRS (e.g., EPSG:3857 for Web Mercator)
    amenity_gdf = amenity_gdf.to_crs(epsg=3857)
    
    # Create buffer (e.g., 500 meters)
    amenity_gdf['buffer'] = amenity_gdf.geometry.buffer(500)  # Buffer in meters
    
    # Optionally, revert back to the original CRS after buffering
    amenity_gdf = amenity_gdf.to_crs(epsg=4326)
    
    # Append buffers to the list for overlap analysis
    buffers.extend(amenity_gdf['buffer'])



### Explanation of the Changes:
1. **Re-project to a Projected CRS:**
   - `amenity_gdf.to_crs(epsg=3857)` re-projects the GeoDataFrame to the Web Mercator projection, which is suitable for distance-based operations like buffering.

2. **Buffer in Meters:**
   - `amenity_gdf.geometry.buffer(500)` creates a buffer of 500 meters around each geometry.

3. **Revert to Geographic CRS:**
   - `amenity_gdf.to_crs(epsg=4326)` converts the GeoDataFrame back to the original geographic CRS (WGS84, EPSG:4326) if needed for visualization or further analysis.

---

### Key Notes:
- **Choosing a Suitable CRS:** 
   - Use a CRS appropriate for your region. For global data, EPSG:3857 is commonly used. For local data, find the UTM zone corresponding to your area.
   - Example for UTM: Replace `epsg=3857` with the appropriate EPSG code for your UTM zone.

- **Avoid Reverting to Geographic CRS:**
   - If you don't need to revert to EPSG:4326, keep the data in the projected CRS for further spatial operations.

Let me know if you need help determining the best CRS for your data!