In [325]:
import json
import os
import re

import openrouteservice as ors
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from geopy.geocoders import Nominatim
from scipy.interpolate import Rbf

In [326]:
# Set your city manually here
city = "Fort Worth, TX"  # Change this as needed

# Set max walking time
max_walking_time = 60  # in minutes

# Set max price
max_price = 1250  # in dollars

In [327]:
# Define the pattern for the city (case insensitive)
city_pattern = re.compile(re.escape(city), re.IGNORECASE)

# Open and read the file
with open("listings.txt", "r") as file:
    data = file.read()

# Find all lines that contain the city pattern
matches = [line for line in data.splitlines() if city_pattern.search(line)]

# Organize data by address (set to store unique addresses)
unique_addresses = []
address_prices = {}  # To store prices for each address

# Go through each match and process data
for match in matches:
    # If there's a vertical bar '|', get everything after it (excluding space)
    if '|' in match:
        match = match.split('|', 1)[1].lstrip()  # Extract after the bar, remove leading spaces

    # Add to list of unique addresses
    if match not in unique_addresses:
        unique_addresses.append(match)
        address_prices[match] = set()  # Use a set to prevent duplicate prices

# Iterate through the file again, this time collecting prices for each address
current_address = None

# Extract prices from the data
for line in data.splitlines():
    # Check if the line contains any of the unique addresses
    for address in unique_addresses:
        if address in line:
            current_address = address  # Set current address when we find it
            break

    if current_address:  # If we've set a current address, check for prices
        price_matches = re.findall(r'\$(\d{1,3}(?:,\d{3})*)', line)  # Match prices
        
        if price_matches:
            for price in price_matches:
                address_prices[current_address].add(price)  # Use set to avoid duplicates

# Now, convert the address_prices dictionary into a DataFrame
rows = []
columns = ['Address', 'Price']

# Prepare rows for the DataFrame
for address, prices in address_prices.items():
    for price in prices:
        rows.append([address, price])

df = pd.DataFrame(rows, columns=columns)

In [328]:
pd.set_option('display.max_colwidth', None)

df.head()

Unnamed: 0,Address,Price
0,"6351 Hulen Bend Blvd, Fort Worth, TX",795
1,"6351 Hulen Bend Blvd, Fort Worth, TX",1247
2,"6751 Westcreek Dr, Fort Worth, TX",799
3,"3500 Renzel Blvd, Fort Worth, TX",1199
4,"3500 Renzel Blvd, Fort Worth, TX",845


In [329]:
# OpenRouteService Client
client = ors.Client(key='5b3ce3597851110001cf624817e35411a1444fc7901b0872949c4b86')

# Initialize Geolocator for address geocoding
geolocator = Nominatim(user_agent="address_optimizer")

# Fixed intermediate addresses (coordinates)
intermediate_coords = [
    (32.74840963982297, -97.32497439905646),  # Address 1
    (32.747705801640954, -97.35808003618311),  # Address 2
    (32.7515678225884, -97.35133168289146)     # Address 3
]

# Function to calculate the centroid of the intermediate locations
def calculate_centroid(coords):
    latitudes = [coord[0] for coord in coords]
    longitudes = [coord[1] for coord in coords]
    return sum(latitudes) / len(coords), sum(longitudes) / len(coords)

# Calculate the centroid
centroid_lat, centroid_lon = calculate_centroid(intermediate_coords)

# Define cache file and load cache if it exists.
cache_file = 'walking_times_cache.json'
if os.path.exists(cache_file):
    with open(cache_file, 'r') as f:
        cache = json.load(f)
else:
    cache = {}

# List to store results for each address
walk_times = []

# Iterate over each address in the DataFrame 'df'
for index, row in df.iterrows():
    address = row['Address']
    # Create a unique key for the address and centroid
    key = f"{address}_{centroid_lat}_{centroid_lon}"
    
    if key in cache:
        walking_time_minutes = cache[key]
    else:
        # Geocode the address
        location = geolocator.geocode(address)
        if location:
            address_lat = location.latitude
            address_lon = location.longitude
            
            # Request walking route from the address to the centroid
            route = client.directions(
                coordinates=[(address_lon, address_lat), (centroid_lon, centroid_lat)], 
                profile='foot-walking', 
                format='geojson'
            )
            # Extract walking time (in seconds) then convert to minutes
            if route.get('features'):
                walking_time_seconds = route['features'][0]['properties']['segments'][0]['duration']
                walking_time_minutes = walking_time_seconds / 60
            else:
                walking_time_minutes = None
        else:
            walking_time_minutes = None
        
        # Save result in cache
        cache[key] = walking_time_minutes

        # Save updated cache to file
        with open(cache_file, 'w') as f:
            json.dump(cache, f, indent=2)
    
    # Append result to list
    walk_times.append({
        'Address': address,
        'Price': row['Price'],
        'Walking Time to Centroid (minutes)': walking_time_minutes
    })

# Convert results into a DataFrame for better readability
walk_times_df = pd.DataFrame(walk_times)

In [330]:
# Remove commas from the 'Price' column and convert it to numeric
walk_times_df['Price'] = walk_times_df['Price'].str.replace(',', '').astype(float)

walk_times_df = walk_times_df[walk_times_df['Walking Time to Centroid (minutes)'] < max_walking_time]
walk_times_df = walk_times_df[walk_times_df['Price'] < max_price]

# Sort the DataFrame based on 'Walking Time to Centroid (minutes)'
walk_times_df_sorted = walk_times_df.sort_values(by='Walking Time to Centroid (minutes)', ascending=True)

# Save the sorted DataFrame to a new CSV file
walk_times_df_sorted.to_csv('walk_times_sorted.csv', index=False)

# Display the DataFrame
walk_times_df_sorted

Unnamed: 0,Address,Price,Walking Time to Centroid (minutes)
43,"1001 W 7th St, Fort Worth, TX",1190.0,16.696667
41,"1012 Burnett St, Fort Worth, TX",1184.0,19.608333
97,"Magnolia on Park, 2901 W 5th St APT 302, Fort Worth, TX 76107",1149.0,24.0
36,"929 Norwood St, Fort Worth, TX",1150.0,27.528333
39,"3125 Sondra Dr, Fort Worth, TX",1175.0,34.093333
40,"3125 Sondra Dr, Fort Worth, TX",1182.0,34.093333
31,"555 Elm St, Fort Worth, TX",1106.0,34.726667
94,"520 Samuels Ave, Fort Worth, TX",1143.0,37.763333
50,"320 E Broadway Ave, Fort Worth, TX",1234.0,38.905
79,"3529 W 7th St, Fort Worth, TX",900.0,39.606667


In [331]:
walk_times_df_sorted = pd.read_csv('walk_times_sorted.csv')

In [332]:
# Count dominated points
walk_times_df_sorted['Dominated Count'] = 0
n = len(walk_times_df_sorted)
for i in range(n):
    for j in range(n):
        if (walk_times_df_sorted.iloc[i]['Price'] < walk_times_df_sorted.iloc[j]['Price'] and
            walk_times_df_sorted.iloc[i]['Walking Time to Centroid (minutes)'] < walk_times_df_sorted.iloc[j]['Walking Time to Centroid (minutes)']):
            walk_times_df_sorted.at[i, 'Dominated Count'] += 1

# Create the scatter plot
fig = px.scatter(
    walk_times_df_sorted, 
    x='Price', 
    y='Walking Time to Centroid (minutes)', 
    hover_name='Address',
    title='Price vs Walking Time',
    labels={'Price': 'Price ($)', 'Walking Time to Centroid (minutes)': 'Walking Time (minutes)'},
    color='Dominated Count',
    color_continuous_scale='Viridis',
    template='plotly_dark'
)

# Set margins and grid range
x_margin = (walk_times_df_sorted['Price'].max() - walk_times_df_sorted['Price'].min()) * 0.05
y_margin = (walk_times_df_sorted['Walking Time to Centroid (minutes)'].max() - walk_times_df_sorted['Walking Time to Centroid (minutes)'].min()) * 0.05
x_min = 500
x_max = walk_times_df_sorted['Price'].max() + x_margin
y_min = 0
y_max = walk_times_df_sorted['Walking Time to Centroid (minutes)'].max() + y_margin

fig.update_layout(
    xaxis=dict(type='linear', title='Price ($)', range=[x_min, x_max]),
    yaxis=dict(title='Walking Time (minutes)', range=[y_min, y_max]),
    xaxis_gridcolor='rgba(255, 255, 255, 0.05)',  # Slightly transparent gridlines for x-axis
    yaxis_gridcolor='rgba(255, 255, 255, 0.05)',  # Slightly transparent gridlines for y-axis
    xaxis_zerolinecolor='rgba(255, 255, 255, 0.5)',  # Transparent zero line for x-axis
    yaxis_zerolinecolor='rgba(255, 255, 255, 0.5)'   # Transparent zero line for y-axis
)

# Create grid for interpolation over full plot area
price_range = np.linspace(x_min, x_max, 200)
time_range = np.linspace(y_min, y_max, 200)
price_grid, time_grid = np.meshgrid(price_range, time_range)

# Use Rbf for continuous interpolation
points = walk_times_df_sorted[['Price', 'Walking Time to Centroid (minutes)']].values
values = walk_times_df_sorted['Dominated Count'].values
rbf = Rbf(points[:,0], points[:,1], values, function='inverse', smooth=0)
grid_z = rbf(price_grid, time_grid)

# Add translucent contour layer with hover disabled
fig.add_trace(go.Contour(
    x=price_range,
    y=time_range,
    z=grid_z,
    colorscale='Viridis',
    opacity=0.5,  # Decreased opacity to make the grid more transparent
    contours=dict(showlines=False),
    showscale=False,
    name='Dominated Count Distribution',
    hoverinfo='skip'
))

# Update marker to add a thin border around each point and set size to 8
fig.update_traces(
    marker=dict(
        size=10,  # Set point size to 8
        opacity=1, 
        line=dict(width=1, color='DarkSlateGrey')  # Thin border around the points
    ),
    selector=dict(mode='markers')
)

fig.show()

In [333]:
fig.write_html("pareto.html")

In [334]:
walk_times_df_sorted = walk_times_df_sorted.sort_values(by='Dominated Count', ascending=False)

walk_times_df_sorted

Unnamed: 0,Address,Price,Walking Time to Centroid (minutes),Dominated Count
2,"Magnolia on Park, 2901 W 5th St APT 302, Fort Worth, TX 76107",1149.0,24.0,6
3,"929 Norwood St, Fort Worth, TX",1150.0,27.528333,5
9,"3529 W 7th St, Fort Worth, TX",900.0,39.606667,5
6,"555 Elm St, Fort Worth, TX",1106.0,34.726667,4
0,"1001 W 7th St, Fort Worth, TX",1190.0,16.696667,3
1,"1012 Burnett St, Fort Worth, TX",1184.0,19.608333,3
4,"3125 Sondra Dr, Fort Worth, TX",1175.0,34.093333,3
5,"3125 Sondra Dr, Fort Worth, TX",1182.0,34.093333,3
7,"520 Samuels Ave, Fort Worth, TX",1143.0,37.763333,3
10,"Ramble & Rose Apartments, 501 W Rosedale St #449, Fort Worth, TX 76104",1228.0,45.0,1
