In [1]:
import os

import googlemaps
import pandas as pd

In [2]:
# Read the Google Maps key from the environment so a real key never has to be
# typed into (and saved with) the notebook. The original placeholder is kept as
# the fallback, so behaviour is unchanged when the variable is not set.
API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY', 'USE-YOUR-KEY')

## Extracting the closest transit stops within 2000 meters

In [3]:
# Google Maps client shared by every later cell in the notebook.
gmaps = googlemaps.Client(key=API_KEY)

# Example origin as a (latitude, longitude) pair.
location = (33.7490, -84.3880)

# Transit stations within a 2000 m radius of the origin.
# NOTE(review): only results['results'] is read; if the response is paginated,
# later pages are ignored - confirm whether that is intended.
results = gmaps.places_nearby(location=location, radius=2000, type='transit_station')

# One row per station: name, address, coordinates and walking distance in miles.
data = []
for place in results['results']:
    name = place['name']
    address = place.get('vicinity', 'Address not available')
    coords = place['geometry']['location']
    latitude = coords['lat']
    longitude = coords['lng']

    # Walking route from the origin to this station.
    routes = gmaps.directions(location, (latitude, longitude), mode="walking")

    # Distance of the first leg of the first route, converted from meters to
    # miles; None when no route came back.
    distance_miles = routes[0]['legs'][0]['distance']['value'] / 1609.34 if routes else None

    data.append([name, address, latitude, longitude, distance_miles])

# Persist the table so it can be inspected outside the notebook.
df = pd.DataFrame(
    data,
    columns=['Name', 'Address', 'Latitude', 'Longitude', 'Walking_Distance_miles'],
)
df.to_csv("transit_stops_with_walking_distance_miles.csv", index=False)


In [4]:
# Five hand-written candidate rental houses, each a dict with a 1-based "id"
# and a (latitude, longitude) "location".
# NOTE(review): the third latitude (34.7530) and the fifth longitude (-85.3860)
# are about one degree away from the other entries - confirm they are not typos.
houses = [
    {"id": house_id, "location": coords}
    for house_id, coords in enumerate(
        [
            (33.7490, -84.3880),
            (34.0375, -84.5751),
            (34.7530, -84.3870),
            (33.7550, -84.3890),
            (33.7570, -85.3860),
        ],
        start=1,
    )
]

In [5]:
# Load the candidate houses from disk; this rebinds `houses`, replacing the
# hand-written sample list assigned in the previous cell.
# NOTE(review): pickle.load can run arbitrary code while deserializing, so
# houses.pkl must come from a trusted source.
import pickle as pkl
with open('houses.pkl','rb') as f:
    houses = pkl.load(f)
# Bare expression so the notebook renders the loaded list.
houses

[{'id': 26, 'location': (37.782777, -122.204223)},
 {'id': 6, 'location': (37.752411, -122.164314)},
 {'id': 14, 'location': (37.826687, -122.283587)},
 {'id': 56, 'location': (37.81102, -122.276154)},
 {'id': 15, 'location': (37.78373, -122.230492)},
 {'id': 10, 'location': (37.813896, -122.267265)},
 {'id': 17, 'location': (37.796753, -122.277016)},
 {'id': 20, 'location': (37.796753, -122.277016)},
 {'id': 59, 'location': (37.875832, -122.266228)},
 {'id': 7, 'location': (37.796753, -122.277016)},
 {'id': 8, 'location': (37.796753, -122.277016)},
 {'id': 52, 'location': (37.767633, -122.196906)},
 {'id': 4, 'location': (37.796753, -122.277016)},
 {'id': 12, 'location': (37.796753, -122.277016)},
 {'id': 54, 'location': (37.734421, -122.158687)},
 {'id': 65, 'location': (37.813146, -122.242996)},
 {'id': 13, 'location': (37.826111, -122.251816)},
 {'id': 5, 'location': (37.796753, -122.277016)},
 {'id': 47, 'location': (37.837816, -122.303922)},
 {'id': 94, 'location': (37.575729, -1

In [6]:
# transit_stops[i] holds the stations found around houses[i]; the two lists are
# kept index-aligned, and a house with no stations gets an empty list.
transit_stops = []
for house in houses:
    # Transit stations within 2000 m of this house.
    results = gmaps.places_nearby(location=house['location'], radius=2000, type='transit_station')
    # Keep only what the later distance lookups need: a name and a (lat, lng) pair.
    transits = [
        {
            "name": place['name'],
            "location": (
                place['geometry']['location']['lat'],
                place['geometry']['location']['lng'],
            ),
        }
        for place in results['results']
    ]
    transit_stops.append(transits)
    

In [7]:
# Sanity check: number of transit stops collected for the first house.
len(transit_stops[0])

20

## Best house by average walking distance to transit stops

In [8]:
def get_average_distance(house_location, transit_stops, mode="walking"):
    """Return the mean route distance, in miles, from a house to its transit stops.

    Args:
        house_location: Origin passed straight to ``gmaps.directions``.
        transit_stops: Iterable of dicts; each dict's ``"location"`` entry is
            used as the route destination.
        mode: Travel mode forwarded to ``gmaps.directions``.

    Returns:
        The mean distance over the stops for which a route was returned, or
        ``None`` when there are no stops or no stop could be routed.
    """
    total_distance = 0
    routed_stops = 0
    for stop in transit_stops:
        directions = gmaps.directions(house_location, stop["location"], mode=mode)
        if not directions:
            # No route for this stop: leave it out of both the sum and the
            # count so it cannot drag the average towards zero.
            continue
        # First leg of the first route, in meters.
        distance_meters = directions[0]['legs'][0]['distance']['value']
        total_distance += distance_meters / 1609.34  # meters -> miles
        routed_stops += 1
    if routed_stops == 0:
        return None
    return total_distance / routed_stops

In [9]:
# One record per house: its id and the average distance to the stops collected
# for it (transit_stops[i] belongs to houses[i]).
house_distances = []
for i, house in enumerate(houses):
    # Houses without any nearby stop get None instead of a distance.
    if len(transit_stops[i]) != 0:
        avg_distance = get_average_distance(house["location"], transit_stops[i])
    else:
        avg_distance = None

    house_distances.append(dict(House_ID=house["id"], avg_dist=avg_distance))

# Tabulate the scores and show the first 20 rows.
df_dist = pd.DataFrame(house_distances)
print(df_dist.head(20))

    House_ID  avg_dist
0         26  0.731014
1          6  0.441579
2         14  0.593007
3         56  0.703549
4         15  0.579896
5         10  0.511732
6         17  0.705165
7         20  0.705165
8         59  0.432227
9          7  0.705165
10         8  0.705165
11        52  0.497129
12         4  0.705165
13        12  0.705165
14        54  0.785136
15        65  0.580673
16        13  0.599718
17         5  0.705165
18        47  0.702369
19        94  0.530155


In [10]:
# Row label of the smallest average distance, then the full row for it.
# The row comes back as a single Series, which is why the rendered output shows
# House_ID as a float.
best_row_label = df_dist['avg_dist'].idxmin()
best_house = df_dist.loc[best_row_label]
best_house

House_ID    23.000000
avg_dist     0.401158
Name: 88, dtype: float64

## Best house by weighted average walking distance to transit stops

In [11]:
def get_weighted_distance(house_location, transit_stops, mode="walking"):
    """Return an inverse-distance-weighted mean distance (miles) to the stops.

    Each routed stop at distance ``d`` miles gets weight ``1 / (d + 0.1)``, so
    nearer stops count for more. Stops for which ``gmaps.directions`` returns
    nothing are skipped.

    Args:
        house_location: Origin passed to ``gmaps.directions``.
        transit_stops: Iterable of dicts with a ``"location"`` entry.
        mode: Travel mode forwarded to ``gmaps.directions``.

    Returns:
        The weighted mean, or ``None`` when no stop contributed any weight.
    """
    weighted_sum = 0
    weight_sum = 0
    for stop in transit_stops:
        routes = gmaps.directions(house_location, stop["location"], mode=mode)
        if not routes:
            continue
        # First leg of the first route, converted from meters to miles.
        miles = routes[0]['legs'][0]['distance']['value'] / 1609.34
        # The 0.1 offset keeps the weight finite for a zero-length route.
        inverse = 1 / (miles + 0.1)
        weighted_sum += miles * inverse
        weight_sum += inverse
    if weight_sum > 0:
        return weighted_sum / weight_sum
    return None

In [12]:
# Same scoring loop as the plain-average section, but with the inverse-distance
# weighted mean. This rebinds house_distances and df_dist.
house_distances = []
for i, house in enumerate(houses):
    stops_found = len(transit_stops[i]) != 0
    # None marks a house with no transit stops at all.
    avg_distance = (
        get_weighted_distance(house["location"], transit_stops[i]) if stops_found else None
    )

    record = {"House_ID": house["id"], "weight_avg_dist": avg_distance}
    house_distances.append(record)

# Tabulate the scores and show the first 20 rows.
df_dist = pd.DataFrame(house_distances)
print(df_dist.head(20))

    House_ID  weight_avg_dist
0         26         0.338566
1          6         0.365010
2         14         0.470945
3         56         0.513993
4         15         0.510143
5         10         0.381791
6         17         0.522869
7         20         0.522869
8         59         0.330825
9          7         0.522869
10         8         0.522869
11        52         0.238037
12         4         0.522869
13        12         0.522869
14        54         0.665359
15        65         0.431872
16        13         0.485731
17         5         0.522869
18        47         0.509807
19        94         0.317471


In [13]:
# Pick the row with the smallest weighted average distance and display it.
best_row_label = df_dist['weight_avg_dist'].idxmin()
best_house = df_dist.loc[best_row_label]
best_house

House_ID           52.000000
weight_avg_dist     0.238037
Name: 11, dtype: float64

## Best house using k-Nearest Neighbors on transit-stop distances

In [14]:
from sklearn.neighbors import NearestNeighbors
import numpy as np

In [15]:
def get_total_distance(house_location, transit_stops, mode="walking"):
    """Return the per-stop route distances, in miles, from a house.

    Despite the name, nothing is summed: the result is a list with one distance
    per stop for which ``gmaps.directions`` returned a route. Stops without a
    route are dropped, so the list can be shorter than ``transit_stops``.

    Args:
        house_location: Origin passed to ``gmaps.directions``.
        transit_stops: Iterable of dicts with a ``"location"`` entry.
        mode: Travel mode forwarded to ``gmaps.directions``.

    Returns:
        List of distances in miles, in the order the stops were given.
    """
    # Lazily query one stop at a time, in input order.
    routes_per_stop = (
        gmaps.directions(house_location, stop["location"], mode=mode)
        for stop in transit_stops
    )
    # First leg of the first route, converted from meters to miles.
    return [
        routes[0]['legs'][0]['distance']['value'] / 1609.34
        for routes in routes_per_stop
        if routes
    ]

In [16]:
# Build [house_index, distances] pairs, where house_index is the position in
# `houses` (not the house id). Houses with no transit stops are left out.
house_distances = []
for i, house in enumerate(houses):
    has_stops = len(transit_stops[i]) != 0
    distances = get_total_distance(house["location"], transit_stops[i]) if has_stops else None
    if distances is None:
        continue
    house_distances.append([i, distances])

In [17]:
# Positions in `houses` of every house that produced a distance list.
final_prob_houses = [house_index for house_index, _ in house_distances]
final_prob_houses

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99]

In [30]:
# Keep only houses whose distance list has exactly 20 entries, so every row has
# the same length and the result is a rectangular array. Houses with fewer
# entries are dropped - the original author marked this relaxation as needing
# an edit.
final_house_distances = [dists for _, dists in house_distances if len(dists) == 20]
X = np.array(final_house_distances)

In [31]:
# Matrix dimensions: one row per house kept by the 20-entry filter, one column
# per stop distance.
X.shape

(97, 20)

In [32]:
# K-NN with K=3 over the houses' distance vectors, using Euclidean distance.
# The fitted matrix is also the query set, so every row is looked up against
# the data that contains it.
knn = NearestNeighbors(n_neighbors=3, metric='euclidean').fit(X)
distances, indices = knn.kneighbors(X)

In [33]:
# Mean distance from each row of X to its K nearest rows; the smallest mean wins.
# NOTE(review): this score measures how similar a house's distance vector is to
# other houses' vectors, not how close the house is to transit. Houses sharing
# the same location have identical vectors and therefore score zero - confirm
# this is the intended criterion.
average_distances = distances.mean(axis=1)
best_house_idx = np.argmin(average_distances)

# best_house_idx is a row number of X, and X only contains the houses that
# passed the equal-length filter. Rebuild the row -> houses-index mapping with
# that same filter; indexing the unfiltered final_prob_houses would point at
# the wrong house whenever a dropped house precedes the winner.
kept_house_indices = [x[0] for x in house_distances if len(x[1]) == X.shape[1]]
assert len(kept_house_indices) == X.shape[0], "row-to-house mapping is out of sync with X"
best_house = houses[kept_house_indices[best_house_idx]]
best_house

{'id': 17, 'location': (37.796753, -122.277016)}