In [1]:
import requests
import pandas as pd

#helper functions 

def get_places(api_key, place_type, location, radius=10000):
    """
    Fetches places of a specific type within a given radius from a location using Google Places API.
    
    :param api_key: Your Google Places API key
    :param place_type: Type of place (e.g., grocery_or_supermarket, pharmacy, gym)
    :param location: Latitude,Longitude of the center point
    :param radius: Radius to search within (in meters)
    :return: DataFrame of places with name, address, rating, and location
    """
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    params = {
        "key": api_key,
        "location": location,
        "radius": radius,
        "type": place_type
    }
    response = requests.get(url, params=params)
    data = response.json()

    # Extract relevant details
    places = []
    for place in data.get('results', []):
        places.append({
            "Name": place.get('name'),
            "Address": place.get('vicinity'),
            "Rating": place.get('rating', 'N/A'),
            "Location": place.get('geometry', {}).get('location', {})
        })

    return pd.DataFrame(places)

def prepare_location_data(api_key, center_location, place_types):

    '''
        creates a dictionary of all the desired/undesired amenities/locations in a geographic location
    '''
    
    location_data = {}
    
    for place_type in place_types:
        df = get_places(api_key, place_type, center_location)
        coords = [f"{row['Location']['lat']},{row['Location']['lng']}" 
                  for _, row in df.iterrows() 
                  if 'lat' in row['Location'] and 'lng' in row['Location']]
        location_data[place_type] = coords
    
    return location_data


def get_distances(origin, destinations, api_key):
    url = "https://maps.googleapis.com/maps/api/distancematrix/json"
    params = {
        "origins": origin,
        "destinations": "|".join(destinations),
        "mode": 'transit',
        "key": api_key
    }
    response = requests.get(url, params=params)
    data = response.json()

    #return [element['distance']['value'] for element in data['rows'][0]['elements']]
    distances = []
    durations = []
    fares = []
    
    for element in data['rows'][0]['elements']:
        if element['status'] == 'OK':
            distances.append(element['distance']['value'])  # in meters
            durations.append(element['duration']['value'])  # in seconds
            fare = element.get('fare', {}).get('value', None)  # in the currency's smallest unit
            fares.append(fare)
        else:
            distances.append(None)
            durations.append(None)
            fares.append(None)
    
    return distances, durations, fares

def get_distance_and_duration(origin, destination, api_key):
    # Define the endpoint URL
    url = "https://maps.googleapis.com/maps/api/distancematrix/json"
    
    # Set up the parameters for the API request
    params = {
        "origins": origin,
        "destinations": destination,
        "mode": "transit",  # Use 'driving' for driving directions
        "key": api_key
    }
    
    # Make the GET request to the API
    response = requests.get(url, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        # Check if the response contains valid data
        if data['status'] == 'OK':
            # Extract the distance and duration
            element = data['rows'][0]['elements'][0]
            distance = element['distance']['value']
            duration = element['duration']['value']
            return distance, duration
        else:
            return None, f"Error: {data['status']}"
    else:
        return None, f"HTTP Error: {response.status_code}"






In [3]:
# Example Usage
api_key = "AIzaSyCaOWXoABSdgWZYGCRlEiAGyRnHtuha_D0"

In [4]:
df = pd.read_csv('data/df_all_listResults.csv')
budget_min = 1000
budget_max = 1500
rooms = 1
filtered_df = df[(df['price']>=budget_min) 
                 & (df['price']>=budget_max) & (df['beds']==rooms)]
rentals = filtered_df[['latitude','longitude']]
rentals.rename(columns={'latitude': 'lat', 'longitude': 'lng'}, inplace=True)
rentals = rentals.to_dict(orient='records')
priorities = ['supermarket', 'park', 'cafe']
frequencies = [2, 4, 1]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rentals.rename(columns={'latitude': 'lat', 'longitude': 'lng'}, inplace=True)


In [6]:
def find_nearest_place(api_key, location, place_type, radius=2000):
    """
    Finds the nearest place of a specified type to the given latitude and longitude.

    Parameters:
    - api_key: str - Your Google API key.
    - latitude: float - Latitude of the origin point.
    - longitude: float - Longitude of the origin point.
    - place_type: str - Type of place to search for (e.g., 'supermarket', 'school').
    - radius: int - Search radius in meters (default is 5000 meters).

    Returns:
    - dict: Information about the nearest place, including name, address, distance in meters, and duration in seconds.
    """
    # Define the location and search parameters
    places_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    places_params = {
        "location": location,
        "radius": radius,
        "type": place_type,
        "key": api_key
    }

    # Perform the Places API request
    places_response = requests.get(places_url, params=places_params)
    places_data = places_response.json()

    # Check if any places were found
    if not places_data.get("results"):
        return {"error": "No places found within the specified radius."}

    # Initialize variables to track the nearest place
    nearest_place = None
    shortest_distance = float("inf")

    # Iterate through the found places
    for place in places_data["results"]:
        place_location = place["geometry"]["location"]
        destination = f"{place_location['lat']},{place_location['lng']}"

        # Define the Distance Matrix API parameters
        distance_url = "https://maps.googleapis.com/maps/api/distancematrix/json"
        distance_params = {
            "origins": location,
            "destinations": destination,
            "key": api_key
        }

        # Perform the Distance Matrix API request
        distance_response = requests.get(distance_url, params=distance_params)
        distance_data = distance_response.json()

        # Extract the distance and duration information
        element = distance_data["rows"][0]["elements"][0]
        if element["status"] == "OK":
            distance = element["distance"]["value"]  # Distance in meters
            duration = element["duration"]["value"]  # Duration in seconds

            # Check if this place is closer than the current nearest
            if distance < shortest_distance:
                shortest_distance = distance
                nearest_place = {
                    "name": place.get("name"),
                    "address": place.get("vicinity"),
                    "distance_meters": distance,
                    "duration_seconds": duration
                }

    return nearest_place

In [7]:
import requests

def find_nearest_place(api_key, location, place_type, radius=2000):
    """
    Finds the nearest place of a specified type to the given latitude and longitude.

    Parameters:
    - api_key: str - Your Google API key.
    - location: str - Latitude and longitude of the origin point in 'lat,lng' format.
    - place_type: str - Type of place to search for (e.g., 'supermarket', 'school').
    - radius: int - Search radius in meters (default is 2000 meters).

    Returns:
    - dict: Information about the nearest place, including name, address, distance in meters,
            duration in seconds, and transit fare (if available).
    """
    # Define the Places API endpoint and parameters
    places_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    places_params = {
        "location": location,
        "radius": radius,
        "type": place_type,
        "key": api_key
    }

    # Perform the Places API request
    places_response = requests.get(places_url, params=places_params)
    places_data = places_response.json()

    # Check if any places were found
    if not places_data.get("results"):
        return {
                    "name": None,
                    "address": None,
                    "distance_meters": None,
                    "duration_seconds": None,
                    "transit_fare": None
                }

    # Initialize variables to track the nearest place
    nearest_place = None
    shortest_distance = float("inf")

    # Iterate through the found places
    for place in places_data["results"]:
        place_location = place["geometry"]["location"]
        destination = f"{place_location['lat']},{place_location['lng']}"

        # Define the Distance Matrix API parameters
        distance_url = "https://maps.googleapis.com/maps/api/distancematrix/json"
        distance_params = {
            "origins": location,
            "destinations": destination,
            "mode": "transit",
            "key": api_key
        }

        # Perform the Distance Matrix API request
        distance_response = requests.get(distance_url, params=distance_params)
        distance_data = distance_response.json()

        # Extract the distance, duration, and fare information
        element = distance_data["rows"][0]["elements"][0]
        if element["status"] == "OK":
            distance = element["distance"]["value"]  # Distance in meters
            duration = element["duration"]["value"]  # Duration in seconds
            fare = element.get("fare", {}).get("value")  # Fare in the local currency

            # Check if this place is closer than the current nearest
            if distance < shortest_distance:
                shortest_distance = distance
                nearest_place = {
                    "name": place.get("name"),
                    "address": place.get("vicinity"),
                    "location": place.get('geometry', {}).get('location'),
                    "distance_meters": distance,
                    "duration_seconds": duration,
                    "transit_fare": fare
                }

    return nearest_place


In [70]:
x = find_nearest_place(api_key, '47.658050,-122.321150', 'supermarket')

In [8]:
def get_nearest_place_row(row, api_key, place_type, radius=2000):

    # Construct the location string from the DataFrame row
    location = f"{row['latitude']},{row['longitude']}"

    # Use the find_nearest_place function to get the nearest place information
    result = find_nearest_place(api_key, location, place_type, radius)

    # Return the relevant information as a pandas Series
    return pd.Series({
        'nearest_place_name': result.get('name'),
        'nearest_place_distance_meters': result.get('distance_meters'),
        'commute_duration_seconds': result.get('duration_seconds'),
        'commute_fare': result.get('transit_fare'),
        'location': result.get('location')
    })

In [86]:
#import dask.dataframe as dd

## Add supermarket details
ddf = dd.from_pandas(df, npartitions=4)  # Adjust the number of partitions as needed

# Apply the function in parallel
result = ddf.map_partitions(lambda df: df.apply(
    get_nearest_place_row,
    axis=1,
    api_key=api_key,
    place_type='supermarket',
    radius=2000
)).compute()

# Assign the results back to the original DataFrame
df[['nearest_supermarket', 'nearest_supermarket_distance_meters', 'supermarket_duration_seconds', 'supermarket_commute_fare', 'supermarket_location']] = result


KeyboardInterrupt: 

In [None]:
import dask.dataframe as dd
ddf = dd.from_pandas(df, npartitions=4)  # Adjust the number of partitions as needed

# Apply the function in parallel
result = ddf.map_partitions(lambda df: df.apply(
    get_nearest_place_row,
    axis=1,
    api_key=api_key,
    place_type='park',
    radius=2000
)).compute()

# Assign the results back to the original DataFrame
df[['nearest_park', 'nearest_park_distance_meters', 'nearest_park_seconds', 'park_commute_fare', 'park_location']] = result


In [15]:
import dask.dataframe as dd
from dask.diagnostics import ProgressBar

# Create Dask DataFrame
ddf = dd.from_pandas(df, npartitions=4)  # Adjust the number of partitions as needed

# Set up the progress bar
with ProgressBar():
    # Apply the function in parallel with the progress bar
    result = ddf.map_partitions(lambda df: df.apply(
        get_nearest_place_row,
        axis=1,
        api_key=api_key,
        place_type='park',
        radius=2000
    )).compute()

# Assign the results back to the original DataFrame
df[['nearest_park', 'nearest_park_distance_meters', 'nearest_park_seconds', 'park_commute_fare', 'park_location']] = result


[                                        ] | 0% Completed | 16m 49sss


ConnectionError: HTTPSConnectionPool(host='maps.googleapis.com', port=443): Max retries exceeded with url: /maps/api/distancematrix/json?origins=47.61245%2C-122.34818&destinations=47.6270967%2C-122.3371302&mode=transit&key=AIzaSyCaOWXoABSdgWZYGCRlEiAGyRnHtuha_D0 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x127dbeac0>: Failed to resolve 'maps.googleapis.com' ([Errno 8] nodename nor servname provided, or not known)"))

In [17]:
!pip3 install dask

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [19]:
import dask
from dask import delayed

@delayed
def get_nearest_place_row_delayed(row, api_key, place_type, radius):
    return get_nearest_place_row(row, api_key, place_type, radius)

# Apply the function in parallel
result = [get_nearest_place_row_delayed(row, api_key, 'park', 2000) for _, row in filtered_df.iterrows()]
result = dask.compute(*result)

In [31]:
type(result[0])

pandas.core.series.Series

In [32]:
result_dicts = [row.to_dict() for row in result]

# Create a DataFrame from the list of dictionaries
df_result = pd.DataFrame(result_dicts)


In [34]:
df[['nearest_park', 'nearest_park_distance_meters', 'supermarket_park_seconds', 'park_commute_fare', 'park_location']] = df_result

In [36]:
df2 = pd.read_csv('df_all_Results2.csv')

In [37]:
df2[['nearest_park', 'nearest_park_distance_meters', 'supermarket_park_seconds', 'park_commute_fare', 'park_location']]=df[['nearest_park', 'nearest_park_distance_meters', 'supermarket_park_seconds', 'park_commute_fare', 'park_location']]

In [40]:
df2.rename(columns={'supermarket_park_seconds': 'nearest_park_seconds'}, inplace=True)

In [46]:
df2.to_csv('final_df.csv')