<a href="https://colab.research.google.com/github/aniruddhha/100-days-of-ml/blob/main/hotels_project_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install geopy tabulate



In [2]:
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.geocoders import GoogleV3
import matplotlib.pyplot as plt
from tabulate import tabulate
import time

In [4]:
def load_data(filepath):
    return pd.read_csv(filepath)

In [6]:
def save_data(df, filepath):
    df.to_csv(filepath, index=False)

In [5]:
def geocode_addresses(df, column_name):
    #geolocator = GoogleV3()
    geolocator = Nominatim(user_agent="my_request")
    latitudes = []
    longitudes = []

    for address in df[column_name]:
        while True:
            try:
                location = geolocator.geocode(address, timeout=10)
                if location is not None:
                    latitudes.append(location.latitude)
                    longitudes.append(location.longitude)
                else:
                    latitudes.append(0)
                    longitudes.append(0)
                break
            except Exception as e:
                print(f"Error geocoding {address}: {e}")
                time.sleep(5)

    df['latitude'] = latitudes
    df['longitude'] = longitudes

    return df

In [3]:
def create_location_table(df):
    location_data = []
    for index, row in df.iterrows():
        coordinates = (row['latitude'], row['longitude'])
        location_data.append([index, row['Street'], int(row['zipcode']), coordinates])

    headers = ['Index', 'Street Address', 'zipcode', 'Coordinates']
    table = tabulate(location_data, headers, tablefmt='grid')
    print(table)

In [7]:
def calculate_boundaries(latitudes, longitudes):
    min_lat = min(latitudes) - 2.5 / 69
    max_lat = max(latitudes) + 2.5 / 69
    min_lon = min(longitudes) - 2.5 / 54.6
    max_lon = max(longitudes) + 2.5 / 54.6
    return min_lat, max_lat, min_lon, max_lon

In [13]:
def distance(lat1, lon1, lat2, lon2):
    R = 3959  # Radius of the Earth in miles
    dlat = np.radians(lat2 - lat1)
    dlon = np.radians(lon2 - lon1)
    a = np.sin(dlat / 2) ** 2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon / 2) ** 2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    distance_miles = R * c
    return distance_miles

In [63]:
def calculate_distances(cloud_kitchens, service_stations):

    d_matrix = np.zeros((len(cloud_kitchens), len(service_stations)))

    for i in range(len(cloud_kitchens)):
      for j in range(len(service_stations)):
          d_matrix[i][j] = distance(cloud_kitchens[i][0], cloud_kitchens[i][1], service_stations[j][0], service_stations[j][1])

    df = pd.DataFrame(d_matrix)

    # Write the DataFrame to a CSV file
    df.to_csv('distance_matrix.csv', index=False)


    # headers = ['Cloud Kitchen Index', 'Service Station Index', 'Distance (miles)']
    # distance_table = tabulate(d_matrix, headers, tablefmt='grid')
    print(d_matrix[0])


In [8]:
def plot_locations(latitudes, longitudes, min_lat, max_lat, min_lon, max_lon, title):
    plt.scatter(longitudes, latitudes, label='Locations', c='red', marker='o')
    plt.xlim(min_lon, max_lon)
    plt.ylim(min_lat, max_lat)
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.show()

In [9]:
def generate_random_points(num_points, min_lat, max_lat, min_lon, max_lon):
    random_latitudes = np.random.uniform(min_lat, max_lat, num_points)
    random_longitudes = np.random.uniform(min_lon, max_lon, num_points)
    return random_latitudes, random_longitudes

In [65]:
def main():
    filepath_in = 'hotels.csv'
    filepath_out = 'Hotels with coordinates.csv'

    df = load_data(filepath_in)
    df = geocode_addresses(df, 'Street')
    save_data(df, filepath_out)



    min_lat, max_lat, min_lon, max_lon = calculate_boundaries(df['latitude'].dropna(), df['longitude'].dropna())

    # plot_locations(df['latitude'], df['longitude'], min_lat, max_lat, min_lon, max_lon, 'Cloud Kitchen Locations')

    random_latitudes, random_longitudes = generate_random_points(50, min_lat, max_lat, min_lon, max_lon)
    # plot_locations(random_latitudes, random_longitudes, min_lat, max_lat, min_lon, max_lon, 'Randomly Sampled Points within Boundaries')

    # create_location_table(df)

    latitude = df['latitude'].values
    longitude = df['longitude'].values
    cloud_kitchens = np.array(list(zip(latitude, longitude)))
    service_stations = np.array(list(zip(random_latitudes, random_longitudes)))
    # print(cloud_kitchens)
    # print(service_stations)

    calculate_distances(cloud_kitchens, service_stations)


In [64]:
main()

[2246.61703577  839.66457465 5123.81601178 3261.01089084 3933.80377359
 2215.03417851 3433.5832289  2960.47153499 3168.83416447  625.59920049
 2084.89298156 3918.31211008 1371.88589618 4464.69663474  997.33368057
 2378.17046508  530.51155406 2016.2372178  1399.91282434 2049.32976742
 3970.5515254  2339.06082847 2682.38567232 1411.81935221 3315.49698258
 2459.68814916 5482.03615414 2246.92108703  961.74054675 3151.41525521
 2052.63797656 2285.24351642 2777.41877305 3413.55850074 2447.96023986
 5172.52726557 1823.58394024 2813.5149291  4363.22223618 1480.36468906
 5158.18558472 1647.95079533 1699.39886423 4552.74761259 5519.67523165
 5434.5192187  3484.97210998 2785.21985572 3780.69662609 3237.97820776]
