# Geographic Calculations
## Environment

In [1]:
# General libraries
import pandas as pd

In [20]:
#Libraries for geographic calculations
from geopy.geocoders import Nominatim # Importing the geopy library and Nominatim class
from geopy.exc import GeocoderTimedOut
from geopy.distance import Distance
from geopy.distance import geodesic

## Functions related to Input

In [8]:
# Internal Input: Function to read the internal file and extract data
def read_excel_to_df(file_path):
    """Load the internal Excel file into a DataFrame with a validated layout.

    The sheet must contain exactly the columns 'code', 'date', 'event',
    'city', 'venue', 'accom.Code' and 'flight.Code', in that order. The
    'date' column is converted to datetime before the frame is returned.

    Failures are reported with ``print`` instead of being raised: a
    ``ValueError`` (including the layout check below) is printed as-is,
    any other exception is printed with an "An error occurred" prefix.

    Returns the DataFrame on success, or None on any failure.
    """
    required_layout = ['code', 'date', 'event', 'city', 'venue', 'accom.Code', 'flight.Code']

    try:
        frame = pd.read_excel(file_path, engine='openpyxl')

        # Comparing the header as a plain list checks count, names and order at once
        if list(frame.columns) != required_layout:
            raise ValueError("Error: Unexpected input form. Please insert a file containing " +
                             "\"code\", \"date\", \"event\", \"city\", \"venue\", \"accommodation code\" and \"flight code\" in this order.")

        frame['date'] = pd.to_datetime(frame['date'])
    except ValueError as reported_problem:
        print(reported_problem)
        return None
    except Exception as unexpected_problem:
        print(f"An error occurred: {unexpected_problem}")
        return None
    else:
        return frame

## Functions related to Data Cleaning

In [84]:
# Function that takes a DataFrame, removes rows that contain "Nearby - " in the Location name, and returns the cleaned DataFrame
def remove_nearby(df):
    """Return the rows of *df* whose 'Location' does not contain "Nearby - ".

    Rows with a missing 'Location' are kept, because missing values are
    treated as not matching. The input frame itself is not modified.
    """
    is_nearby = df['Location'].str.contains("Nearby - ", na=False)
    return df.loc[~is_nearby]

## Functions related to Geographic Calculations

In [87]:
# Function to obtain the coordinates of a list of places
def get_coordinates(places):
    """Geocode each entry of *places* with Nominatim.

    Repeated string queries are answered from a per-call cache, so every
    distinct place name is sent to the service at most once per successful
    lookup (Nominatim's usage policy asks clients to cache results and not
    to repeat identical queries).

    Args:
        places: Iterable of queries to look up (typically place names).

    Returns:
        A list with one ``[place, latitude, longitude]`` entry per input, in
        input order. When the lookup finds nothing, both coordinate slots
        hold the string "Location not found"; when the lookup raises, both
        hold "Error occurred: <message>". Callers must filter such entries
        out before doing arithmetic on the coordinates.
    """
    # NOTE(review): Nominatim's usage policy asks for a User-Agent that
    # identifies the application - consider replacing this generic value.
    geolocator = Nominatim(user_agent="Geopy Library")
    coordinates = []  # One [place, latitude, longitude] entry per input
    resolved = {}  # Completed lookups keyed by query string

    for place in places:
        # Only plain strings are cached; other query types may be unhashable
        cacheable = isinstance(place, str)

        if cacheable and place in resolved:
            latitude, longitude = resolved[place]
        else:
            try:
                location = geolocator.geocode(place)
                if location:
                    latitude, longitude = location.latitude, location.longitude
                else:
                    latitude = longitude = "Location not found"
            except Exception as e:
                # Failures are not cached, so a later duplicate is retried
                latitude = longitude = f"Error occurred: {str(e)}"
            else:
                if cacheable:
                    resolved[place] = (latitude, longitude)

        coordinates.append([place, latitude, longitude])

    return coordinates

In [108]:
# Not used - not sure where to save the results - TBD
# Function to calculate the distance between a base location and a list of other places
def calculate_distances(base_coords, other_places, initial_list):
    """Compute the geodesic distance in kilometres from a base location to each place.

    Args:
        base_coords: Sequence whose first entry is ``[name, latitude, longitude]``;
            only that first entry is used.
        other_places: Sequence of ``[name, latitude, longitude]`` entries.
        initial_list: Sequence indexed in parallel with *other_places*; the
            distance of place ``i`` is appended to ``initial_list[i]['distance']``,
            so each entry must already provide an appendable 'distance' value.

    Returns:
        List of distances in kilometres, in the order of *other_places*
        (empty when *other_places* is empty).
    """
    # NOTE(review): entries coming from a failed lookup carry strings instead
    # of numbers in the coordinate slots; geodesic is expected to reject
    # those - confirm and filter them out before calling.
    base_point = (base_coords[0][1], base_coords[0][2])
    results = []

    for i, place in enumerate(other_places):
        distance = geodesic(base_point, (place[1], place[2])).kilometers
        results.append(distance)
        # Insert the distance into the initial list as the last element
        initial_list[i]['distance'].append(distance)

    # The computed distances were previously discarded; hand them to the caller
    return results


## Geographic Calculations

In [119]:
# For testing purposes
# NOTE(review): both paths below are absolute paths on one user's machine; adjust them before running elsewhere
# Load accommodation data from excel file
file_path_accom = '/Users/danielavandasova/Library/CloudStorage/OneDrive-Personal/Documents/3_IES/8-Semestr/Data Analysis in Python/Python_Project/locations2.xlsx'
accommodation_data = pd.read_excel(file_path_accom)

# Load internal database from excel file
# read_excel_to_df prints the problem and returns None on failure, so internal_data may be None after this line
file_path_internal = '/Users/danielavandasova/Library/CloudStorage/OneDrive-Personal/Documents/3_IES/8-Semestr/Data Analysis in Python/Python_Project/internal-data.xlsx'
internal_data = read_excel_to_df(file_path_internal)

In [120]:
# Clean accommodation data: Remove rows that contain "Nearby - " in the Location name
# The filtered frame replaces the previously loaded accommodation_data
accommodation_data = remove_nearby(accommodation_data)

In [121]:
# Geocode every remaining accommodation location; each entry of `places` is [place, latitude, longitude],
# with strings instead of numbers in the coordinate slots when a lookup failed
places = get_coordinates(accommodation_data['Location'])

In [122]:
# Working version - city and venue are obtained differently in the main code
# Here the venue stored under index label 16 of the internal data serves as the base location
base_coords = get_coordinates([internal_data['venue'][16]])

In [123]:
# Extract the base coordinates
# base_coords holds a single [place, latitude, longitude] entry, so index 0 is the base location
base_latitude = base_coords[0][1]
base_longitude = base_coords[0][2]

In [124]:

# Print the geodesic distance from the base location to every geocoded place
base_point = (base_latitude, base_longitude)
for _place, place_latitude, place_longitude in places:
    distance = geodesic(base_point, (place_latitude, place_longitude))
    print(distance)
    

516.6058863495922 km
18.731899773539357 km
18.731899773539357 km
13.889254312530568 km
17.166205019780246 km
14.736198195954682 km
13.889254312530568 km
18.731899773539357 km
18.731899773539357 km
13.889254312530568 km
516.6058863495922 km
18.731899773539357 km
18.731899773539357 km
13.889254312530568 km
16.251044923671753 km
13.889254312530568 km
13.889254312530568 km
18.731899773539357 km
17.166205019780246 km
13.889254312530568 km
13.889254312530568 km
13.889254312530568 km
18.731899773539357 km
116.99614856788858 km
17.34569683250562 km
13.889254312530568 km
18.087773072883596 km
516.6058863495922 km
516.6058863495922 km
18.731899773539357 km
13.889254312530568 km
17.166205019780246 km
13.889254312530568 km
14.736198195954682 km
13.889254312530568 km
18.087773072883596 km
18.731899773539357 km
18.731899773539357 km
18.087773072883596 km
13.889254312530568 km
18.731899773539357 km
13.889254312530568 km
18.731899773539357 km
18.731899773539357 km
13.889254312530568 km
13.889254312530