# Data Processing in Python, Summer 2023/24, Final Project
Daniela Vandasová, Ondřej Beneš

# Spectators Unlimited Ltd.
## Environment

In [67]:
# General libraries
import pandas as pd

In [68]:
# Libraries for web scraping
from time import sleep
from selenium import webdriver
from bs4 import BeautifulSoup
import os
from itertools import product
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

In [11]:
#Libraries for geographic calculations
from geopy.geocoders import Nominatim # Importing the geopy library and Nominatim class
from geopy.exc import GeocoderTimedOut
from geopy.distance import geodesic

ModuleNotFoundError: No module named 'geopy'

## Functions

### Functions Related to Geographic Calculations

In [None]:
# Function to obtain the coordinates of a list of places
def get_coordinates(places):
    geolocator = Nominatim(user_agent="Geopy Library")
    coordinates = []  # Use a list to store coordinate data
    
    for place in places:
        try:
            location = geolocator.geocode(place)
            if location:
                # Append the place and its coordinates as a list
                coordinates.append([place, location.latitude, location.longitude])
            else:
                # Append the place and a message indicating location was not found
                coordinates.append([place, "Location not found", "Location not found"])
        except Exception as e:
            # Append the place and the error occurred
            coordinates.append([place, f"Error occurred: {str(e)}", f"Error occurred: {str(e)}"])
    
    return coordinates

In [None]:
# Function to calculate the distance between a base location and a list of other places
def calculate_distances(base_coords, other_places):
    results = []
    # Extract the base coordinates
    base_latitude, base_longitude = base_coords

    # Iterate through the list of other places
    for place in other_places:
        place_name, latitude, longitude = place
        # Calculate the distance using the geodesic function
        distance = geodesic((base_latitude, base_longitude), (latitude, longitude)).kilometers
        # Append the place name and distance to the results list
        results.append([place_name, distance])
    
    return results

### Functions Related to Inputs

In [None]:
# User Input: Function to check code and return error message if correct input code not found
def check_code(primary_code, internal_list):
    # Create a set of codes from internal list for easy comparison
    internal_codes = {item[3] for item in internal_list}
    
    # Check if the primary code exists in the internal list of codes
    if primary_code in internal_codes:
        pass
    else:
        print(f"Error: Code {primary_code} is not a valid input. Please check the code and try again.")

In [None]:
# User Input: Function to read the input file and extract data
def read_code(file_path, internal_list):
    with open(file_path, 'r') as file:
        file.readline()  # Skip the first line as it's not needed for the code
        primary_code = file.readline().strip()  # Read the second line for primary code

        # Check if the primary code exists in the internal list of codes
        check_code(primary_code, internal_list)
    
    return primary_code

In [8]:
# Internal Input: Function to read the internal file and extract data
def read_excel_to_df(file_path):
    try:
        # Read the Excel file into a DataFrame
        df = pd.read_excel(file_path, engine='openpyxl')
        
        # Check if the DataFrame has the correct number of columns and they are in the expected order
        expected_columns = ['city', 'venue', 'event', 'date', 'code']
        if len(df.columns) != len(expected_columns) or not all(df.columns == expected_columns):
            # If columns do not match, raise an error
            raise ValueError("Error: Unexpected input form. Please insert a file containing " +
                             "\"city\", \"venue\", \"event\", \"date\" and \"code\" in this order.")

        # Convert the 'date' column to datetime dtype
        df['date'] = pd.to_datetime(df['date'])

        # Return the DataFrame
        return df
    
    except ValueError as ve:
        print(ve)
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

## Internal Data

In [6]:
# Addresses of venues - football stadiums of EURO 2024 and Taylor Swift Eras tour 2024 Europe; FOR LEGACY PURPOSES
venues = [
    "Olympia Stadion Berlin, Berlin",
    "Fußball Arena München, Munich",
    "BVB Stadion Dortmund, Dortmund",
    "Stuttgart Arena, Stuttgart",
    "Arena AufSchalke, Gelsenkirchen",
    "Frankfurt Arena, Frankfurt",
    "Volksparkstadion, Hamburg",
    "Düsseldorf Arena, Düsseldorf",
    "Cologne Stadium, Cologne",
    "Leipzig Stadium, Leipzig",
    "Paris La Défense Arena, Paris",
    "Friends Arena, Stockholm",
    "Estádio da Luz, Lisbon",
    "ESTADIO SANTIAGO BERNABÉU, Madrid",
    "GROUPAMA STADIUM, Lyon",
    "SCOTTISH GAS MURRAYFIELD STADIUM, Edinburgh",
    "ANFIELD STADIUM, Liverpool",
    "PRINCIPALITY STADIUM, Cardiff",
    "Wembley Stadium, London",
    "AVIVA STADIUM, Dublin",
    "JOHAN CRUIJFF ARENA, Amsterdam",
    "STADION LETZIGRUND ZÜRICH, Zurich",
    "SAN SIRO STADIUM, Milan",
    "VELTINS-ARENA, Gelsenkirchen",
    "OLYMPIASTADION, Munich",
    "PGE NARODOWY, Warsaw",
    "ERNST-HAPPEL-STADION, Vienna"
]

In [None]:
# Accomodation locations; FOR TESTING PURPOSES
accomodation_locations = [
    "Eixample, Barcelona",
    "Gracia, Barcelona",
    "Sant-Montjuïc, Barcelona"
]

In [3]:
# List containing the event dates, locations and event codes; FOR LEGACY PURPOSES
event_dates = [
    ["Munich", "Germany vs. Scotland", pd.to_datetime("2024-06-14"), "euro01"],
    ["Berlin", "Spain vs. Croatia", pd.to_datetime("2024-06-15"), "euro02"],
    ["Dortmund", "Italy vs. Albania", pd.to_datetime("2024-06-15"), "euro03"],
    ["Cologne", "Hungary vs. Switzerland", pd.to_datetime("2024-06-15"), "euro04"],
    ["Hamburg", "Poland vs. Netherlands", pd.to_datetime("2024-06-16"), "euro05"],
    ["Gelsenkirchen", "England vs. Serbia", pd.to_datetime("2024-06-16"), "euro06"],
    ["Stuttgart", "Slovenia vs. Denmark", pd.to_datetime("2024-06-16"), "euro07"],
    ["Düsseldorf", "Austria vs. France", pd.to_datetime("2024-06-17"), "euro08"],
    ["Frankfurt", "Belgium vs. Slovakia", pd.to_datetime("2024-06-17"), "euro09"],
    ["Munich", "Romania vs. Ukraine", pd.to_datetime("2024-06-17"), "euro10"],
    ["Leipzig", "Portugal vs. Czechia", pd.to_datetime("2024-06-18"), "euro11"],
    ["Dortmund", "Türkiye vs. Georgia", pd.to_datetime("2024-06-18"), "euro12"],
    ["Hamburg", "Croatia vs. Albania", pd.to_datetime("2024-06-19"), "euro13"],
    ["Cologne", "Scotland vs. Switzerland", pd.to_datetime("2024-06-19"), "euro14"],
    ["Stuttgart", "Germany vs. Hungary", pd.to_datetime("2024-06-19"), "euro15"],
    ["Gelsenkirchen", "Spain vs. Italy", pd.to_datetime("2024-06-20"), "euro16"],
    ["Frankfurt", "Denmark vs. England", pd.to_datetime("2024-06-20"), "euro17"],
    ["Munich", "Slovenia vs. Serbia", pd.to_datetime("2024-06-20"), "euro18"],
    ["Berlin", "Poland vs. Austria", pd.to_datetime("2024-06-21"), "euro19"],
    ["Leipzig", "Netherlands vs. France", pd.to_datetime("2024-06-21"), "euro20"],
    ["Düsseldorf", "Slovakia vs. Ukraine", pd.to_datetime("2024-06-21"), "euro21"],
    ["Hamburg", "Georgia vs. Czechia", pd.to_datetime("2024-06-22"), "euro22"],
    ["Dortmund", "Türkiye vs. Portugal", pd.to_datetime("2024-06-22"), "euro23"],
    ["Cologne", "Belgium vs. Romania", pd.to_datetime("2024-06-22"), "euro24"],
    ["Frankfurt", "Switzerland vs. Germany", pd.to_datetime("2024-06-23"), "euro25"],
    ["Stuttgart", "Scotland vs. Hungary", pd.to_datetime("2024-06-23"), "euro26"],
    ["Leipzig", "Croatia vs. Italy", pd.to_datetime("2024-06-24"), "euro27"],
    ["Düsseldorf", "Albania vs. Spain", pd.to_datetime("2024-06-24"), "euro28"],
    ["Berlin", "Netherlands vs. Austria", pd.to_datetime("2024-06-25"), "euro29"],
    ["Dortmund", "France vs. Poland", pd.to_datetime("2024-06-25"), "euro30"],
    ["Cologne", "England vs. Slovenia", pd.to_datetime("2024-06-25"), "euro31"],
    ["Munich", "Denmark vs. Serbia", pd.to_datetime("2024-06-25"), "euro32"],
    ["Hamburg", "Czechia vs. Türkiye", pd.to_datetime("2024-06-26"), "euro33"],
    ["Gelsenkirchen", "Georgia vs. Portugal", pd.to_datetime("2024-06-26"), "euro34"],
    ["Frankfurt", "Romania vs. Slovakia", pd.to_datetime("2024-06-26"), "euro35"],
    ["Stuttgart", "Ukraine vs. Belgium", pd.to_datetime("2024-06-26"), "euro36"],
    ["Berlin", "Berlin Round of 16", pd.to_datetime("2024-06-29"), "euro37"],
    ["Dortmund", "Dortmund Round of 16", pd.to_datetime("2024-06-29"), "euro38"],
    ["Gelsenkirchen", "Gelsenkirchen Round of 16", pd.to_datetime("2024-06-30"), "euro39"],
    ["Düsseldorf", "Düsseldorf Round of 16", pd.to_datetime("2024-07-01"), "euro40"],
    ["Frankfurt", "Frankfurt Round of 16", pd.to_datetime("2024-07-01"), "euro41"],
    ["Leipzig", "Leipzig Round of 16", pd.to_datetime("2024-07-02"), "euro42"],
    ["Hamburg", "Hamburg Quarter-Finals", pd.to_datetime("2024-07-05"), "euro43"],
    ["Stuttgart", "Stuttgart Quarter-Finals", pd.to_datetime("2024-07-05"), "euro44"],
    ["Berlin", "Berlin Quarter-Finals", pd.to_datetime("2024-07-06"), "euro45"],
    ["Düsseldorf", "Düsseldorf Quarter-Finals", pd.to_datetime("2024-07-06"), "euro46"],
    ["Munich", "Munich Semi-Finals", pd.to_datetime("2024-07-09"), "euro47"],
    ["Dortmund", "Dortmund Semi-Finals", pd.to_datetime("2024-07-10"), "euro48"],
    ["Berlin", "Berlin Final", pd.to_datetime("2024-07-14"), "euro49"],
    ["Lyon", "Eras Lyon Day 2.6.", pd.to_datetime("2024-06-02"), "eras01"],
    ["Lyon", "Eras Lyon Day 3.6.", pd.to_datetime("2024-06-03"), "eras02"],
    ["Edinburgh", "Eras Edinburgh 7.6.", pd.to_datetime("2024-06-07"), "eras03"],
    ["Edinburgh", "Eras Edinburgh 8.6.", pd.to_datetime("2024-06-08"), "eras04"],
    ["Edinburgh", "Eras Edinburgh 9.6.", pd.to_datetime("2024-06-09"), "eras05"],
    ["Liverpool", "Eras Liverpool 13.6.", pd.to_datetime("2024-06-13"), "eras06"],
    ["Liverpool", "Eras Liverpool 14.6.", pd.to_datetime("2024-06-14"), "eras07"],
    ["Liverpool", "Eras Liverpool 15.6.", pd.to_datetime("2024-06-15"), "eras08"],
    ["Cardiff", "Eras Cardiff 18.6.", pd.to_datetime("2024-06-18"), "eras09"],
    ["London", "Eras London 21.6.", pd.to_datetime("2024-06-21"), "eras10"],
    ["London", "Eras London 22.6.", pd.to_datetime("2024-06-22"), "eras11"],
    ["London", "Eras London 23.6.", pd.to_datetime("2024-06-23"), "eras12"],
    ["Dublin", "Eras Dublin 28.6.", pd.to_datetime("2024-06-28"), "eras13"],
    ["Dublin", "Eras Dublin 29.6.", pd.to_datetime("2024-06-29"), "eras14"],
    ["Dublin", "Eras Dublin 30.6.", pd.to_datetime("2024-06-30"), "eras15"],
    ["Amsterdam", "Eras Amsterdam 4.7.", pd.to_datetime("2024-07-04"), "eras16"],
    ["Amsterdam", "Eras Amsterdam 5.7.", pd.to_datetime("2024-07-05"), "eras17"],
    ["Amsterdam", "Eras Amsterdam 6.7.", pd.to_datetime("2024-07-06"), "eras18"],
    ["Zürich", "Eras Zürich 9.7.", pd.to_datetime("2024-07-09"), "eras19"],
    ["Zürich", "Eras Zürich 10.7.", pd.to_datetime("2024-07-10"), "eras20"],
    ["Milan", "Eras Milan 13.7.", pd.to_datetime("2024-07-13"), "eras21"],
    ["Milan", "Eras Milan 14.7.", pd.to_datetime("2024-07-14"), "eras22"],
    ["Gelsenkirchen", "Eras Gelsenkirchen 17.7.", pd.to_datetime("2024-07-17"), "eras23"],
    ["Gelsenkirchen", "Eras Gelsenkirchen 18.7.", pd.to_datetime("2024-07-18"), "eras24"],
    ["Gelsenkirchen", "Eras Gelsenkirchen 19.7.", pd.to_datetime("2024-07-19"), "eras25"],
    ["Hamburg", "Eras Hamburg 23.7.", pd.to_datetime("2024-07-23"), "eras26"],
    ["Hamburg", "Eras Hamburg 24.7.", pd.to_datetime("2024-07-24"), "eras27"],
    ["Munich", "Eras Munich 27.7.", pd.to_datetime("2024-07-27"), "eras28"],
    ["Munich", "Eras Munich 28.7.", pd.to_datetime("2024-07-28"), "eras29"],
    ["Warsaw", "Eras Warsaw 1.8.", pd.to_datetime("2024-08-01"), "eras30"],
    ["Warsaw", "Eras Warsaw 2.8.", pd.to_datetime("2024-08-02"), "eras31"],
    ["Warsaw", "Eras Warsaw 3.8.", pd.to_datetime("2024-08-03"), "eras32"],
    ["Vienna", "Eras Vienna 8.8.", pd.to_datetime("2024-08-08"), "eras33"],
    ["Vienna", "Eras Vienna 9.8.", pd.to_datetime("2024-08-09"), "eras34"],
    ["Vienna", "Eras Vienna 10.8.", pd.to_datetime("2024-08-10"), "eras35"],
    ["London", "Eras London 15.8.", pd.to_datetime("2024-08-15"), "eras36"],
    ["London", "Eras London 16.8.", pd.to_datetime("2024-08-16"), "eras37"],
    ["London", "Eras London 17.8.", pd.to_datetime("2024-08-17"), "eras38"],
    ["London", "Eras London 18.8.", pd.to_datetime("2024-08-18"), "eras39"],
    ["London", "Eras London 19.8.", pd.to_datetime("2024-08-19"), "eras40"]
]

In [7]:
# Code for exporting lists to excel; FOR FUTURE REFERENCE
df = pd.DataFrame(venues, columns=['Venue'])

# Export the DataFrame to an Excel file
df.to_excel('output.xlsx', index=False, engine='openpyxl')

In [10]:
# Internal Input: Read the internal file and extract data into a data frame for processing
file_path = '/Users/danielavandasova/Library/CloudStorage/OneDrive-Personal/Documents/3_IES/8-Semestr/Data Analysis in Python/Python_Project/internal-data.xlsx'
data_frame = read_excel_to_df(file_path)


        city                           venue                    event  \
0     Munich   Fußball Arena München, Munich     Germany vs. Scotland   
1     Berlin  Olympia Stadion Berlin, Berlin        Spain vs. Croatia   
2   Dortmund  BVB Stadion Dortmund, Dortmund        Italy vs. Albania   
3    Cologne        Cologne Stadium, Cologne  Hungary vs. Switzerland   
4    Hamburg       Volksparkstadion, Hamburg   Poland vs. Netherlands   
..       ...                             ...                      ...   
84    London         Wembley Stadium, London        Eras London 15.8.   
85    London         Wembley Stadium, London        Eras London 16.8.   
86    London         Wembley Stadium, London        Eras London 17.8.   
87    London         Wembley Stadium, London        Eras London 18.8.   
88    London         Wembley Stadium, London        Eras London 19.8.   

         date    code  
0  2024-06-14  euro01  
1  2024-06-15  euro02  
2  2024-06-15  euro03  
3  2024-06-15  euro04  
4  

In [None]:
# Codes of flight destinations; FOR ACOMMODATION SCRAPING   
thisdict = {
  "Berlin": "dest_id=-1746443",
  "Munich": "dest_id=-1829149",
  "Dortmund": "dest_id=-1761123",
    "Stuttgart": "dest_id=-1871728",
  "Gelsenkirchen": "dest_id=-1775230",
  "Frankfurt": "dest_id=-1771148",
    "Hamburg": "dest_id=-1785434",
  "Dusseldorf": "dest_id=-1762397",
  "Cologne": "dest_id=-1810561",
    "Leipzig": "dest_id=-1817680",
  "Paris": "dest_id=-1456928",
  "Stockoholm": "dest_id=-2524279",
    "Lisbon": "dest_id=-2167973",
  "Madrid": "dest_id=-390625",
  "Lyon": "dest_id=-1448468",
    "Edinburgh": "dest_id=-2595386",
  "Liverpool": "dest_id=-2601422",
  "Cardiff": "dest_id=-2591777"
    "London": "dest_id=-2601889",
  "Dublin": "dest_id=-1502554",
  "Amsterdam": "dest_id=-2140479",
    "Zurich": "dest_id=-2554935",
  "Milan": "dest_id=-121726",
  "Warsaw": "dest_id=-534433",
   "Vienna": "dest_id=-1995499"
}

## Flights Scraping

In [70]:
all_flights_data = []

driver = webdriver.Chrome()
to_location = ["BCN"]
departure_date = ["2024-10-17", "2024-10-18", "2024-10-19"]
return_date = ["2024-10-23"]


for location, return_date, departure_date in product(to_location, return_date, departure_date):
  
    url = f"https://www.kayak.ie/flights/PRG-{location}/{departure_date}/{return_date}?sort=bestflight_a"

    driver.get(url)
    sleep(15)
    
    #if an error occurs, try increasing the sleep parameter

    try:
        popwindow = driver.find_element("xpath", '//*[@id="portal-container"]/div/div[2]/div/div/div[1]/div/span[2]/button/div/div')
        popwindow.click()
    except Exception as e:
        None
    
    flight_rows = driver.find_elements("xpath", '//div[@class="nrc6-inner"]')
    print(flight_rows)

    for row in flight_rows:
        elementHTML = row.get_attribute('outerHTML')
        elementSoup = BeautifulSoup(elementHTML, 'html.parser')

        price = elementSoup.find("div", {"class":"f8F1-price-text"})
        airline = elementSoup.find("div", {"class":"J0g6-operator-text"})
        
        all_flights_data.append({
            'Location': location,
            'Departure Date': departure_date,
            'Return Date': return_date,
            'Price': price.text,
            'Airline': airline.text
        })
    
flight_data_df = pd.DataFrame(all_flights_data)

print(flight_data_df)

[<selenium.webdriver.remote.webelement.WebElement (session="3dd85eef21f51083de9e3c0d6c788827", element="f.96B57F698E31EA7A50CB209ADC92AD87.d.04FC26DA0407B3687491826E972E3051.e.144")>, <selenium.webdriver.remote.webelement.WebElement (session="3dd85eef21f51083de9e3c0d6c788827", element="f.96B57F698E31EA7A50CB209ADC92AD87.d.04FC26DA0407B3687491826E972E3051.e.145")>, <selenium.webdriver.remote.webelement.WebElement (session="3dd85eef21f51083de9e3c0d6c788827", element="f.96B57F698E31EA7A50CB209ADC92AD87.d.04FC26DA0407B3687491826E972E3051.e.146")>, <selenium.webdriver.remote.webelement.WebElement (session="3dd85eef21f51083de9e3c0d6c788827", element="f.96B57F698E31EA7A50CB209ADC92AD87.d.04FC26DA0407B3687491826E972E3051.e.147")>, <selenium.webdriver.remote.webelement.WebElement (session="3dd85eef21f51083de9e3c0d6c788827", element="f.96B57F698E31EA7A50CB209ADC92AD87.d.04FC26DA0407B3687491826E972E3051.e.148")>, <selenium.webdriver.remote.webelement.WebElement (session="3dd85eef21f51083de9e3c0d6

## Acommodation Scraping

In [61]:
pd.set_option('display.max_colwidth', None) 

booking_data = []
driver = webdriver.Chrome()
driver.maximize_window()

to_location_accomodation = ["a-p42002"]
departure_date_accomodation = ["2024-10-17", "2024-10-18", "2024-10-19"]
return_date_accomodation = ["2024-10-23"]

for location, return_date, departure_date in product(to_location_accomodation, return_date_accomodation, departure_date_accomodation):
    url = f"https://www.kayak.com/hotels/{location}/{departure_date}/{return_date}/2adults;map?sort=rank_a&fs=extendedrating=great"
    driver.get(url)
    sleep(10)

    actions = ActionChains(driver)

    for i in range(5):
        actions.send_keys(Keys.PAGE_DOWN).perform()

    sleep(5)

    hotel_rows = driver.find_elements("xpath", '//div[contains(@class, "resultInner")]')

    for row in hotel_rows: 
        elementHTML = row.get_attribute('outerHTML')
        elementSoup = BeautifulSoup(elementHTML, 'html.parser')
    
        location = elementSoup.find("div", {"class":"upS4 upS4-big-name"})
        location_text = location.text if location else "Location not found"

        name = elementSoup.find("div", {"class":"FLpo-hotel-name"})
        name_text = name.text if name else "Name not found"

        rating = elementSoup.find("div", {"class":"wdjx wdjx-positive wdjx-mod-rating-condensed"})
        rating_text = rating.text if rating else "Rating not found"
        rating_text_adjusted = rating_text[:3]
        rating_text_adjusted = rating_text_adjusted.replace(",", ".")


        price = elementSoup.find("div", {"class": "c1XBO"})
        price_text = price.text if price else "Price not found"

        booking_data.append({
            'Location': location_text,
            "Name" : name_text,
            "Rating" : rating_text_adjusted,
            "Price" : price_text,
            "Image" : image_url
        })

booking_data_df = pd.DataFrame(booking_data)

booking_data_df


Unnamed: 0,Location,Name,Rating,Price,Image
0,Sant Marti,Sallés Hotel Pere IV,8.1,"$1,591",
1,Ciutat Vella,DO Placa Reial powered by Sonder,8.4,"$2,051",
2,Eixample,Hotel America Barcelona,8.6,"$1,791",
3,Sarria-Sant Gervasi,limehome Barcelona Carrer de Fontcoberta,8.3,"$1,236",
4,Ciutat Vella,Gran Hotel Barcino,8.2,"$1,453",
...,...,...,...,...,...
79,Sant Marti,Hotel SB Glow,8.3,"$1,044",
80,Les Corts,Casual Colours Barcelona,8.1,$668,
81,Eixample,numa | Roca Rooms & Apartments,8.3,"$1,063",
82,Eixample,NH Collection Barcelona Gran Hotel Calderón,8.2,"$1,364",


In [65]:
booking_data_df['Price'] = booking_data_df['Price'].replace({r'\$': '', ',': ''}, regex=True)
booking_data_df['Price'] = pd.to_numeric(booking_data_df['Price'], errors='coerce')
booking_data_df['Price'].min()

668

[<selenium.webdriver.remote.webelement.WebElement (session="d6caedcfd010077de089cbeefcbd16e4", element="f.39EB2B53BB644643CF7C6BEA1269DB6F.d.A05F3C20829BBC4177096F6081F68F72.e.3174")>, <selenium.webdriver.remote.webelement.WebElement (session="d6caedcfd010077de089cbeefcbd16e4", element="f.39EB2B53BB644643CF7C6BEA1269DB6F.d.A05F3C20829BBC4177096F6081F68F72.e.3175")>, <selenium.webdriver.remote.webelement.WebElement (session="d6caedcfd010077de089cbeefcbd16e4", element="f.39EB2B53BB644643CF7C6BEA1269DB6F.d.A05F3C20829BBC4177096F6081F68F72.e.3176")>, <selenium.webdriver.remote.webelement.WebElement (session="d6caedcfd010077de089cbeefcbd16e4", element="f.39EB2B53BB644643CF7C6BEA1269DB6F.d.A05F3C20829BBC4177096F6081F68F72.e.3177")>, <selenium.webdriver.remote.webelement.WebElement (session="d6caedcfd010077de089cbeefcbd16e4", element="f.39EB2B53BB644643CF7C6BEA1269DB6F.d.A05F3C20829BBC4177096F6081F68F72.e.3178")>, <selenium.webdriver.remote.webelement.WebElement (session="d6caedcfd010077de089

In [54]:
hotel_rows

[]

In [None]:
for image_element in image_elements:
    image_url = image_element.get_attribute('src')

    booking_data.append({
    "Image" : image_url
        })

In [None]:
https://www.booking.com/searchresults.cs.html?ss=&efdco=1&aid=2311236&lang=cs&sb=1&src_elem=sb&src=index&dest_id=-372490&dest_type=city&ac_position=0&ac_click_type=b&ac_langcode=en&ac_suggestion_list_length=100&search_selected=true&checkin=2024-05-20&checkout=2024-05-26&group_adults=2&no_rooms=1&group_children=0

## User Input

In [None]:
file_path = '/Users/danielavandasova/Library/CloudStorage/OneDrive-Personal/Documents/3_IES/8-Semestr/Data Analysis in Python/Python_Project/input.txt' #path to input file
primary_code = read_code(file_path, event_dates)