# VacationPy
---

## Starter Code to Import Libraries and Load the Weather and Coordinates Data

In [37]:
# API
import requests
import json

# Data
import pandas as pd
import numpy as np

# Data Viz
import matplotlib.pyplot as plt
import seaborn as sns
import hvplot.pandas # maps

# Import API key
from api_keys_sub import geoapify_key

#to turn off the warning messages
import warnings
warnings.filterwarnings("ignore")

In [38]:
# Load the CSV file created in Part 1 into a Pandas DataFrame
city_data_df = pd.read_csv("output_data/cities_data1.csv")

# Display sample data
city_data_df.head()

Unnamed: 0,City_ID,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,0,igarka,67.4667,86.5833,32.02,92.0,100.0,13.0,RU,1717330000.0
1,1,puerto natales,-51.7236,-72.4875,27.82,90.0,21.0,3.87,CL,1717330000.0
2,2,waitangi,-43.9535,-176.5597,49.82,81.0,82.0,11.99,NZ,1717330000.0
3,3,puerto ayora,-0.7393,-90.3518,68.56,97.0,91.0,3.51,EC,1717330000.0
4,4,saint-pierre,-21.3393,55.4781,78.48,65.0,0.0,18.41,RE,1717330000.0


---

### Step 1: Create a map that displays a point for every city in the `city_data_df` DataFrame. The size of the point should be the humidity in each city.

In [39]:
#checking for null values in humidity
city_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586 entries, 0 to 585
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   City_ID     586 non-null    int64  
 1   City        586 non-null    object 
 2   Lat         557 non-null    float64
 3   Lng         557 non-null    float64
 4   Max Temp    557 non-null    float64
 5   Humidity    557 non-null    float64
 6   Cloudiness  557 non-null    float64
 7   Wind Speed  557 non-null    float64
 8   Country     552 non-null    object 
 9   Date        557 non-null    float64
dtypes: float64(7), int64(1), object(2)
memory usage: 45.9+ KB


In [40]:
# taking the nulls out of humidity so my code works
city_data_df.loc[pd.isnull(city_data_df.Humidity)]

Unnamed: 0,City_ID,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
33,33,kulia village,,,,,,,,
48,48,ampasimanolotra,,,,,,,,
52,52,mahabe,,,,,,,,
64,64,alotau,,,,,,,,
92,92,taiohae,,,,,,,,
159,159,little cayman,,,,,,,,
174,174,cable beach,,,,,,,,
178,178,koneurgench,,,,,,,,
180,180,puerto san carlos,,,,,,,,
205,205,qapqal,,,,,,,,


In [41]:
city_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586 entries, 0 to 585
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   City_ID     586 non-null    int64  
 1   City        586 non-null    object 
 2   Lat         557 non-null    float64
 3   Lng         557 non-null    float64
 4   Max Temp    557 non-null    float64
 5   Humidity    557 non-null    float64
 6   Cloudiness  557 non-null    float64
 7   Wind Speed  557 non-null    float64
 8   Country     552 non-null    object 
 9   Date        557 non-null    float64
dtypes: float64(7), int64(1), object(2)
memory usage: 45.9+ KB


In [42]:
#dropping the nulls now because the map plot is not working 
clean_city = city_data_df.dropna(how="any").reset_index()
clean_city.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 552 entries, 0 to 551
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   index       552 non-null    int64  
 1   City_ID     552 non-null    int64  
 2   City        552 non-null    object 
 3   Lat         552 non-null    float64
 4   Lng         552 non-null    float64
 5   Max Temp    552 non-null    float64
 6   Humidity    552 non-null    float64
 7   Cloudiness  552 non-null    float64
 8   Wind Speed  552 non-null    float64
 9   Country     552 non-null    object 
 10  Date        552 non-null    float64
dtypes: float64(7), int64(2), object(2)
memory usage: 47.6+ KB


In [43]:
#keep getting a value error trying to make sure that all the humididty data is positive before making the map plot
#expert helped with this 
# Filter out non-positive values from the "Humidity" column
city_data_df = city_data_df[city_data_df["Humidity"] > 0]

In [44]:
%%capture --no-display

# Configure the map plot
city_data_plot = city_data_df.hvplot.points(
    "Lng",
    "Lat",
    geo = True,
    alpha = 0.5,
    tiles = "OSM",
    frame_width = 700,
    frame_height = 500,
    size = "Humidity",
    scale = 0.8,
    color = "City"
)
# Display the map
city_data_plot

### Step 2: Narrow down the `city_data_df` DataFrame to find your ideal weather condition

In [51]:
# Narrow down cities that fit criteria and drop any results with null values
narrowed_city_df = city_data_df.loc[
    (city_data_df["Max Temp"] < 90) & (city_data_df["Max Temp"] > 60) \
    & (city_data_df["Wind Speed"] < 15) \
    & (city_data_df["Cloudiness"] == 0)
]

# Drop any rows with null values
# I already dropped the null values but ill put the code just incase
#narrowed_city_df = narrowed_city_df.dropna()

# Display sample data
narrowed_city_df

Unnamed: 0,City_ID,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
19,19,galliano,29.4422,-90.2992,77.0,88.0,0.0,0.0,US,1717330000.0
59,59,red bluff,40.1785,-122.2358,66.33,32.0,0.0,4.61,US,1717330000.0
114,114,tolanaro,-25.0319,46.9987,78.87,73.0,0.0,11.5,MG,1717330000.0
148,148,kill devil hills,36.0307,-75.676,71.98,76.0,0.0,4.61,US,1717330000.0
150,150,hotan,37.0997,79.9269,83.48,14.0,0.0,5.88,CN,1717330000.0
151,151,greenville,35.6127,-77.3663,67.95,73.0,0.0,4.61,US,1717330000.0
158,158,haiku-pauwela,20.9219,-156.3051,74.05,86.0,0.0,14.0,US,1717330000.0
165,165,quartu sant'elena,39.2419,9.1839,76.06,55.0,0.0,8.05,IT,1717330000.0
173,173,castelli,-25.9468,-60.6195,68.68,54.0,0.0,9.6,AR,1717330000.0
210,210,tsiombe,-25.3,45.4833,79.88,45.0,0.0,11.1,MG,1717330000.0


### Step 3: Create a new DataFrame called `hotel_df`.

In [52]:
# Use the Pandas copy function to create DataFrame called hotel_df to store the city, country, coordinates, and humidity
hotel_df = narrowed_city_df[["City", "Country", "Lat", "Lng", "Humidity"]].copy()

# Add an empty column, "Hotel Name," to the DataFrame so you can store the hotel found using the Geoapify API
hotel_df["Hotel Name"] = ""

# Display sample data
hotel_df


Unnamed: 0,City,Country,Lat,Lng,Humidity,Hotel Name
19,galliano,US,29.4422,-90.2992,88.0,
59,red bluff,US,40.1785,-122.2358,32.0,
114,tolanaro,MG,-25.0319,46.9987,73.0,
148,kill devil hills,US,36.0307,-75.676,76.0,
150,hotan,CN,37.0997,79.9269,14.0,
151,greenville,US,35.6127,-77.3663,73.0,
158,haiku-pauwela,US,20.9219,-156.3051,86.0,
165,quartu sant'elena,IT,39.2419,9.1839,55.0,
173,castelli,AR,-25.9468,-60.6195,54.0,
210,tsiombe,MG,-25.3,45.4833,45.0,


### Step 4: For each city, use the Geoapify API to find the first hotel located within 10,000 metres of your coordinates.

In [59]:
rows = []

for index, row in narrowed_city_df.iterrows():
    # this is what i am extracting from the dataframe
    longitude = row.Lng
    latitude = row.Lat
    humidity = row.Humidity
    og_city = row.City

    # STEP 1:
    # Build URL using the places endpoint
    base_url = "https://api.geoapify.com/v2/places"

    # Set the parameters for the type of place
    categories = "accommodation.hotel"
    # conditions = "vegetarian" # no condition needed
    radius = 10000

    # Set the parameters for the type of search
    filters = f"circle:{longitude},{latitude},{radius}"
    bias = f"proximity:{longitude},{latitude}"
    limit = 20

    # set up a parameters dictionary
    params = {
    "categories":categories,
    # "conditions":conditions,
    "limit":limit,
    "filter":filters,
    "bias":bias,
    "apiKey":geoapify_key    
    }

    # Step 2:
    response = requests.get(base_url, params=params)

    # Step 3: 
    status_code = response.status_code

    # Step 4: 
    if status_code == 200:
        data = response.json()
    else:
        data = {} # failed request

    # Step 5: extraction
    results = data.get("features", [])

    # safe extraction for first item in list
    if len(results) > 0:
        result = results[0]
    else:
        result = {}

    # extract safely the data that we care about from first result
    properties = result.get("properties", {})

    state = properties.get("state")
    country = properties.get("country")
    city = properties.get("city")
    address = properties.get("formatted")
    distance = properties.get("distance") # just for places
    name = properties.get("name")

    # row for potential dataframe later on
    row = {
        "original_city": og_city,
        "longitude": longitude,
        "latitude": latitude,
        "humidity": humidity,
        "hotel": name,
        "address": address,
        "city": city,
        "state": state,
        "country": country,
        "distance": distance
    }

    print(og_city)

    rows.append(row) 

galliano
red bluff
tolanaro
kill devil hills
hotan
greenville
haiku-pauwela
quartu sant'elena
castelli
tsiombe
manhattan
rundu
tralee
lumberton
novo horizonte
laguna
toliara
urumqi
nema
zouerate
palmer
arkalyk
thulamahashi
watertown
longboat key
lata
mazatlan
nantucket
harsin
urangan
poyarkovo
hwange
ceres
iscar
imbituba
yeppoon


In [60]:
hotel_df = pd.DataFrame(rows)
hotel_df

Unnamed: 0,original_city,longitude,latitude,humidity,hotel,address,city,state,country,distance
0,galliano,-90.2992,29.4422,88.0,Fairfield Inn & Suites by Marriott Cut Off-Gal...,Fairfield Inn & Suites by Marriott Cut Off-Gal...,,Louisiana,United States,3522.0
1,red bluff,-122.2358,40.1785,32.0,Travelodge by Wyndham Red Bluff,"Travelodge by Wyndham Red Bluff, 38 Antelope B...",Red Bluff,California,United States,487.0
2,tolanaro,46.9987,-25.0319,73.0,Hôtel Mahavokey,"Hôtel Mahavokey, Rue Maréchal Joffre, Bazaribe...",Tolanaro,Anosy,Madagascar,395.0
3,kill devil hills,-75.676,36.0307,76.0,Mariner Days Inn & Suites,"Mariner Days Inn & Suites, North Virginia Dare...",Kill Devil Hills,North Carolina,United States,670.0
4,hotan,79.9269,37.0997,14.0,Hua Yi International Hotel (accepts foreigners),Hua Yi International Hotel (accepts foreigners...,Hotan City,Xinjiang,China,166.0
5,greenville,-77.3663,35.6127,73.0,Quality Inn,"Quality Inn, South Memorial Drive, Greenville,...",Greenville,North Carolina,United States,2731.0
6,haiku-pauwela,-156.3051,20.9219,86.0,Inn At Mama's Fish House,"Inn At Mama's Fish House, 799 Poho Place, Paia...",Paia,Hawaii,United States,6460.0
7,quartu sant'elena,9.1839,39.2419,55.0,Hotel Italia,"Hotel Italia, Via Alfredo Panzini, 09045 Quart...",Quartu Sant'Aleni/Quartu Sant'Elena,Sardinia,Italy,914.0
8,castelli,-60.6195,-25.9468,54.0,Hotel Clarice,"Hotel Clarice, Mariano Moreno, Juan Jose Caste...",Municipio de Juan José Castelli,Chaco,Argentina,254.0
9,tsiombe,45.4833,-25.3,45.0,,,,,,


In [67]:
#dropping the null so my map works
hotel_df = hotel_df.dropna(how = "any").reset_index(drop=True)
hotel_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   original_city  24 non-null     object 
 1   longitude      24 non-null     float64
 2   latitude       24 non-null     float64
 3   humidity       24 non-null     float64
 4   hotel          24 non-null     object 
 5   address        24 non-null     object 
 6   city           24 non-null     object 
 7   state          24 non-null     object 
 8   country        24 non-null     object 
 9   distance       24 non-null     float64
dtypes: float64(4), object(6)
memory usage: 2.0+ KB


### Step 5: Add the hotel name and the country as additional information in the hover message for each city in the map.

In [68]:
%%capture --no-display

# Configure the map plot
hotel_data_plot = hotel_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    alpha = 0.5,
    tiles = "OSM",
    frame_width = 700,
    frame_height = 500,
    size = "humidity",
    color = "city",
    hover_cols = ["address"]
)
# Display the map
hotel_data_plot