# API Country prices from numbeo.com

In [4]:
# import libraries
import requests
import os
import pandas as pd
import json
import sql_functions as sf

In [5]:
# Load the variables defined in the local .env file into the environment so
# that the API key can be pulled from it.

from dotenv import load_dotenv
load_dotenv()

True

In [6]:
# Get the prices of the selected items for one city, example for London

url = 'https://www.numbeo.com/api/city_prices'
# Read the key from the environment (populated from .env by load_dotenv)
# instead of hardcoding a secret in the notebook.
api_key = os.environ.get("NUMBEO_API_KEY")
if not api_key:
    raise RuntimeError("NUMBEO_API_KEY is not set; add it to the .env file")
query = "London, United Kingdom"
# item_id values of the price entries to keep
london_item_ids = {1, 3, 4, 7, 13, 18, 78, 108}

# Send the HTTP GET request; the timeout keeps the cell from hanging forever
response = requests.get(url, params={"api_key": api_key, "query": query}, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()

    # Access the "prices" list from the response
    prices = data.get("prices", [])

    # Keep only the entries whose item_id is one of the desired values
    desired_items = [price for price in prices if price.get("item_id") in london_item_ids]

    # Print the desired items
    for item in desired_items:
        print("Item ID:", item.get("item_id"))
        print("Item Name:", item.get("item_name"))
        print("Lowest Price:", item.get("lowest_price"))
        print("Average Price:", item.get("average_price"))
        print("Highest Price:", item.get("highest_price"))
        print()
else:
    print("Request failed with status code:", response.status_code)

Item ID: 1
Item Name: Meal, Inexpensive Restaurant, Restaurants
Lowest Price: 10
Average Price: 19.404525
Highest Price: 30

Item ID: 3
Item Name: McMeal at McDonalds (or Equivalent Combo Meal), Restaurants
Lowest Price: 6.5
Average Price: 8
Highest Price: 10

Item ID: 4
Item Name: Domestic Beer (0.5 liter draught), Restaurants
Lowest Price: 4
Average Price: 6
Highest Price: 7.5

Item ID: 7
Item Name: Water (0.33 liter bottle) , Restaurants
Lowest Price: 1
Average Price: 1.4808450279326137
Highest Price: 3

Item ID: 13
Item Name: Water (1.5 liter bottle), Markets
Lowest Price: 0.6
Average Price: 1.1190384615384614
Highest Price: 2

Item ID: 18
Item Name: One-way Ticket (Local Transport), Transportation
Lowest Price: 1.7
Average Price: 2.8
Highest Price: 5

Item ID: 108
Item Name: Taxi 1km (Normal Tariff), Transportation
Lowest Price: 1.1
Average Price: 1.797591068161934
Highest Price: 3



In [7]:
# Use the pycountry library to build a list of all country names according to ISO 3166
import pycountry

# Materialise every ISO 3166 country record
countries = [*pycountry.countries]

# Keep only the name of each country record
country_list = [record.name for record in countries]


In [8]:
# API download from www.numbeo.com
# NOTE(review): the city_prices endpoint is queried with country names here;
# confirm this is intended rather than a dedicated country-level endpoint.
url = 'https://www.numbeo.com/api/city_prices'
# Read the key from the environment (populated from .env by load_dotenv)
# instead of hardcoding a secret in the notebook.
api_key = os.environ.get("NUMBEO_API_KEY")
if not api_key:
    raise RuntimeError("NUMBEO_API_KEY is not set; add it to the .env file")
# The countries to query are taken from country_list, built in the cell above
desired_item_ids = [1, 2, 3, 4, 7, 13, 18, 78, 108]
currency = "EUR"
request_timeout = 30  # seconds; keeps a stalled request from blocking the whole loop

data_list = []

# One session reuses the HTTP connection across the requests
with requests.Session() as session:
    for country in country_list:
        # Send the HTTP GET request for each country with currency parameter
        try:
            response = session.get(
                url,
                params={"api_key": api_key, "query": country, "currency": currency},
                timeout=request_timeout,
            )
        except requests.RequestException as error:
            # A network problem for one country must not abort the remaining ones
            print(f"Request for {country} failed: {error}")
            continue

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Request for {country} failed with status code:", response.status_code)
            continue

        # Parse the JSON response
        try:
            data = response.json()
        except ValueError as error:
            print(f"Response for {country} is not valid JSON: {error}")
            continue

        # Access the "prices" list from the response
        prices = data.get("prices", [])

        # Keep the desired items and tag each one with its country ("land")
        # and the requested currency; a copy is built so the parsed response
        # itself is not mutated.
        for price in prices:
            if price.get("item_id") in desired_item_ids:
                data_list.append({**price, "land": country, "currency": currency})

# Create a DataFrame from the data list
df = pd.DataFrame(data_list)

# Display the DataFrame
df

Unnamed: 0,data_points,item_id,lowest_price,average_price,highest_price,item_name,land,currency
0,0,1,17.454597,18.373260,22.966575,"Meal, Inexpensive Restaurant, Restaurants",Aruba,EUR
1,0,2,61.244200,64.306410,101.052930,"Meal for 2 People, Mid-range Restaurant, Three...",Aruba,EUR
2,0,3,9.186630,9.186630,9.186630,McMeal at McDonalds (or Equivalent Combo Meal)...,Aruba,EUR
3,0,4,2.755989,4.746425,6.430641,"Domestic Beer (0.5 liter draught), Restaurants",Aruba,EUR
4,0,7,0.918663,1.258909,1.837326,"Water (0.33 liter bottle) , Restaurants",Aruba,EUR
...,...,...,...,...,...,...,...,...
848,63,4,7.503169,11.754965,15.006338,"Domestic Beer (0.5 liter draught), Restaurants",Zimbabwe,EUR
849,81,7,0.250106,0.579207,1.384227,"Water (0.33 liter bottle) , Restaurants",Zimbabwe,EUR
850,101,13,0.375158,0.541896,1.000423,"Water (1.5 liter bottle), Markets",Zimbabwe,EUR
851,51,18,1.125475,1.500634,2.501056,"One-way Ticket (Local Transport), Transportation",Zimbabwe,EUR


In [None]:
#The output is a DataFrame with 742 rows × 8 columns

In [9]:
# Drop column data_points.
# errors="ignore" keeps the cell re-runnable: df is rebound here, so on a
# second run the column is already gone and a plain drop would raise KeyError.
df = df.drop(columns="data_points", errors="ignore")

In [None]:
# Build df_clean from df without the lowest_price and highest_price columns
price_bounds = ["lowest_price", "highest_price"]
df_clean = df.drop(columns=price_bounds)

In [None]:
#The output is the df_clean DataFrame with 742 rows and 5 columns

In [None]:
# Load worldcities.csv from the data folder; it supplies the iso3 code that is
# later added to df_clean as a new column
df_iso = pd.read_csv('data/worldcities.csv')
# Country name -> iso3 lookup (for repeated country names the last row wins)
iso3_map = {
    country_name: iso3_code
    for country_name, iso3_code in zip(df_iso['country'], df_iso['iso3'])
}

In [None]:
# Insert the iso3 codes into df_clean column iso3.
# The 'land' values are pycountry country names, while iso3_map is keyed by the
# country names used in worldcities.csv. Names spelled differently in the two
# sources would end up as NaN, so those gaps are filled with the alpha-3 code
# that pycountry itself provides for the same name.
pycountry_iso3 = {country.name: country.alpha_3 for country in pycountry.countries}
df_clean['iso3'] = (
    df_clean['land']
    .map(iso3_map)
    .fillna(df_clean['land'].map(pycountry_iso3))
)

In [None]:
# Display df_clean, which at this point also carries the iso3 column
# (the original run reported 742 rows and 6 columns)
df_clean

In [None]:
# Rename column average_price to price_avg_eur
column_renames = {"average_price": "price_avg_eur"}
df_clean = df_clean.rename(columns=column_renames)

In [None]:
# Drop columns land and currency.
# Rebinding with errors='ignore' (instead of an in-place drop) keeps the cell
# re-runnable: on a second run the columns are already gone and a plain drop
# would raise KeyError.
df_clean = df_clean.drop(columns=['land', 'currency'], errors='ignore')

In [None]:
# Preview the first rows of df_clean after the rename and the column drops
# (the original run reported 742 rows and 4 columns)
df_clean.head()

In [None]:
# Print a summary of df_clean (columns, dtypes, non-null counts)
df_clean.info()

## Push the DataFrame df_clean to sql

In [None]:
# Create the database engine via the project's sql_functions helper and name
# the schema the table will be written to
engine = sf.get_engine()
schema = 'capstone_travel_index'

In [None]:
# Write df_clean to the database as table country_avg_price
table_name = 'country_avg_price'
if engine is not None:
    try:
        df_clean.to_sql(name=table_name, # Name of SQL table
                        con=engine, # Engine or connection
                        if_exists='replace', # Drop the table before inserting new values
                        schema=schema, # Use the schema that was defined earlier
                        index=False, # Do not write the DataFrame index as a column
                        chunksize=5000, # Specify the number of rows in each batch to be written at a time
                        method='multi') # Pass multiple values in a single INSERT clause
        print(f"The {table_name} table was imported successfully.")
    # Error handling.
    # The previous clause also named psycopg2.DatabaseError, but psycopg2 is
    # never imported in this notebook, so any failure raised NameError instead
    # of reporting the real problem. Exception already covers database errors.
    except Exception as error:
        print(error)
        engine = None
else:
    # Make the skipped upload visible instead of silently doing nothing
    print(f"No database engine available; the {table_name} table was not imported.")

In [None]:
#The country_avg_price table was imported successfully.