# City prices for a 1-bedroom apartment in the city centre, from the numbeo.com API

In [1]:
# import libraries
import requests
import os
import pandas as pd
import json
import sql_functions as sf

In [12]:
# Load the variables defined in the local .env file into the process environment,
# so that the API key can be read with os.getenv further down.

from dotenv import load_dotenv
load_dotenv()

True

Info from API numbeo.com

'yearLastUpdate': 2023

'item_id': 26, 'item_name': 'Apartment (1 bedroom) in City Centre, Rent Per Month'


In [14]:
# Load the list of cities (with their countries) from the Excel file data/Cities_numbeo_2.xlsx
city_list = pd.read_excel('data/Cities_numbeo_2.xlsx')


In [93]:
# Inspect the columns, dtypes and non-null counts of the loaded city list
city_list.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 554 entries, 0 to 553
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   city     554 non-null    object
 1   country  554 non-null    object
dtypes: object(2)
memory usage: 8.8+ KB


In [68]:
# Save the city list as CSV. With the default settings used here, pandas also writes
# the DataFrame index as an unnamed first column of the file.
city_list.to_csv('data/Cities_numbeo_new.csv')

In [95]:
# Collect the city names as a plain Python list for the API queries
city = city_list['city'].tolist()

In [97]:
# Collect the country names as a plain Python list, aligned row by row with the city list
country = city_list['country'].tolist()

In [73]:
# Map each city name to its country; used later to add a country column to the
# price table. The mapping is built straight from city_list, so it does not depend
# on the separate `city` / `country` list variables still holding their original
# values when this cell is (re-)run.
# NOTE: if a city name occurs more than once, the country of its last row wins.
city_count_dict = dict(zip(city_list['city'], city_list['country']))

In [None]:
# Download the rental prices for every city in the `city` list from the numbeo.com API
url = 'https://www.numbeo.com/api/city_prices'
api_key = os.getenv('numbeo_api_key')  # read the API key from the environment (.env)
if not api_key:
    # Fail early with a clear message instead of sending hundreds of unauthenticated requests
    raise RuntimeError("numbeo_api_key is not set; add it to the .env file before querying the API")
desired_item_ids = [26]  # 26 = 'Apartment (1 bedroom) in City Centre, Rent Per Month'
currency = "EUR"

data_list = []

# The loop variable must not be called `city`: that would replace the list of city
# names with a single string, so re-running this cell (or any cell that uses `city`)
# would silently work on the characters of the last city name.
for city_name in city:
    # Send one HTTP GET request per city; the timeout keeps a stalled connection
    # from blocking the whole download
    try:
        response = requests.get(
            url,
            params={"api_key": api_key, "query": city_name, "currency": currency},
            timeout=30,
        )
    except requests.RequestException as error:
        # Report the failure and go on with the next city, so that the results
        # collected so far are not lost
        print(f"Request for {city_name} failed:", error)
        continue

    # Skip cities for which the API did not answer successfully
    if response.status_code != 200:
        print(f"Request for {city_name} failed with status code:", response.status_code)
        continue

    # Keep only the price entries with a desired item id and tag each of them
    # with the queried city and the requested currency
    for price in response.json().get("prices", []):
        if price.get("item_id") in desired_item_ids:
            price["city"] = city_name
            price["currency"] = currency
            data_list.append(price)

# Create a DataFrame with one row per collected price entry
df = pd.DataFrame(data_list)

# Display the DataFrame
df

In [78]:
# Preview the first rows of the downloaded price data
df.head()

Unnamed: 0,data_points,item_id,lowest_price,average_price,highest_price,item_name,city,currency
0,16,26,450.0,563.909091,850.0,"Apartment (1 bedroom) in City Centre, Rent Per...",Aachen,EUR
1,10,26,536.738442,726.51382,858.781507,"Apartment (1 bedroom) in City Centre, Rent Per...",Aalborg,EUR
2,24,26,554.521899,644.923561,817.190166,"Apartment (1 bedroom) in City Centre, Rent Per...",Aberdeen,EUR
3,14,26,228.673539,491.224639,914.694156,"Apartment (1 bedroom) in City Centre, Rent Per...",Abidjan,EUR
4,13,26,79.741209,641.334624,1500.0,"Apartment (1 bedroom) in City Centre, Rent Per...",Accra,EUR


### We got data for 525 cities: the monthly rent in EUR of a 1-bedroom apartment in the city centre, from numbeo.com
### The output is a DataFrame with 525 rows × 8 columns

In [79]:
# Inspect the columns, dtypes and non-null counts of the downloaded price data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525 entries, 0 to 524
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   data_points    525 non-null    int64  
 1   item_id        525 non-null    int64  
 2   lowest_price   520 non-null    float64
 3   average_price  525 non-null    float64
 4   highest_price  520 non-null    float64
 5   item_name      525 non-null    object 
 6   city           525 non-null    object 
 7   currency       525 non-null    object 
dtypes: float64(3), int64(2), object(3)
memory usage: 32.9+ KB


In [80]:
# Drop the columns that are not needed (data_points, item_id, lowest_price,
# highest_price, currency) and store the result in df_new; df itself is left unchanged
df_new = df.drop(columns=["data_points", 'item_id', 'lowest_price', 'highest_price', 'currency'])

In [83]:
# Add a country column by looking up each city name in city_count_dict;
# city names that are not keys of the dictionary get a missing value (NaN)
df_new['country'] = df_new['city'].map(city_count_dict)

In [89]:
# Preview df_new to check that the country column was added
df_new.head()

Unnamed: 0,average_price,item_name,city,country
0,563.909091,"Apartment (1 bedroom) in City Centre, Rent Per...",Aachen,Germany
1,726.51382,"Apartment (1 bedroom) in City Centre, Rent Per...",Aalborg,Denmark
2,644.923561,"Apartment (1 bedroom) in City Centre, Rent Per...",Aberdeen,United Kingdom
3,491.224639,"Apartment (1 bedroom) in City Centre, Rent Per...",Abidjan,Ivory Coast
4,641.334624,"Apartment (1 bedroom) in City Centre, Rent Per...",Accra,Ghana


In [103]:
# Spot-check the rows whose country is 'United States'
df_new[df_new['country'] == 'United States'].head()

Unnamed: 0,average_price,item_name,city,country
14,1357.033298,"Apartment (1 bedroom) in City Centre, Rent Per...",Albuquerque,United States
27,1743.72759,"Apartment (1 bedroom) in City Centre, Rent Per...",Atlanta,United States
30,2096.497436,"Apartment (1 bedroom) in City Centre, Rent Per...",Austin,United States
60,1451.9901,"Apartment (1 bedroom) in City Centre, Rent Per...",Boise,United States
64,2511.693686,"Apartment (1 bedroom) in City Centre, Rent Per...",Boston,United States


In [106]:
# Derive a daily apartment price by dividing the monthly price by 30
# (a month is approximated as 30 days)
df_new['price_per_day'] = df_new['average_price'].div(30)

In [109]:
# Preview df_new to check the new price_per_day column
df_new.head()

Unnamed: 0,average_price,item_name,city,country,price_per_day
0,563.909091,"Apartment (1 bedroom) in City Centre, Rent Per...",Aachen,Germany,18.79697
1,726.51382,"Apartment (1 bedroom) in City Centre, Rent Per...",Aalborg,Denmark,24.217127
2,644.923561,"Apartment (1 bedroom) in City Centre, Rent Per...",Aberdeen,United Kingdom,21.497452
3,491.224639,"Apartment (1 bedroom) in City Centre, Rent Per...",Abidjan,Ivory Coast,16.374155
4,641.334624,"Apartment (1 bedroom) in City Centre, Rent Per...",Accra,Ghana,21.377821


In [110]:
# Inspect the final columns, dtypes and non-null counts before uploading df_new
df_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525 entries, 0 to 524
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   average_price  525 non-null    float64
 1   item_name      525 non-null    object 
 2   city           525 non-null    object 
 3   country        525 non-null    object 
 4   price_per_day  525 non-null    float64
dtypes: float64(2), object(3)
memory usage: 20.6+ KB


## Push the DataFrame df_new to sql

In [111]:
# Name of the target database schema, and the database engine obtained from the
# project's sql_functions helper (presumably a SQLAlchemy engine; verify in sql_functions)
schema = 'capstone_travel_index'
engine = sf.get_engine()

In [112]:
# Write df_new to the database as table city_apart_prices
table_name = 'city_apart_prices'
if engine is not None:
    try:
        df_new.to_sql(name=table_name,  # Name of the SQL table
                      con=engine,  # Engine or connection
                      if_exists='replace',  # Drop an existing table before inserting the new values
                      schema=schema,  # Use the schema that was defined earlier
                      index=False,  # Do not write the DataFrame index as a column
                      chunksize=5000,  # Number of rows written per batch
                      method='multi')  # Pass multiple values in a single INSERT clause
        print(f"The {table_name} table was imported successfully.")
    # Error handling: catch Exception only. psycopg2 is not imported in the visible
    # notebook cells, so naming psycopg2.DatabaseError in this clause would raise a
    # NameError as soon as any error occurred; database errors are Exception
    # subclasses and are therefore still caught here.
    except Exception as error:
        print(error)
        # Mark the engine as unusable so that re-running this cell does not retry
        engine = None
else:
    # Make the skipped upload visible instead of silently doing nothing
    print(f"No database engine available; the {table_name} table was not imported.")

The city_apart_prices table was imported successfully.


In [None]:
#The city_apart_prices table was imported successfully.