# API City ranking historical from numbeo.com

In [17]:
# import libraries
import requests
import os
import pandas as pd
import sql_functions as sf
import psycopg2

In [18]:
#API download from www.numbeo.com 
base_url = 'https://www.numbeo.com/api//rankings_by_city_historical'
api_key = 'zlueew978sczoi' # extract the value for the api key
section = 1
url = base_url + '?' + 'api_key=' + api_key + '&section=1'
response = requests.get(url)
city_hist_livcost = response.json()


In [19]:
#Output JSON
city_hist_livcost

{'2020-mid': [{'country': 'Switzerland',
   'city_name': 'Zurich',
   'cpi_and_rent_index': 98.80333355150216,
   'rent_index': 64.36890115529944,
   'purchasing_power_incl_rent_index': 121.11735429720454,
   'restaurant_price_index': 120.38647500609562,
   'groceries_index': 131.22978274257514,
   'city_id': 6379,
   'cpi_index': 131.4883943334502},
  {'country': 'Switzerland',
   'city_name': 'Lugano',
   'cpi_and_rent_index': 86.65717230521128,
   'rent_index': 40.204932141752856,
   'purchasing_power_incl_rent_index': 101.15475315934373,
   'restaurant_price_index': 115.18645996739482,
   'groceries_index': 134.799149220362,
   'city_id': 6362,
   'cpi_index': 130.74950046473447},
  {'country': 'Switzerland',
   'city_name': 'Basel',
   'cpi_and_rent_index': 89.72312954761954,
   'rent_index': 46.60616873381468,
   'purchasing_power_incl_rent_index': 109.89984198471193,
   'restaurant_price_index': 131.92276406170222,
   'groceries_index': 126.04580461792969,
   'city_id': 6348,
  

In [20]:
# Normalize the JSON data for one year

df_2012 = pd.json_normalize(city_hist_livcost, 
                            record_path=["2012"], 
                            meta=["country", "city_name"], 
                            errors='ignore', 
                            meta_prefix="meta_")
print(df_2012)

         country           city_name  cpi_and_rent_index  rent_index  \
0         Norway           Trondheim          142.213776   59.164313   
1         Norway           Stavanger          137.762590   78.080955   
2    Switzerland              Zurich          124.674877   74.583585   
3         Norway                Oslo          117.969883   57.400172   
4    Switzerland              Geneva          122.060417   79.054238   
..           ...                 ...                 ...         ...   
252        India                Pune           23.780926    9.335651   
253        India             Chennai           23.205405    9.186201   
254        India               Kochi           21.785921    5.282557   
255        India  Thiruvananthapuram           21.227610    5.089061   
256        India            Ludhiana           22.168664    7.885916   

     purchasing_power_incl_rent_index  restaurant_price_index  \
0                           67.007954              160.230357   
1    

In [21]:
# Normalize the JSON data for 2012 until 2022
start_year = 2012
end_year = 2022

all_years_dfs = []  # List to store the dataframes

for year in range(start_year, end_year + 1):
    df = pd.json_normalize(city_hist_livcost[str(year)])
    df['year'] = year  # Add the 'year' column
    df = df[["year", "country", "city_name", "restaurant_price_index", "groceries_index"]]  # Select only the desired columns
    all_years_dfs.append(df)

# Concatenate all the dataframes into a single dataframe
city_rank_df = pd.concat(all_years_dfs, ignore_index=True)

# Print the combined dataframe
print(city_rank_df)

      year      country   city_name  restaurant_price_index  groceries_index
0     2012       Norway   Trondheim              160.230357       193.936492
1     2012       Norway   Stavanger              201.163181       147.735435
2     2012  Switzerland      Zurich              138.786508       143.698375
3     2012       Norway        Oslo              155.750221       139.217948
4     2012  Switzerland      Geneva              135.252613       138.950065
...    ...          ...         ...                     ...              ...
5088  2022        India      Kanpur               13.314035        22.192483
5089  2022     Pakistan     Karachi               15.207355        18.484952
5090  2022     Pakistan  Rawalpindi               16.179185        18.512800
5091  2022     Pakistan      Multan               11.802370        18.370703
5092  2022     Pakistan    Peshawar               14.386384        16.621125

[5093 rows x 5 columns]


In [22]:
#Info about df
city_rank_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5093 entries, 0 to 5092
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   year                    5093 non-null   int64  
 1   country                 5093 non-null   object 
 2   city_name               5093 non-null   object 
 3   restaurant_price_index  5093 non-null   float64
 4   groceries_index         5093 non-null   float64
dtypes: float64(2), int64(1), object(2)
memory usage: 199.1+ KB


## Push the DateFrame df_clean to sql

In [23]:
#Define schema for DBeaver
schema = 'capstone_travel_index'
engine = sf.get_engine()

In [24]:
#Import dataset to DBeaver
table_name = 'city_rank_df'
if engine!=None:
    try:
        city_rank_df.to_sql(name=table_name, # Name of SQL table
                        con=engine, # Engine or connection
                        if_exists='replace', # Drop the table before inserting new values 
                        schema=schema, # Use schmea that was defined earlier
                        index=False, # Write DataFrame index as a column
                        chunksize=5000, # Specify the number of rows in each batch to be written at a time
                        method='multi') # Pass multiple values in a single INSERT clause
        print(f"The {table_name} table was imported successfully.")
    # Error handling
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        engine = None

The city_rank_df table was imported successfully.


In [None]:
#The city_rank_df table was imported successfully.