In [1]:
# Notebook Overview:

# the goal here was to check the API operation and test how the data transformation and sub selection was made.

In [18]:
!pip install -r requirements_jupyter.txt



In [8]:
import os
import requests
import pandas as pd
#from dotenv import load_dotenv
from dotenv import dotenv_values
from sqlalchemy import create_engine, inspect


CONFIG = dotenv_values('.env')
if not CONFIG:
    CONFIG = os.environ

connection_uri = "postgresql+psycopg2://{}:{}@{}:{}".format(
    CONFIG["POSTGRES_USER"],
    CONFIG["POSTGRES_PASSWORD"],
    CONFIG['POSTGRES_HOST'],
    CONFIG["POSTGRES_PORT"],
)

params = {
    'access_key':CONFIG['API_ACCESS_KEY'],
    'limit':100
    #,
    #'flight_status':'active' # Sometimes the api has no flights active
}
url = 'http://api.aviationstack.com/v1/flights'

In [9]:
engine = create_engine(connection_uri, pool_pre_ping=True)
engine.connect()

<sqlalchemy.engine.base.Connection at 0xffff94108cd0>

In [10]:
try:
    # Send an HTTP GET request with a timeout
    api_result = requests.get(url, params=params, timeout=100)

    # Check if the request was successful (HTTP status code 200)
    if api_result.status_code == 200:
        # Parse the JSON response
        api_response = api_result.json()
        # Now you can work with the `api_response` data
    else:
        # Handle HTTP error responses
        print(f"HTTP Error {api_result.status_code}: {api_result.text}")

except requests.exceptions.RequestException as e:
    # Handle network-related errors
    print(f"Request Exception: {e}")

except ValueError as ve:
    # Handle JSON parsing errors
    print(f"JSON Parsing Error: {ve}")

In [11]:
# Flatten the JSON data into a DataFrame
df = pd.json_normalize(api_response['data'])

# Select and rename specific columns
df = df[['flight_date', 'flight_status', 'departure.airport', 'departure.timezone','arrival.airport','arrival.timezone','arrival.terminal','airline.name', 'flight.number']]
df.columns = ['flight_date', 'flight_status', 'departure_airport', 'departure_timezone','arrival_airport','arrival_timezone','arrival_terminal','airline_name', 'flight_number']

# Replace '/' with '-' in specific columns
df['departure_timezone'] = df['departure_timezone'].str.replace('/', ' - ')
df['arrival_timezone'] = df['arrival_timezone'].str.replace('/', ' - ')

display(df)

Unnamed: 0,flight_date,flight_status,departure_airport,departure_timezone,arrival_airport,arrival_timezone,arrival_terminal,airline_name,flight_number
0,2023-10-05,landed,Port Lincoln,Australia - Adelaide,Adelaide International Airport,Australia - Adelaide,4,Thai AirAsia,540


In [11]:
memory_usage = df.memory_usage(deep=True).sum()

# Print the memory usage in bytes
print(f"Memory Usage: {memory_usage} bytes")

# Convert memory usage to a more human-readable format (e.g., megabytes)
memory_usage_kb = memory_usage / (1024 )
print(f"Memory Usage: {memory_usage_kb:.2f} kilobytes")

Memory Usage: 687 bytes
Memory Usage: 0.67 kilobytes


In [12]:
raw_table_name = 'testdata'
df.to_sql(raw_table_name, engine, if_exists='replace')

1

In [13]:
def check_table_exists(table_name, engine):
    if table_name in inspect(engine).get_table_names():
        print(f"{table_name!r} exists in the DB!")
    else:
        print(f"{table_name} does not exist in the DB!")

check_table_exists(raw_table_name, engine)

'testdata' exists in the DB!


In [17]:
pd.read_sql(f"SELECT * FROM {raw_table_name}", engine)

Unnamed: 0,index,flight_date,flight_status,departure_airport,departure_timezone,arrival_airport,arrival_timezone,arrival_terminal,airline_name,flight_number
0,0,2023-10-05,landed,Port Lincoln,Australia - Adelaide,Adelaide International Airport,Australia - Adelaide,4,Thai AirAsia,540
