In [7]:
# Notebook Overview:

# The goal here is to verify that the API call works and to test how the data is transformed and how a subset of columns is selected.

# This notebook:
# - retrieves 10 records of active flights,
# - selects a subset of columns,
# - renames the columns,
# - modifies some column data (the timezone separators),
# - calculates the in-memory size of the data batch.

In [5]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv

# Pull variables defined in a local .env file into the process environment
load_dotenv()

# Endpoint queried by the request cell.
# NOTE(review): this is plain HTTP, so the access key travels unencrypted —
# switch to https if the API plan supports it (TODO confirm).
url = 'http://api.aviationstack.com/v1/flights'

# Query-string parameters: credentials, batch size and status filter.
# NOTE(review): os.environ.get yields None when API_ACCESS_KEY is unset;
# nothing here validates the key before the request is sent.
params = dict(
    access_key=os.environ.get('API_ACCESS_KEY'),
    limit=10,
    flight_status='active',
)

In [6]:
# Fetch the flights payload from the API.
#
# `api_response` is reset up front so that a failed request can never leave a
# stale payload from an earlier run in the kernel; it stays None unless
# the request succeeds AND the body parses as JSON.
api_response = None

try:
    # Send an HTTP GET request with a timeout
    api_result = requests.get(url, params=params, timeout=10)
except requests.exceptions.RequestException as e:
    # Handle network-related errors.
    # Connection-level error messages can embed the full request URL, query
    # string included, so mask the access key before it lands in saved output.
    message = str(e)
    access_key = params.get('access_key')
    if access_key:
        message = message.replace(access_key, '***')
    print(f"Request Exception: {message}")
else:
    if api_result.status_code == 200:
        # JSON parsing gets its own try block: in recent `requests` releases the
        # decode error is both a ValueError and a RequestException, so sharing
        # one try with the request would report it as a network error.
        try:
            api_response = api_result.json()
        except ValueError as ve:
            # Handle JSON parsing errors
            print(f"JSON Parsing Error: {ve}")
    else:
        # Handle HTTP error responses
        print(f"HTTP Error {api_result.status_code}: {api_result.text}")

In [7]:
# Flatten the nested JSON records into a DataFrame (nested keys become
# dot-separated column names such as 'departure.airport').
flights_raw = pd.json_normalize(api_response['data'])

# Source column -> output column. The mapping drives both the selection and
# the renaming, so the two can never drift out of step the way a positional
# `df.columns = [...]` assignment can. Dict order defines the column order.
column_names = {
    'flight_date': 'flight_date',
    'flight_status': 'flight_status',
    'departure.airport': 'departure_airport',
    'departure.timezone': 'departure_timezone',
    'arrival.airport': 'arrival_airport',
    'arrival.timezone': 'arrival_timezone',
    'arrival.terminal': 'arrival_terminal',
    'airline.name': 'airline_name',
    'flight.number': 'flight_number',
}

# Build the result in one chain so no column is assigned on a derived frame
# (avoids SettingWithCopyWarning). In the timezone columns '/' is replaced
# with ' - '; regex=False makes the literal match explicit regardless of the
# pandas version's default.
df = (
    flights_raw[list(column_names)]
    .rename(columns=column_names)
    .assign(
        departure_timezone=lambda frame: frame['departure_timezone'].str.replace('/', ' - ', regex=False),
        arrival_timezone=lambda frame: frame['arrival_timezone'].str.replace('/', ' - ', regex=False),
    )
)

display(df)

Unnamed: 0,flight_date,flight_status,departure_airport,departure_timezone,arrival_airport,arrival_timezone,arrival_terminal,airline_name,flight_number
0,2024-01-15,active,Indira Gandhi International,Asia - Kolkata,Ngurah Rai International,Asia - Makassar,I,Vistara,145
1,2024-01-15,active,Xianyang,Asia - Shanghai,Shenzhen,Asia - Shanghai,T3,Shandong Airlines,9466
2,2024-01-15,active,Melbourne - Tullamarine Airport,Australia - Melbourne,Sydney Kingsford Smith Airport,Australia - Sydney,3,Cathay Pacific,9052
3,2024-01-15,active,Melbourne - Tullamarine Airport,Australia - Melbourne,Perth International,Australia - Perth,4,Air New Zealand,7252
4,2024-01-15,active,Haneda Airport,Asia - Tokyo,Kobe,Asia - Tokyo,D,ANA,411
5,2024-01-15,active,Bangalore International Airport,Asia - Kolkata,Kuwait International,Asia - Kuwait,5,Jazeera Airways,432
6,2024-01-15,active,Melbourne - Tullamarine Airport,Australia - Melbourne,Sydney Kingsford Smith Airport,Australia - Sydney,3,Qantas,422
7,2024-01-15,active,Suvarnabhumi International,Asia - Bangkok,Luoyang,Asia - Shanghai,T2,Urumqi Airlines,2642
8,2024-01-15,active,Suvarnabhumi International,Asia - Bangkok,Daegu,Asia - Seoul,,T'Way Air,106
9,2024-01-15,active,Sydney Kingsford Smith Airport,Australia - Sydney,Hobart,Australia - Hobart,D,Qantas,5719


In [8]:
# Deep memory footprint of the whole frame in bytes (deep=True also counts
# the contents of object columns, not just the references to them)
total_bytes = df.memory_usage(deep=True).sum()

# Convert bytes to kilobytes, using 1024 bytes per kilobyte
memory_usage = total_bytes / 1024

# Report the batch size with two decimal places
print(f"Memory Usage: {memory_usage:.2f} kilobytes")

Memory Usage: 6.12 kilobytes
