### Extract
First, we extract posts and users from the JSONPlaceholder API.

In [1]:
import os
from urllib.parse import quote_plus

import requests
from psycopg2 import connect, sql

# Configure your PostgreSQL connection string.
# The password is read from the ETL_BITES_DB_PASSWORD environment variable so a real
# credential never has to be saved in the notebook; the placeholder is only a fallback.
db_password = os.environ.get("ETL_BITES_DB_PASSWORD", "your_password")
# Inside a quoted libpq value, backslashes and single quotes must be backslash-escaped.
db_password_escaped = db_password.replace("\\", "\\\\").replace("'", "\\'")
conn_string = f"dbname='etl_bites' user='joemiller' password='{db_password_escaped}' host='localhost' port='5432'"

def get_data_from_api(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return its decoded JSON body.

    Args:
        url: Endpoint to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on an unresponsive host.

    Returns:
        The parsed JSON payload (whatever ``response.json()`` yields).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error responses instead of parsing an error body as data.
    response.raise_for_status()
    return response.json()

# JSONPlaceholder endpoints for the two collections this notebook uses.
posts_url = "https://jsonplaceholder.typicode.com/posts"
users_url = "https://jsonplaceholder.typicode.com/users"

# Fetch posts first, then users.
posts_data, users_data = [
    get_data_from_api(endpoint) for endpoint in (posts_url, users_url)
]

### Transform
We then transform the extracted data by joining posts and users on the user ID.

In [6]:
def join_posts_and_users(posts, users):
    """Attach each post's author name by matching ``post['userId']`` to ``user['id']``.

    The inputs are left untouched: every returned post is a shallow copy, so
    re-running the cell always starts from the same raw data.

    Args:
        posts: Iterable of post dicts, each with a ``'userId'`` key.
        users: Iterable of user dicts, each with ``'id'`` and ``'name'`` keys.

    Returns:
        A new list of post dicts in the original order. Posts whose ``userId``
        matches a user gain an ``'author'`` key; unmatched posts are copied as-is.

    Raises:
        KeyError: If a post lacks ``'userId'`` or a user lacks ``'id'``/``'name'``.
    """
    # One pass over users builds the lookup, replacing a scan of every user per post.
    # If two users share an id the later one wins.
    author_by_user_id = {user['id']: user['name'] for user in users}

    combined = []
    for post in posts:
        enriched = dict(post)
        user_id = post['userId']
        if user_id in author_by_user_id:
            enriched['author'] = author_by_user_id[user_id]
        combined.append(enriched)
    return combined

combined_data = join_posts_and_users(posts_data, users_data)
# Preview a few records rather than dumping the full list into the cell output.
combined_data[:5]

[{'userId': 1,
  'id': 1,
  'title': 'sunt aut facere repellat provident occaecati excepturi optio reprehenderit',
  'body': 'quia et suscipit\nsuscipit recusandae consequuntur expedita et cum\nreprehenderit molestiae ut ut quas totam\nnostrum rerum est autem sunt rem eveniet architecto',
  'author': 'Leanne Graham'},
 {'userId': 1,
  'id': 2,
  'title': 'qui est esse',
  'body': 'est rerum tempore vitae\nsequi sint nihil reprehenderit dolor beatae ea dolores neque\nfugiat blanditiis voluptate porro vel nihil molestiae ut reiciendis\nqui aperiam non debitis possimus qui neque nisi nulla',
  'author': 'Leanne Graham'},
 {'userId': 1,
  'id': 3,
  'title': 'ea molestias quasi exercitationem repellat qui ipsa sit aut',
  'body': 'et iusto sed quo iure\nvoluptatem occaecati omnis eligendi aut ad\nvoluptatem doloribus vel accusantium quis pariatur\nmolestiae porro eius odio et labore et velit aut',
  'author': 'Leanne Graham'},
 {'userId': 1,
  'id': 4,
  'title': 'eum et est occaecati',
  

### Load
Next, we create the `api_data` table in our `etl_bites` database.

In [3]:
# Create tables in analytical DB
# This could also be done manually via a GUI (e.g. TablePlus) or with a SQL script
def execute_query_postgresql(conn_string, query):
    """Run ``query`` against PostgreSQL in a single transaction and close the connection.

    Args:
        conn_string: libpq connection string passed to ``psycopg2.connect``.
        query: SQL text (or a ``psycopg2.sql`` composable) to execute.

    Raises:
        psycopg2.Error: If connecting or executing fails; the transaction is
            rolled back before the error propagates.
    """
    conn = connect(conn_string)
    try:
        # `with conn` commits on success and rolls back on error, but it does NOT
        # close the connection, hence the explicit close() below.
        with conn:
            with conn.cursor() as cur:
                cur.execute(query)
    finally:
        conn.close()

# DDL for the load target: one row per post, with the author's name stored alongside it.
# DROP TABLE IF EXISTS makes this cell re-runnable, but it discards any rows already loaded.
create_api_data_table = '''
DROP TABLE IF EXISTS api_data CASCADE;
CREATE TABLE api_data (
    post_id INTEGER NOT NULL,
    title TEXT NOT NULL,
    body TEXT NOT NULL,
    user_id INTEGER NOT NULL,
    author TEXT NOT NULL
);
'''

# Execute the DDL against the database described by conn_string.
execute_query_postgresql(conn_string, create_api_data_table)

And then we load the transformed data into our analytical database.

In [7]:
def insert_data_to_postgresql(conn_string, table_name, data):
    """Insert the joined post records into ``table_name`` in one transaction.

    Args:
        conn_string: libpq connection string passed to ``psycopg2.connect``.
        table_name: Target table; quoted as an identifier, never interpolated raw.
        data: Iterable of dicts with ``'id'``, ``'title'``, ``'body'``,
            ``'userId'`` and ``'author'`` keys.

    Raises:
        KeyError: If a record is missing one of the required keys (raised before
            any database work starts).
        psycopg2.Error: If connecting or inserting fails; nothing is committed.
    """
    # The statement is the same for every row, so compose it once, not per item.
    query = sql.SQL(
        "INSERT INTO {} (post_id, title, body, user_id, author) VALUES (%s, %s, %s, %s, %s)"
    ).format(sql.Identifier(table_name))
    rows = [
        (item['id'], item['title'], item['body'], item['userId'], item['author'])
        for item in data
    ]

    conn = connect(conn_string)
    try:
        # `with conn` commits on success / rolls back on error; it does not close.
        with conn:
            with conn.cursor() as cur:
                cur.executemany(query, rows)
    finally:
        conn.close()

# Destination table for the joined post/author records.
table_name = "api_data"
insert_data_to_postgresql(
    conn_string=conn_string,
    table_name=table_name,
    data=combined_data,
)

In [4]:
%load_ext sql

In [8]:
%sql postgresql+psycopg2://{env['readonly_user']}:{env['readonly_user_password']}@{env['database_endpoint']}/{env['db_name']}
%sql postgresql+psycopg2://{'joemiller':'password'@{env['database_endpoint']}/etl_bites}

Traceback (most recent call last):
  File "/Users/joemiller/Documents/makers_academy/Data-Engineering/Projects/etl_bites/etl-bites-venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 145, in __init__
    self._dbapi_connection = engine.raw_connection()
                             ^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/joemiller/Documents/makers_academy/Data-Engineering/Projects/etl_bites/etl-bites-venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 3292, in raw_connection
    return self.pool.connect()
           ^^^^^^^^^^^^^^^^^^^
  File "/Users/joemiller/Documents/makers_academy/Data-Engineering/Projects/etl_bites/etl-bites-venv/lib/python3.11/site-packages/sqlalchemy/pool/base.py", line 452, in connect
    return _ConnectionFairy._checkout(self)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/joemiller/Documents/makers_academy/Data-Engineering/Projects/etl_bites/etl-bites-venv/lib/python3.11/site-packages/sqlalchemy/pool/base.py", line 1269

In [5]:
%%sql

-- Read back every column of every row in api_data.
SELECT *
FROM api_data;

Traceback (most recent call last):
  File "/Users/joemiller/Documents/makers_academy/Data-Engineering/Projects/etl_bites/etl-bites-venv/lib/python3.11/site-packages/sql/magic.py", line 196, in execute
    conn = sql.connection.Connection.set(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/joemiller/Documents/makers_academy/Data-Engineering/Projects/etl_bites/etl-bites-venv/lib/python3.11/site-packages/sql/connection.py", line 82, in set
    raise ConnectionError(
sql.connection.ConnectionError: Environment variable $DATABASE_URL not set, and no connect string given.

Connection info needed in SQLAlchemy format, example:
               postgresql://username:password@hostname/dbname
               or an existing connection: dict_keys([])
