In [12]:
import openai
import os
from util import get_api_key
# Configure the openai module with the key returned by the project helper
# get_api_key, so the key itself never appears in the notebook source.
openai.api_key = get_api_key()

In [13]:
# Names of every entry in the sibling schemas directory; each one is read
# as a schema file in a later cell.
schema_files = os.listdir(os.path.join('..', 'schemas'))

In [14]:
# Maps schema file name -> full text of that file; filled in by the next cell.
all_schemas = dict()

In [15]:
# Read every schema into memory, keyed by its file name (e.g. 'players.sql').
for schema_file in schema_files:
    # The context manager closes each handle as soon as it has been read;
    # previously every opened file was left open for the life of the kernel.
    with open(os.path.join('../schemas', schema_file), 'r') as schema_handle:
        all_schemas[schema_file] = schema_handle.read()

In [16]:
# System message for the chat request. Written as explicit pieces so the
# leading/trailing whitespace that is part of the prompt stays visible.
system_prompt = (
    "\n"
    "            You are a data engineer looking to generate an Airflow pipeline DAG skeleton \n"
    "            without the SQL details\n"
    "            "
)

In [17]:
# User message: embeds the source and target schemas and the DAG requirements.
# NOTE: this is an f-string, so a literal brace must be doubled. Airflow's
# Jinja macro is written {{ ds }}, which therefore needs four braces on each
# side here; the earlier two-brace form reached the model as "{ ds }".
user_prompt = f"""
                Generate a cumulative Airflow DAG that transforms 
                {all_schemas['player_seasons.sql']}
                into {all_schemas['players.sql']}
                use markdown for output and Postgres for queries
                The DAG depends on last season data from players table 
                and the DAG depends on past is true
                Make sure each run scans only one season and does a 
                FULL OUTER JOIN with the previous seasons data
                Use the {{{{ ds }}}} airflow parameter to filter season 
                All create table statements should include IF NOT EXISTS
            """

In [18]:
# Echo both prompts, system prompt first, to inspect the interpolated schema text.
print(system_prompt, user_prompt, sep='\n')


            You are a data engineer looking to generate an Airflow pipeline DAG skeleton 
            without the SQL details
            

                Generate a cumulative Airflow DAG that transforms 
                CREATE TABLE public.player_seasons (
    player_name text NOT NULL,
    age integer,
    height text,
    weight integer,
    college text,
    country text,
    draft_year text,
    draft_round text,
    draft_number text,
    gp real,
    pts real,
    reb real,
    ast real,
    netrtg real,
    oreb_pct real,
    dreb_pct real,
    usg_pct real,
    ts_pct real,
    ast_pct real,
    season integer NOT NULL
);
                into  CREATE TYPE season_stats AS (
                         season Integer,
                         pts REAL,
                         ast REAL,
                         reb REAL,
                         weight INTEGER
                       );
 CREATE TYPE scorer_class AS
     ENUM ('bad', 'average', 'good', 'star');


 CREATE TABLE pla

In [19]:
# Send the system and user prompts to the chat completions endpoint.
response = openai.chat.completions.create(
    model="gpt-4",
    temperature=0,
    messages=[
        dict(role="system", content=system_prompt),
        dict(role="user", content=user_prompt),
    ],
)
# Keep only the text of the first returned choice.
answer = response.choices[0].message.content

In [20]:
# Print the reply text so its line breaks and fenced code blocks are shown
# as plain text rather than as an escaped string repr.
print(answer)

Sure, here is a skeleton of an Airflow DAG that you can use as a starting point. Please note that this is a very basic skeleton and you will need to fill in the SQL details and other specifics according to your requirements.

```python
from airflow import DAG
from airflow.operators.postgres_operator import PostgresOperator
from datetime import datetime, timedelta

default_args = {
    'owner': 'airflow',
    'depends_on_past': True,
    'start_date': datetime(2021, 1, 1),
    'email': ['your-email@example.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'transform_player_seasons', default_args=default_args, schedule_interval='@daily')

t1 = PostgresOperator(
    task_id='create_type_season_stats',
    postgres_conn_id='your_connection_id',
    sql="""
    CREATE TYPE IF NOT EXISTS season_stats AS (
        season Integer,
        pts REAL,
        ast REAL,
        reb REAL,
        weight IN

In [21]:
# Ensure the output directory exists. exist_ok=True makes this cell safe to
# re-run and avoids the check-then-create race of a separate exists() test.
os.makedirs('output', exist_ok=True)

In [22]:
# Splitting on the fence marker leaves prose at even indices and fenced
# bodies at odd indices, so only odd-indexed segments can be code blocks.
fenced_segments = answer.split('```')[1::2]
# Keep the Python blocks and drop their first line (the "python" language
# tag); left in place it would be written to the .py file as a bare
# `python` statement and raise NameError when the DAG is loaded.
output = [
    segment.partition('\n')[2].strip('\n')
    for segment in fenced_segments
    if segment.startswith('python')
]
# Persist the extracted code. Python source files are UTF-8 by default, so
# write with that encoding regardless of the platform's locale.
with open('output/airflow_dag.py', 'w', encoding='utf-8') as dag_file:
    dag_file.write('\n'.join(output) + '\n')