ADR: Rewriting the Airbyte connectors in Python.

Status: What is the status, such as proposed, accepted, rejected, deprecated, superseded, etc.?

Accepted

Context: What is the issue that we're seeing that is motivating this decision or change?

Whenever Airbyte performed a full overwrite of a table, it created a new table. This is default Airbyte behavior that we could not change.
As a result, the tables had the change_tracking attribute set to false, whereas Dynamic Tables require it to be set to true.
Because of this, we had to alter the Airbyte-created tables manually, since the change could not be made part of a single orchestrated chain.

Decision: What is the change that we're proposing and/or doing?

We are rewriting the Airbyte connectors in Python within a notebook. This will allow us to orchestrate the process using Snowflake tasks, decommission Airbyte, and keep the entire pipeline within Snowflake.

Consequences: What becomes easier or more difficult to do because of this change?

Easier:

Orchestration

CI/CD, as everything is now done within Notebooks that can be connected to a Git repository

Managing the entire process from bronze to gold data layers becomes simpler 

In [None]:
ALTER SESSION SET TIMEZONE = 'Europe/London';

In [None]:
# Imports needed in both environments are done once, up front. `os` is required
# by the local fallback below and was previously never imported.
import json
import os

import pandas as pd
import requests
from requests.auth import HTTPBasicAuth

try:
    # Preferred path: take the active Snowpark session and read the Jira
    # credentials from the PROD.RAW.jira_username() / jira_api_token() functions.
    from snowflake.snowpark.context import get_active_session

    session = get_active_session()
    username = session.sql("SELECT PROD.RAW.jira_username()").collect()[0][0]
    api_token = session.sql("SELECT PROD.RAW.jira_api_token()").collect()[0][0]
except Exception:
    # Fallback: any failure above (missing package, no active session, failed
    # query) switches to credentials loaded from a .env file / the environment.
    # `session` stays undefined on this path.
    from dotenv import load_dotenv

    load_dotenv()
    # NOTE(review): on Windows the OS already defines USERNAME and environment
    # names are case-insensitive there, so 'username' may resolve to the login
    # name instead of the .env value -- confirm on the machines that use this path.
    username = os.getenv('username')
    api_token = os.getenv('api_token')

In [None]:
# Start time
import time
start_time = time.time()

class Jira():
    def __init__(self):
        """Store the Jira credentials and the request defaults shared by all calls.

        Reads the module-level ``username`` and ``api_token`` names, which must
        already be defined when the class is instantiated.
        """
        self.headers = {"Accept": "application/json"}
        self.username, self.api_token = username, api_token
        # HTTP basic auth built from the stored user name and API token.
        self.auth = HTTPBasicAuth(self.username, self.api_token)
    
    def get_jira_boards(self):
        """
        Download all boards from the Jira Agile ``/board`` endpoint.

        Requests pages of 50 boards until a response reports ``isLast``
        (treated as true when the key is absent). A non-200 response stops
        the paging: its status and body are printed and the boards collected
        so far are still returned.

        Returns:
            DataFrame with one row per board.
        """
        time.sleep(1)  # fixed one-second pause before the first request
        endpoint = "https://phlexglobal.atlassian.net/rest/agile/1.0/board"
        page_size = 50
        offset = 0
        collected = []

        finished = False
        while not finished:
            response = requests.get(
                url=endpoint,
                headers=self.headers,
                auth=self.auth,
                params={"startAt": offset, "maxResults": page_size},
            )

            if response.status_code != 200:
                # The printed prefix is Polish for "Error".
                print(f"Błąd {response.status_code}: {response.text}")
                break

            payload = response.json()
            collected.extend(payload.get("values", []))
            finished = payload.get("isLast", True)
            offset += page_size

        boards_df = pd.DataFrame(collected).reset_index(drop=True)
        return boards_df
    
    def get_unique_boards(self):
        """
        Gets unique board IDs from all Jira boards.
        
        Returns:
            list of unique board IDs
        """
        unique_boards = self.get_jira_boards()['id'].astype(str).unique().tolist()
        return unique_boards

    def get_jira_issue_fields(self):
        """
        Fetch the issue-field list from ``/rest/api/3/field``.

        The decoded JSON body is passed to ``pandas.DataFrame`` unchanged;
        the HTTP status code is not inspected.

        Returns:
            DataFrame built from the response body.
        """
        fields_url = 'https://phlexglobal.atlassian.net/rest/api/3/field'
        payload = requests.get(
            url=fields_url,
            headers=self.headers,
            auth=self.auth,
        ).json()
        return pd.DataFrame(payload)

    def get_jira_projects(self):
        """
        Fetch the projects from ``/rest/api/3/project`` with extra fields expanded.

        Returns:
            DataFrame with one row per project, without the ``properties`` column.

        Raises:
            RuntimeError: if Jira answers with a non-200 status. Every
                per-project method depends on this list, so failing loudly
                is preferable to building a frame from an error payload.
        """
        response = requests.get(
            url="https://phlexglobal.atlassian.net/rest/api/3/project",
            headers=self.headers,
            auth=self.auth,
            params={
                "expand": 'description,projectKeys,lead,issueTypes,url,insight,deletedby'
            },
        )
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to fetch Jira projects: {response.status_code}, {response.text}"
            )

        projects = pd.DataFrame(response.json())

        # errors='ignore' keeps an empty or differently shaped response from
        # raising KeyError when there is no 'properties' column to drop.
        return projects.drop(columns='properties', errors='ignore')
    def get_unique_jira_projects(self):
        
        unique_projects = self.get_jira_projects()['id'].astype(str).unique().tolist()
        return unique_projects
        
    def get_jira_project_versions(self):
        """
        Fetch the versions of every project returned by ``get_unique_jira_projects``.

        Each project's ``/version`` endpoint is paged 50 entries at a time
        until the response reports ``isLast`` (treated as true when absent).
        A non-200 response is printed and ends the paging for that project
        only; the remaining projects are still processed.

        Returns:
            DataFrame with one row per version across all projects.
        """
        time.sleep(1)  # fixed one-second pause before the first request
        page_size = 50
        all_versions = []

        for project_id in self.get_unique_jira_projects():
            start_at = 0

            while True:
                response = requests.get(
                    url=f"https://phlexglobal.atlassian.net/rest/api/3/project/{project_id}/version",
                    headers=self.headers,
                    auth=self.auth,
                    params={'startAt': start_at, 'maxResults': page_size},
                )

                if response.status_code != 200:
                    print(f"Error for project {project_id}: {response.status_code}, {response.text}")
                    break

                data = response.json()
                versions_batch = data.get('values', [])
                all_versions.extend(versions_batch)

                # An empty page also ends the loop: the offset could not
                # advance, so continuing would repeat the same request forever.
                if data.get('isLast', True) or not versions_batch:
                    break
                start_at += len(versions_batch)

        # Collect everything into a single DataFrame.
        return pd.DataFrame(all_versions)

    def get_jira_sprints(self):

        board_ids = self.get_unique_boards()
    
        startAt = 0
        maxResults = 50
        all_sprints = []
    
        # Fetch all sprints for each board
        for i in board_ids:
            start_at = 0  # Reset start_at for each board
            while True:
                url = f"https://phlexglobal.atlassian.net/rest/agile/1.0/board/{i}/sprint"
    
                params = {
                    "startAt": start_at,
                    "maxResults": maxResults
                }
    
                response = requests.get(
                    url,
                    headers=self.headers,
                    auth=self.auth,
                    params=params
                )
    
                if response.status_code != 200:
                    print(f"Error for board {i}: {response.status_code}, {response.text}")
                    break
    
                sprints = response.json()
                if not sprints.get("values"):  # No more sprints
                    break
                all_sprints.extend(sprints.get("values", []))
                start_at += maxResults
                print(f"Board {i}. Status code: {response.status_code}, Retrieved {len(sprints.get('values', []))} sprints")
        all_sprints = pd.DataFrame(all_sprints)
        return all_sprints
    
    def get_jira_users(self):
        """
        Fetch the users from ``/rest/api/3/users``, 100 per request.

        Paging stops on the first empty page. A non-200 response, or a body
        that is not a JSON list, also stops it: without that check an error
        object would be truthy, its keys would be appended as "users", and
        the loop would never terminate.

        Returns:
            DataFrame with one row per user collected.
        """
        start_at = 0
        max_results = 100
        all_users = []

        while True:
            response = requests.get(
                url="https://phlexglobal.atlassian.net/rest/api/3/users",
                headers=self.headers,
                auth=self.auth,
                params={"startAt": start_at, "maxResults": max_results},
            )

            if response.status_code != 200:
                print(f"Error fetching users: {response.status_code}, {response.text}")
                break

            users = response.json()
            if not isinstance(users, list) or not users:
                break

            all_users.extend(users)
            start_at += max_results

        # Convert to a DataFrame once all pages have been fetched.
        return pd.DataFrame(all_users)

    
    def get_jira_issues_projects(self, days_back=3):
        
        all_issues = []
        errors = []
    
        for pid in self.get_unique_jira_projects():
            start_at = 0
            max_results = 1000
    
            while True:
                params = {
                    "jql": f"project = {pid} AND updated >= -{days_back}d",
                    "maxResults": max_results,
                    "startAt": start_at,
                    "expand": "schema,changelog,transitions,editmeta,properties,renderedFields,versionedRepresentations"
                }
                url = "https://phlexglobal.atlassian.net/rest/api/3/search"
                headers = {"Accept": "application/json"}
    
                response = requests.get(url, auth=self.auth, headers=self.headers, params=params)
    
                if response.status_code != 200:
                    error_detail = {
                        "project": pid,
                        "status_code": response.status_code,
                        "response": response.text
                    }
                    print(f"[ERROR] Failed to fetch project {pid}: {response.status_code}")
                    errors.append(error_detail)
                    break  # Przerywamy tylko ten projekt, przechodzimy do kolejnego
    
                data = response.json()
                issues_list = data.get('issues', [])
                all_issues.extend(issues_list)
    
                total = data.get('total', 0)
                start_at += max_results
    
                if start_at >= total:
                    print(f"[INFO] Project {pid} done, total: {total}")
                    break
    
        if not all_issues:
            print("[INFO] No issues retrieved.")
            return pd.DataFrame()  # Zwróć pusty DataFrame jeśli brak danych
    
        issues_df = pd.DataFrame(all_issues)
    
        # --- Funkcja pomocnicza: wyciąganie daty aktualizacji ---
        def extract_updated(versioned):
            if isinstance(versioned, str):
                try:
                    versioned = json.loads(versioned)
                except:
                    return None
            if isinstance(versioned, dict):
                updated_field = versioned.get('updated')
                if isinstance(updated_field, dict):
                    first_value = next(iter(updated_field.values()), None)
                    if isinstance(first_value, str):
                        return first_value.split('T')[0]
                elif isinstance(updated_field, str):
                    return updated_field.split('T')[0]
            return None
    
        issues_df['updated'] = issues_df.get('versionedRepresentations', None).apply(extract_updated)
        issues_df['updated'] = pd.to_datetime(issues_df['updated'], errors='coerce').dt.date
    
        # --- Zamiana złożonych struktur na JSON ---
        for col in ['versionedRepresentations', 'renderedFields', 'transitions', 'editmeta', 'changelog']:
            if col in issues_df.columns:
                issues_df[col] = issues_df[col].apply(
                    lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
                )
    
        issues_df.reset_index(drop=True, inplace=True)
        issues_df = issues_df.drop(columns='updated')
        if errors:
            print("\n[WARNING] Some projects could not be fetched:")
            for err in errors:
                print(f"- Project: {err['project']}, Status: {err['status_code']}, Message: {err['response']}")
    
        return issues_df

# Usage
api = Jira()

In [None]:
unq = api.get_unique_jira_projects()
unq

In [None]:
# jira_boards

boards = api.get_jira_boards()
boards

In [None]:
python_boards_table = 'BOARDS_FROM_PYTHON'

session.write_pandas(boards,
                         table_name=python_boards_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")

In [None]:

-- create or replace table prod.raw.jira_boards(
-- ID int,
-- NAME varchar,
-- SELF varchar,
-- TYPE varchar,
-- LOCATION variant,
-- PROJECTID int,
-- PROJECTKEY varchar,
-- timestamp timestamp_tz
-- );

-- Replace the contents of prod.raw.jira_boards with the rows staged in
-- BOARDS_FROM_PYTHON. INSERT OVERWRITE truncates and reloads in one statement,
-- so a failed load cannot leave the table empty (a separate TRUNCATE could).
-- The INSERT has no column list: the SELECT order must match the table's column order.
INSERT OVERWRITE INTO prod.raw.jira_boards
SELECT
    CAST("id" AS INT)                       AS id,
    CAST("name" AS STRING)                  AS name,
    CAST("self" AS STRING)                  AS self,
    CAST("type" AS STRING)                  AS type,
    CAST("location" AS VARIANT)             AS location,
    -- NOTE(review): the unquoted `location` below presumably resolves to the
    -- alias defined on the previous line (the staged column is the quoted,
    -- lower-case "location") -- confirm.
    CAST(location:projectId AS INT)       AS projectId,
    CAST(location:projectKey AS STRING)   AS projectKey,
    -- value:isPrivate             AS isPrivate,
    -- value:location:displayName  AS displayName,
    -- value:location:projectName  AS projectName,
    -- value:location:projectTypeKey AS projectTypeKey,
    -- value:location:avatarURI    AS avatarURL,
    -- value:name_json             AS name_json,
    -- value:location:userId       AS userId,
    -- value:location:userAccountId AS userAccountId
    current_timestamp as timestamp
FROM BOARDS_FROM_PYTHON;

In [None]:
# jira issue fields
issue_fields = api.get_jira_issue_fields()
issue_fields

In [None]:
python_issue_fields_table = 'ISSUE_FIELDS_FROM_PYTHON'

session.write_pandas(issue_fields,
                         table_name=python_issue_fields_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")



In [None]:
-- CREATE OR REPLACE TABLE prod.raw.jira_issue_fields (
--     id STRING,
--     key STRING,
--     name STRING,
--     scope VARIANT,
--     custom BOOLEAN,
--     schema VARIANT,
--     navigable BOOLEAN,
--     orderable BOOLEAN,
--     searchable BOOLEAN,
--     clauseNames VARIANT,
--     untranslatedName varchar,
--     timestamp TIMESTAMP
-- );

-- Replace the contents of prod.raw.jira_issue_fields with the rows staged in
-- ISSUE_FIELDS_FROM_PYTHON. INSERT OVERWRITE truncates and reloads in one
-- statement, so a failed load cannot leave the table empty.
-- The INSERT has no column list: the SELECT order must match the table's column order.
INSERT OVERWRITE INTO prod.raw.jira_issue_fields
SELECT
    "id",
    "key",
    "name",
    "scope",
    "custom",
    "schema",
    "navigable",
    "orderable",
    "searchable",
    "clauseNames",
    "untranslatedName",
    CURRENT_TIMESTAMP AS timestamp
FROM ISSUE_FIELDS_FROM_PYTHON;


In [None]:
# jira projects
projects = api.get_jira_projects()
projects

In [None]:
python_projects_table = 'PROJECTS_FROM_PYTHON'

session.write_pandas(projects,
                         table_name=python_projects_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")

In [None]:
-- CREATE or replace TABLE prod.raw.jira_projects (
--     id INT,
--     key STRING,
--     url STRING,
--     lead VARIANT,
--     name STRING,
--     self STRING,
--     uuid STRING,
--     email STRING,
--     roles STRING,
--     style STRING,
--     expand STRING,
--     deleted BOOLEAN,
--     insight STRING,
--     archived BOOLEAN,
--     entityId string,
--     versions STRING,
--     deletedby VARIANT,
--     favourite STRING,
--     isPrivate BOOLEAN,
--     archivedby STRING,
--     avatarUrls VARIANT,
--     components STRING,
--     issueTypes STRING,
--     properties STRING,
--     simplified BOOLEAN,
--     deletedDate DATE,
--     description STRING,
--     permissions STRING,
--     archivedDate STRING,
--     assigneetype STRING,
--     projectTypeKey STRING,
--     projectCategory VARIANT,
--     retentionTillDate TIMESTAMP_TZ,
--     issueTypeHierarchy STRING,
--     timestamp timestamp_tz
-- );

-- Replace the contents of prod.raw.jira_projects with the rows staged in
-- PROJECTS_FROM_PYTHON. INSERT OVERWRITE truncates and reloads in one
-- statement, so a failed load cannot leave the table empty.
-- The INSERT has no column list: the SELECT order must match the table's
-- column order. Positions filled with NULL are not read from the staged table.
INSERT OVERWRITE INTO prod.raw.jira_projects
SELECT
    "id",
    "key",
    "url",
    "lead",
    "name",
    "self",
    "uuid",
    NULL AS "email",
    NULL AS "roles",
    "style",
    "expand",
    NULL AS "deleted",
    NULL AS "insight",
    "archived",
    "entityId",
    NULL AS "versions",
    NULL AS "deletedby",
    NULL AS "favourite",
    "isPrivate",
    NULL AS "archivedby",
    "avatarUrls",
    NULL AS "components",
    "issueTypes",
    NULL AS "properties",
    "simplified",
    NULL AS "deletedDate",
    "description",
    NULL AS "permissions",
    "archivedDate",
    NULL AS "assigneetype",
    "projectTypeKey",
    "projectCategory",
    NULL AS "retentionTillDate",
    NULL AS "issueTypeHierarchy",
    current_timestamp AS timestamp
FROM PROJECTS_FROM_PYTHON;

In [None]:
# jira users
users = api.get_jira_users()
users

In [None]:
python_users_table = 'USERS_FROM_PYTHON'

session.write_pandas(users,
                         table_name=python_users_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")

In [None]:
-- CREATE OR REPLACE TABLE prod.raw.jira_users (
--     key STRING,
--     name STRING,
--     self STRING,
--     active BOOLEAN,
--     expand STRING,
--     groups STRING,
--     locale STRING,
--     timeZone STRING,
--     accountId STRING,
--     avatarUrls STRING,
--     accountType STRING,
--     displayName STRING,
--     emailAddress STRING,
--     applicationRoles STRING,
--     timestamp timestamp_tz
-- );

-- Replace the contents of prod.raw.jira_users with the rows staged in
-- USERS_FROM_PYTHON. INSERT OVERWRITE truncates and reloads in one statement,
-- so a failed load cannot leave the table empty.
-- The INSERT has no column list: the SELECT order must match the table's
-- column order. Positions filled with null are not read from the staged table.
INSERT OVERWRITE INTO prod.raw.jira_users
SELECT
    null as  "key",
    null as "name",
    "self",
    "active",
    null as "expand",
    null as "groups",
    "locale",
    "timeZone",
    "accountId",
    "avatarUrls",
    "accountType",
    "displayName",
    "emailAddress",
    null as "applicationRoles",
    current_timestamp AS timestamp
FROM USERS_FROM_PYTHON;

In [None]:
# jira project versions
project_versions = api.get_jira_project_versions()
project_versions

In [None]:
python_project_versions_table = 'PROJECT_VERSIONS_FROM_PYTHON'

session.write_pandas(project_versions,
                         table_name=python_project_versions_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")

In [None]:
-- CREATE OR REPLACE TABLE prod.raw.jira_project_versions (
--     id STRING,
--     name string,
--     self STRING,
--     expand STRING,
--     overdue BOOLEAN,
--     project STRING,
--     archived BOOLEAN,
--     released BOOLEAN,
--     projectId STRING,
--     startDate DATE,
--     releaseDate DATE,
--     userStartDate DATE,
--     userReleaseDate DATE,
--     description string,
--     timestamp timestamp_tz
-- );

-- Replace the contents of prod.raw.jira_project_versions with the rows staged
-- in PROJECT_VERSIONS_FROM_PYTHON. INSERT OVERWRITE truncates and reloads in
-- one statement, so a failed load cannot leave the table empty.
-- The INSERT has no column list: the SELECT order must match the table's
-- column order. Positions filled with null are not read from the staged table.
INSERT OVERWRITE INTO prod.raw.jira_project_versions
SELECT
    "id",
    "name",
    "self",
    null as "expand",
    "overdue",
    null as "project",
    "archived",
    "released",
    "projectId",
    "startDate",
    "releaseDate",
    "userStartDate",
    "userReleaseDate",
    "description",
    current_timestamp AS timestamp
FROM PROJECT_VERSIONS_FROM_PYTHON;

In [None]:
# jira sprints
sprints = api.get_jira_sprints()
sprints

In [None]:
python_sprints_table = 'SPRINTS_FROM_PYTHON'

session.write_pandas(sprints,
                         table_name=python_sprints_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")


In [None]:
-- CREATE OR REPLACE TABLE prod.raw.jira_sprints (
--     id INT,
--     goal STRING,
--     name STRING,
--     self STRING,
--     state STRING,
--     boardId INT,
--     endDate date,
--     startDate DATE,
--     createdDate DATE,
--     completeDate DATE,
--     originBoardId INT,
--     timestamp timestamp_tz
-- );

-- Replace the contents of prod.raw.jira_sprints with the rows staged in
-- SPRINTS_FROM_PYTHON. INSERT OVERWRITE truncates and reloads in one statement,
-- so a failed load cannot leave the table empty.
-- The INSERT has no column list: the SELECT order must match the table's column order.
INSERT OVERWRITE INTO prod.raw.jira_sprints
SELECT
    "id",
    "goal",
    "name",
    "self",
    "state",
    -- NOTE(review): boardId is filled from the sprint's own "id", not from a
    -- board identifier -- confirm whether "originBoardId" or the queried
    -- board ID was intended.
    "id" as boardId,
    -- NOTE(review): the end date is shifted forward by one hour before the
    -- insert; the reason is not visible here -- confirm it is still wanted.
    DATEADD(hour, 1, "endDate") as "endDate",
    "startDate",
    "createdDate",
    "completeDate",
    "originBoardId",
    current_timestamp AS timestamp
FROM SPRINTS_FROM_PYTHON;

In [None]:
# projects = api.get_jira_projects()
# projects_unique = projects['id'].unique().tolist()
# projects_unique

In [None]:
issues = api.get_jira_issues_projects()
issues

In [None]:
temp_issues_table = 'TEMP_ISSUES'

session.write_pandas(issues,
                         table_name=temp_issues_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")

In [None]:
-- Append every row staged in TEMP_ISSUES to prod.raw.jira_issues.
-- Unlike the other loads in this notebook, the target is not emptied first,
-- so each run adds to the existing rows.
-- The mapping is positional (`select *`, no column list): the staged table's
-- columns must line up with the target's columns in number and order.
-- NOTE(review): presumably issues updated inside the look-back window are
-- inserted again on every run and de-duplicated downstream -- confirm.
insert into prod.raw.jira_issues
select * from TEMP_ISSUES

In [None]:
-- select  * from prod.raw.jira_issues
-- where "id" like '525164%'


In [None]:
-- CREATE OR REPLACE FUNCTION get_sprint_issues(
--     sprint_id int, 
--     jira_username STRING, 
--     jira_api_token STRING
-- )
-- RETURNS VARIANT
-- LANGUAGE PYTHON
-- RUNTIME_VERSION = '3.9'
-- EXTERNAL_ACCESS_INTEGRATIONS = (jira_integration)
-- PACKAGES = ('requests', 'pandas')
-- HANDLER = 'get_sprint_issues'
-- AS
-- $$
-- import requests
-- import pandas as pd
-- import time

-- def get_sprint_issues(sprint_id, jira_username, jira_api_token):
--     if not jira_username or not jira_api_token:
--         return {"status": "error", "message": "Missing credentials"}
--     time.sleep(2)
--     params = {
--         "startAt": 0,
--         "maxResults": 1000
--     }

--     url = f"https://phlexglobal.atlassian.net/rest/agile/1.0/sprint/{sprint_id}/issue"
--     headers = {"Accept": "application/json"}

--     for attempt in range(3):
--         response = requests.get(
--             url,
--             auth=(jira_username, jira_api_token),
--             headers=headers,
--             timeout=30,
--             params=params
--         )

--         if response.status_code == 429:  # rate limited
--             retry_after = int(response.headers.get("Retry-After", "3"))
--             time.sleep(retry_after)
--             continue
--         elif response.status_code != 200:
--             return {
--                 "status": "error",
--                 "code": response.status_code,
--                 "message": response.text
--             }
--         else:
--             break

--     issues = response.json().get("issues", [])

--     needed_columns = [
--         "id", "key", "self", "expand",
--         "fields.created", "fields.updated", 
--         "fields.customfield_10006", 
--         "fields.customfield_11302", 
--         "fields.status.description", 
--         "fields.status.iconUrl", 
--         "fields.status.id", 
--         "fields.status.name", 
--         "fields.status.self", 
--         "fields.status.statusCategory.colorName",
--         "fields.status.statusCategory.id",
--         "fields.status.statusCategory.key",
--         "fields.status.statusCategory.name",
--         "fields.status.statusCategory.self"
--     ]

--     try:
--         df = pd.json_normalize(issues)
--         df = df[needed_columns].reset_index()
--     except KeyError:
--         return []

--     return df.to_dict(orient="records")

-- $$;


In [None]:
-- Split the sprints into chunks of 100 rows each so that the API calls made
-- per sprint can be issued chunk by chunk.
-- Duplicates are removed in a CTE *before* numbering. ROW_NUMBER() is
-- evaluated before DISTINCT, so with both in the same SELECT every row
-- carries a unique number and nothing is de-duplicated.
-- A sprint that appears with differing state/startdate values still yields one
-- row per distinct combination.
CREATE OR REPLACE TABLE prod.silver.JIRA_SPRINTS_CHUNKED AS
WITH unique_sprints AS (
    SELECT DISTINCT
        ID,
        state,
        startdate
    FROM prod.silver.JIRA_SPRINTS
)
SELECT
    ID,
    state,
    CEIL(ROW_NUMBER() OVER (ORDER BY ID) / 100.0) AS chunk_id,
    startdate
FROM unique_sprints
ORDER BY state ASC;

select * from prod.silver.JIRA_SPRINTS_CHUNKED;

In [None]:
-- Append the issues of every sprint in chunks 1-5 of JIRA_SPRINTS_CHUNKED.
-- The inner query calls prod.raw.get_sprint_issues once per sprint row;
-- LATERAL FLATTEN then yields one output row per element of the returned value.
-- Each dotted name (e.g. "fields.status.name") is read as a single quoted key.
-- ID is the composite '<sprint_id>-<issue id>'.
-- The INSERT has no column list, so the SELECT order must match the column
-- order of the target table.
-- NOTE(review): the UDF is called with one argument; its deployed definition is
-- not visible in this cell -- confirm the signature.
insert into prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE

SELECT 
    sprint_id,
    value:"id"::STRING AS ISSUE_ID,
    value:"key"::STRING AS ISSUE_KEY,
    value:"self"::STRING AS issue_self,
    value:"expand"::STRING AS issue_expand,
    value:"fields.created"::STRING AS created,
    value:"fields.updated"::STRING AS updated,
    value:"fields.customfield_10006"::VARIANT AS customfield_10006,
    value:"fields.customfield_11302"::VARIANT AS customfield_11302,
    value:"fields.status.description"::STRING AS status_description,
    value:"fields.status.iconUrl"::STRING AS status_iconUrl,
    value:"fields.status.id"::STRING AS status_id,
    value:"fields.status.name"::STRING AS status_name,
    value:"fields.status.self"::STRING AS status_self,
    value:"fields.status.statusCategory.colorName"::STRING AS status_color,
    value:"fields.status.statusCategory.id"::STRING AS status_cat_id,
    value:"fields.status.statusCategory.key"::STRING AS status_cat_key,
    value:"fields.status.statusCategory.name"::STRING AS status_cat_name,
    value:"fields.status.statusCategory.self"::STRING AS status_cat_self,
    (sprint_id || '-' || value:"id")::TEXT AS ID,
    current_timestamp as timestamp
FROM (
    SELECT 
      s.ID AS sprint_id,
      prod.raw.get_sprint_issues(s.ID) AS issues_json
    FROM prod.silver.JIRA_SPRINTS_CHUNKED s
    where s.chunk_id between 1 and 5
) t,
LATERAL FLATTEN(input => t.issues_json);

In [None]:
-- Append the issues of every sprint in chunks 6-10 of JIRA_SPRINTS_CHUNKED.
-- The inner query calls prod.raw.get_sprint_issues once per sprint row;
-- LATERAL FLATTEN then yields one output row per element of the returned value.
-- ID is the composite '<sprint_id>-<issue id>'.
-- The INSERT has no column list, so the SELECT order must match the column
-- order of the target table.
insert into prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE

SELECT 
    sprint_id,
    value:"id"::STRING AS ISSUE_ID,
    value:"key"::STRING AS ISSUE_KEY,
    value:"self"::STRING AS issue_self,
    value:"expand"::STRING AS issue_expand,
    value:"fields.created"::STRING AS created,
    value:"fields.updated"::STRING AS updated,
    value:"fields.customfield_10006"::VARIANT AS customfield_10006,
    value:"fields.customfield_11302"::VARIANT AS customfield_11302,
    value:"fields.status.description"::STRING AS status_description,
    value:"fields.status.iconUrl"::STRING AS status_iconUrl,
    value:"fields.status.id"::STRING AS status_id,
    value:"fields.status.name"::STRING AS status_name,
    value:"fields.status.self"::STRING AS status_self,
    value:"fields.status.statusCategory.colorName"::STRING AS status_color,
    value:"fields.status.statusCategory.id"::STRING AS status_cat_id,
    value:"fields.status.statusCategory.key"::STRING AS status_cat_key,
    value:"fields.status.statusCategory.name"::STRING AS status_cat_name,
    value:"fields.status.statusCategory.self"::STRING AS status_cat_self,
    (sprint_id || '-' || value:"id")::TEXT AS ID,
        current_timestamp as timestamp
FROM (
    SELECT 
      s.ID AS sprint_id,
      prod.raw.get_sprint_issues(s.ID) AS issues_json
    FROM prod.silver.JIRA_SPRINTS_CHUNKED s
    where s.chunk_id between 6 and 10
) t,
LATERAL FLATTEN(input => t.issues_json);

In [None]:
-- Append the issues of every sprint in chunks 11-15 of JIRA_SPRINTS_CHUNKED.
-- The inner query calls prod.raw.get_sprint_issues once per sprint row;
-- LATERAL FLATTEN then yields one output row per element of the returned value.
-- ID is the composite '<sprint_id>-<issue id>'.
-- The INSERT has no column list, so the SELECT order must match the column
-- order of the target table.
insert into prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE

SELECT 
    sprint_id,
    value:"id"::STRING AS ISSUE_ID,
    value:"key"::STRING AS ISSUE_KEY,
    value:"self"::STRING AS issue_self,
    value:"expand"::STRING AS issue_expand,
    value:"fields.created"::STRING AS created,
    value:"fields.updated"::STRING AS updated,
    value:"fields.customfield_10006"::VARIANT AS customfield_10006,
    value:"fields.customfield_11302"::VARIANT AS customfield_11302,
    value:"fields.status.description"::STRING AS status_description,
    value:"fields.status.iconUrl"::STRING AS status_iconUrl,
    value:"fields.status.id"::STRING AS status_id,
    value:"fields.status.name"::STRING AS status_name,
    value:"fields.status.self"::STRING AS status_self,
    value:"fields.status.statusCategory.colorName"::STRING AS status_color,
    value:"fields.status.statusCategory.id"::STRING AS status_cat_id,
    value:"fields.status.statusCategory.key"::STRING AS status_cat_key,
    value:"fields.status.statusCategory.name"::STRING AS status_cat_name,
    value:"fields.status.statusCategory.self"::STRING AS status_cat_self,
    (sprint_id || '-' || value:"id")::TEXT AS ID,
        current_timestamp as timestamp
FROM (
    SELECT 
      s.ID AS sprint_id,
      prod.raw.get_sprint_issues(s.ID) AS issues_json
    FROM prod.silver.JIRA_SPRINTS_CHUNKED s
    where s.chunk_id between 11 and 15
) t,
LATERAL FLATTEN(input => t.issues_json);

In [None]:
-- Append the issues of every sprint in chunks 16-20 of JIRA_SPRINTS_CHUNKED.
-- The inner query calls prod.raw.get_sprint_issues once per sprint row;
-- LATERAL FLATTEN then yields one output row per element of the returned value.
-- ID is the composite '<sprint_id>-<issue id>'.
-- The INSERT has no column list, so the SELECT order must match the column
-- order of the target table.
insert into prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE

SELECT 
    sprint_id,
    value:"id"::STRING AS ISSUE_ID,
    value:"key"::STRING AS ISSUE_KEY,
    value:"self"::STRING AS issue_self,
    value:"expand"::STRING AS issue_expand,
    value:"fields.created"::STRING AS created,
    value:"fields.updated"::STRING AS updated,
    value:"fields.customfield_10006"::VARIANT AS customfield_10006,
    value:"fields.customfield_11302"::VARIANT AS customfield_11302,
    value:"fields.status.description"::STRING AS status_description,
    value:"fields.status.iconUrl"::STRING AS status_iconUrl,
    value:"fields.status.id"::STRING AS status_id,
    value:"fields.status.name"::STRING AS status_name,
    value:"fields.status.self"::STRING AS status_self,
    value:"fields.status.statusCategory.colorName"::STRING AS status_color,
    value:"fields.status.statusCategory.id"::STRING AS status_cat_id,
    value:"fields.status.statusCategory.key"::STRING AS status_cat_key,
    value:"fields.status.statusCategory.name"::STRING AS status_cat_name,
    value:"fields.status.statusCategory.self"::STRING AS status_cat_self,
    (sprint_id || '-' || value:"id")::TEXT AS ID,
        current_timestamp as timestamp
FROM (
    SELECT 
      s.ID AS sprint_id,
      prod.raw.get_sprint_issues(s.ID) AS issues_json
    FROM prod.silver.JIRA_SPRINTS_CHUNKED s
    where s.chunk_id between 16 and 20
) t,
LATERAL FLATTEN(input => t.issues_json);

In [None]:
-- Append the issues of every sprint in chunks 21-25 of JIRA_SPRINTS_CHUNKED.
-- The inner query calls prod.raw.get_sprint_issues once per sprint row;
-- LATERAL FLATTEN then yields one output row per element of the returned value.
-- ID is the composite '<sprint_id>-<issue id>'.
-- The INSERT has no column list, so the SELECT order must match the column
-- order of the target table.
insert into prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE

SELECT 
    sprint_id,
    value:"id"::STRING AS ISSUE_ID,
    value:"key"::STRING AS ISSUE_KEY,
    value:"self"::STRING AS issue_self,
    value:"expand"::STRING AS issue_expand,
    value:"fields.created"::STRING AS created,
    value:"fields.updated"::STRING AS updated,
    value:"fields.customfield_10006"::VARIANT AS customfield_10006,
    value:"fields.customfield_11302"::VARIANT AS customfield_11302,
    value:"fields.status.description"::STRING AS status_description,
    value:"fields.status.iconUrl"::STRING AS status_iconUrl,
    value:"fields.status.id"::STRING AS status_id,
    value:"fields.status.name"::STRING AS status_name,
    value:"fields.status.self"::STRING AS status_self,
    value:"fields.status.statusCategory.colorName"::STRING AS status_color,
    value:"fields.status.statusCategory.id"::STRING AS status_cat_id,
    value:"fields.status.statusCategory.key"::STRING AS status_cat_key,
    value:"fields.status.statusCategory.name"::STRING AS status_cat_name,
    value:"fields.status.statusCategory.self"::STRING AS status_cat_self,
    (sprint_id || '-' || value:"id")::TEXT AS ID,
        current_timestamp as timestamp
FROM (
    SELECT 
      s.ID AS sprint_id,
      prod.raw.get_sprint_issues(s.ID) AS issues_json
    FROM prod.silver.JIRA_SPRINTS_CHUNKED s
    where s.chunk_id between 21 and 25
) t,
LATERAL FLATTEN(input => t.issues_json);

In [None]:
-- Append the issues of every sprint whose chunk_id is above 25 in
-- JIRA_SPRINTS_CHUNKED (this filter has no upper bound).
-- The inner query calls prod.raw.get_sprint_issues once per sprint row;
-- LATERAL FLATTEN then yields one output row per element of the returned value.
-- ID is the composite '<sprint_id>-<issue id>'.
-- The INSERT has no column list, so the SELECT order must match the column
-- order of the target table.
insert into prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE

SELECT 
    sprint_id,
    value:"id"::STRING AS ISSUE_ID,
    value:"key"::STRING AS ISSUE_KEY,
    value:"self"::STRING AS issue_self,
    value:"expand"::STRING AS issue_expand,
    value:"fields.created"::STRING AS created,
    value:"fields.updated"::STRING AS updated,
    value:"fields.customfield_10006"::VARIANT AS customfield_10006,
    value:"fields.customfield_11302"::VARIANT AS customfield_11302,
    value:"fields.status.description"::STRING AS status_description,
    value:"fields.status.iconUrl"::STRING AS status_iconUrl,
    value:"fields.status.id"::STRING AS status_id,
    value:"fields.status.name"::STRING AS status_name,
    value:"fields.status.self"::STRING AS status_self,
    value:"fields.status.statusCategory.colorName"::STRING AS status_color,
    value:"fields.status.statusCategory.id"::STRING AS status_cat_id,
    value:"fields.status.statusCategory.key"::STRING AS status_cat_key,
    value:"fields.status.statusCategory.name"::STRING AS status_cat_name,
    value:"fields.status.statusCategory.self"::STRING AS status_cat_self,
    (sprint_id || '-' || value:"id")::TEXT AS ID,
        current_timestamp as timestamp
FROM (
    SELECT 
      s.ID AS sprint_id,
     prod.raw.get_sprint_issues(s.ID) AS issues_json
    FROM prod.silver.JIRA_SPRINTS_CHUNKED s
    where s.chunk_id > 25
) t,
LATERAL FLATTEN(input => t.issues_json);

In [None]:
-- insert into prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE

-- SELECT 
--     sprint_id,
--     value:"id"::STRING AS ISSUE_ID,
--     value:"key"::STRING AS ISSUE_KEY,
--     value:"self"::STRING AS issue_self,
--     value:"expand"::STRING AS issue_expand,
--     value:"fields.created"::STRING AS created,
--     value:"fields.updated"::STRING AS updated,
--     value:"fields.customfield_10006"::VARIANT AS customfield_10006,
--     value:"fields.customfield_11302"::VARIANT AS customfield_11302,
--     value:"fields.status.description"::STRING AS status_description,
--     value:"fields.status.iconUrl"::STRING AS status_iconUrl,
--     value:"fields.status.id"::STRING AS status_id,
--     value:"fields.status.name"::STRING AS status_name,
--     value:"fields.status.self"::STRING AS status_self,
--     value:"fields.status.statusCategory.colorName"::STRING AS status_color,
--     value:"fields.status.statusCategory.id"::STRING AS status_cat_id,
--     value:"fields.status.statusCategory.key"::STRING AS status_cat_key,
--     value:"fields.status.statusCategory.name"::STRING AS status_cat_name,
--     value:"fields.status.statusCategory.self"::STRING AS status_cat_self,
--     (sprint_id || '-' || value:"id")::TEXT AS ID
-- FROM (
--     SELECT 
--       distinct(s.ID) AS sprint_id,
--       get_sprint_issues(s.ID, $username, $api_token) AS issues_json
--     FROM metrics.staged.JIRA_SPRINTS s
--     -- where s.state <> 'closed'
-- ) t,
-- LATERAL FLATTEN(input => t.issues_json);

In [None]:
select * from prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE

In [None]:
-- Rewrite prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE in place, keeping one row per ID:
-- the row that sorts first by UPDATED descending. Every surviving row is
-- stamped with the current timestamp.
INSERT OVERWRITE INTO prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE
SELECT
    sprint_id,
    ISSUE_ID,
    ISSUE_KEY,
    issue_self,
    issue_expand,
    created,
    updated,
    customfield_10006,
    customfield_11302,
    status_description,
    status_iconUrl,
    status_id,
    status_name,
    status_self,
    status_color,
    status_cat_id,
    status_cat_key,
    status_cat_name,
    status_cat_self,
    ID,
    current_timestamp AS TIMESTAMP
FROM prod.raw.JIRA_SPRINT_ISSUES_SNOWFLAKE
-- QUALIFY filters on the window function directly, replacing the CTE with an
-- RN column and an outer WHERE RN = 1.
QUALIFY ROW_NUMBER() OVER (PARTITION BY ID ORDER BY UPDATED DESC) = 1;


In [None]:
# Stop time
end_time = time.time()

print("Elapsed time:", (end_time - start_time)/60, "minutes")