In [None]:
-- Set this session's time zone to Europe/London.
ALTER SESSION SET TIMEZONE = 'Europe/London';

In [2]:
  # Resolve the Checkmarx API key: Snowflake secret when a session is available, otherwise a local .env file (requires the python-dotenv package)

# Libraries needed on both execution paths are imported once, up front.
import os
import time

import pandas as pd
import requests

try:
    # Snowflake notebook path: read the API key through the active session.
    from snowflake.snowpark.context import get_active_session
    session = get_active_session()
    results = session.sql("SELECT prod.raw.checkmarx_secret()").collect()[0][0]
except Exception as snowflake_error:
    # Local path: `except Exception` (not a bare `except`) so Ctrl-C / SystemExit
    # still propagate, and the reason for the fallback is shown instead of hidden.
    print(f"Snowflake session unavailable ({snowflake_error!r}); loading API key from .env")
    from dotenv import load_dotenv
    load_dotenv()  # Load .env file
    results = os.getenv('LUKE_API_KEY')  # Get environment variable
    if not results:
        # Best-effort warning only: later cells report the failed token request.
        print("Warning: LUKE_API_KEY is not set; Checkmarx authentication will fail")

In [4]:
class CheckmarxAPI:
    """
    Client for the Checkmarx REST API using JWT authentication.

    The API key is posted as a refresh token to the IAM token endpoint to obtain
    a JWT. The JWT is requested again once it is older than
    ``token_refresh_threshold`` seconds. All list endpoints are read page by
    page (``limit`` items per request) and returned as pandas DataFrames.
    """
    def __init__(self, api_key=None, timeout=60):
        """
        Args:
            api_key: Refresh token used to obtain JWTs. When omitted, the
                notebook-level ``results`` variable is used.
            timeout: Seconds to wait for each HTTP request before it fails
                with a ``requests`` exception instead of hanging forever.
        """
        self.token_url = "https://us.iam.checkmarx.net/auth/realms/cor/protocol/openid-connect/token"  # url for token retrieval
        self.base_url = "https://us.ast.checkmarx.net/api"
        self.jwt = None
        self.token_timestamp = None  # Track when token was obtained
        self.token_refresh_threshold = 28 * 60  # 28 minutes in seconds
        self.limit = 100  # page size for paginated endpoints
        self.timeout = timeout
        # `results` is only looked up when no key was passed explicitly.
        self.api_key = results if api_key is None else api_key

    def get_jwt_token(self):
        """
        Request a new JWT and store it on the instance.

        Returns:
            The access token string, or ``None`` when the request failed, the
            status code was not 200, or the response carried no access token.
            On failure ``self.jwt`` is left as ``None``.
        """
        payload = {
            "grant_type": "refresh_token",
            "client_id": "ast-app",
            "refresh_token": self.api_key
        }

        # Clear the old token first so a failed refresh never leaves a stale one behind.
        self.jwt = None
        try:
            response = requests.post(self.token_url, data=payload, timeout=self.timeout)
        except requests.RequestException as e:
            print(f"Token request error: {e}")
            return None

        # Error responses are not guaranteed to be JSON objects.
        try:
            body = response.json()
        except ValueError:
            body = None
        token = body.get('access_token') if isinstance(body, dict) else None

        if response.status_code != 200 or not token:
            print("Something is wrong, status code:", response.status_code)
            return None

        self.jwt = token
        self.token_timestamp = time.time()  # Store when token was obtained
        return self.jwt

    def should_refresh_token(self):
        """
        Return True when there is no token yet or the current one is at least
        ``token_refresh_threshold`` seconds old.
        """
        if not self.jwt or not self.token_timestamp:
            return True

        elapsed_time = time.time() - self.token_timestamp
        return elapsed_time >= self.token_refresh_threshold

    def ensure_valid_token(self):
        """
        Refresh the token if necessary.

        Returns:
            True when a usable token is available, False when refreshing failed.
        """
        if self.should_refresh_token():
            print("Refreshing JWT token...")
            self.get_jwt_token()
            if not self.jwt:
                print("Failed to obtain JWT token")
                return False
        return True

    def _auth_headers(self):
        """Build the request headers from the current token (sent as-is, without a scheme prefix)."""
        return {
            "Authorization": f"{self.jwt}",
            "Accept": "application/json; version=1.0"
        }

    def _fetch_all(self, resource, label):
        """
        Page through ``<base_url>/<resource>`` and collect the items found under
        the ``resource`` key of every response.

        Args:
            resource: Endpoint name, also the JSON key holding the items.
            label: Human-readable name used in the error message.

        Returns:
            DataFrame with all items; an empty DataFrame when no token could be
            obtained or any request failed (HTTP errors included, so a failing
            page can no longer silently truncate the result).
        """
        endpoint = f"{self.base_url}/{resource}"
        offset = 0
        collected = []

        try:
            while True:
                # Checked per page so a long pagination run picks up a refreshed token.
                if not self.ensure_valid_token():
                    return pd.DataFrame()

                response = requests.get(
                    endpoint,
                    headers=self._auth_headers(),
                    params={"limit": self.limit, "offset": offset},
                    timeout=self.timeout,
                )
                response.raise_for_status()

                page = response.json().get(resource) or []
                if not page:
                    break

                collected.extend(page)
                offset += self.limit

                # A short page is the last page.
                if len(page) < self.limit:
                    break

            return pd.DataFrame(collected)

        except requests.RequestException as e:
            print(f"{label} request error: {e}")
            return pd.DataFrame()

    def get_applications(self):
        """
        Retrieve all applications with pagination.

        Returns:
            DataFrame of applications, empty on failure.
        """
        return self._fetch_all("applications", "Applications")

    def get_scans(self):
        """
        Retrieve all scans with pagination.

        Returns:
            DataFrame of scans with the ``tags`` column removed when present,
            empty on failure.
        """
        df_scans = self._fetch_all("scans", "Scans")
        return df_scans.drop(columns='tags', errors='ignore')

    def get_unique_scans(self):
        """
        Retrieve the distinct scan IDs.

        Returns:
            List of unique values of the ``id`` column of ``get_scans()``, or an
            empty list when no scans were retrieved.
        """
        scans = self.get_scans()
        # get_scans() returns a column-less frame on failure; indexing it would raise KeyError.
        if 'id' not in scans.columns:
            return []
        return scans['id'].unique().tolist()

    def get_scans_metadata(self):
        """
        Retrieve metadata for all scans with automatic token refresh.

        Returns:
            DataFrame with one row per scan whose metadata request succeeded.
            Scans whose request fails are reported and skipped; the loop stops
            early if the token can no longer be refreshed.
        """
        if not self.ensure_valid_token():
            return pd.DataFrame()

        scan_ids = self.get_unique_scans()
        total_count = len(scan_ids)

        if not scan_ids:
            print("No scan IDs found")
            return pd.DataFrame()

        scan_details = []

        for i, scan_id in enumerate(scan_ids):
            try:
                # Check if we need to refresh token before making request
                if self.should_refresh_token():
                    print(f"Refreshing token at scan {i+1}/{total_count}")
                    if not self.ensure_valid_token():
                        print("Failed to refresh token, stopping...")
                        break

                metadata_endpoint = f"{self.base_url}/sast-metadata/{scan_id}"
                # Headers are rebuilt per request so a refreshed token is always used.
                response = requests.get(
                    metadata_endpoint,
                    headers=self._auth_headers(),
                    timeout=self.timeout,
                )
                response.raise_for_status()

                scan_details.append(response.json())

                # Optional: Print progress
                if (i + 1) % 50 == 0:
                    print(f"Processed {i+1}/{total_count} scans")

            except requests.RequestException as e:
                print(f"Metadata request error for scan {scan_id}: {e}")
                continue

        return pd.DataFrame(scan_details)
                    
# Create the client shared by the later cells and request an initial JWT.
# get_jwt_token() returns None (after printing the status code) when no token is obtained.
api = CheckmarxAPI()
jwt_token = api.get_jwt_token()

In [None]:
# jwt_token

In [None]:
# Download all scans page by page into a DataFrame and display it.
# NOTE(review): this variable shares its name with the CheckmarxAPI.get_scans method;
# later cells refer to it by this name.
get_scans = api.get_scans()
get_scans


In [None]:
# Name of the temporary table that receives the scans DataFrame.
python_scans_table = 'SCANS_FROM_PYTHON'

# Upload the DataFrame, creating the temporary table if needed and replacing its contents.
# `session` is only assigned when the Snowflake branch of the setup cell succeeded.
# NOTE(review): api.get_scans() returns an empty DataFrame on request failure —
# confirm that case is handled before the overwrite.
session.write_pandas(get_scans,
                         table_name=python_scans_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")

In [None]:
-- Persist the temporary SCANS_FROM_PYTHON upload as the raw scans table.
-- The separators that belonged to the disabled statusDetails flattening now live
-- inside the commented lines, so no dangling comma is left in the statement.
create or replace table prod.raw.checkmarx_scans as
SELECT *
  -- , sd.value:"endDate"::string AS enddate
FROM SCANS_FROM_PYTHON
-- , LATERAL FLATTEN(input => parse_json("statusDetails")) sd


In [None]:
-- create or replace table prod.silver.checkmarx_scans as

-- SELECT 
-- "id"::string as ID,
-- "createdAt"::datetime as Scan_Date,
-- "projectName"::string as Project,
-- "branch"::string as Branch,
-- "sourceOrigin"::string as Scan_Origin,
-- "sourceType"::string as Source,
-- "initiator"::string as Initiator,
-- null as Scan_Type,
-- -- parse_json("VALUE"):"loc"::integer as LOC,
-- -- -- "statusDetails",
-- -- "status" as Status,


-- -- "updatedAt" as statusDetails_updatedAt,
-- -- "VALUE",
-- -- parse_json("VALUE"):"details" as value_details,
-- -- parse_json("VALUE"):"endDate" as value_endDate,
-- -- parse_json("VALUE"):"loc"::integer as value_loc,
-- -- parse_json("VALUE"):"name" as value_name,
-- -- parse_json("VALUE"):"startDate" as value_startDate,
-- -- parse_json("VALUE"):"status" as value_status,
-- -- "projectId",

-- -- "userAgent",

-- -- "metadata",
-- -- "engines",
-- -- "SEQ",
-- -- "KEY",
-- -- "PATH",
-- -- "INDEX",

-- --   sd.value:"details"::string AS details,
-- --     sd.value:"endDate"::date AS endDate,
-- --       sd.value:"name"::string AS name,
-- --         sd.value:"startDate"::date AS startDate,
-- --           sd.value:"status"::string AS status
-- FROM prod.raw.checkmarx_scans
-- -- join LATERAL FLATTEN(input => parse_json("statusDetails")) sd

In [None]:
# Fetch the SAST metadata of every scan (one request per scan ID) and display it.
# This calls api.get_scans() again internally to obtain the scan IDs.
details = api.get_scans_metadata()
details

In [None]:
# Name of the temporary table that receives the scan metadata DataFrame.
python_scans_metadata_table = 'SCANS_METADATA'

# Upload the DataFrame, creating the temporary table if needed and replacing its contents.
# `session` is only assigned when the Snowflake branch of the setup cell succeeded.
session.write_pandas(details,
                         table_name=python_scans_metadata_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")

In [None]:
-- Persist the temporary SCANS_METADATA upload as the raw scan-metadata table.
create or replace table prod.raw.checkmarx_scans_metadata as 
select * from SCANS_METADATA

In [None]:
-- Build the silver scans table: typed scan columns from the raw scans table,
-- enriched with scan type and lines of code from the raw scan metadata.
CREATE OR REPLACE TABLE prod.silver.checkmarx_scans AS
SELECT
    "id"::string                                 AS ID,
    "createdAt"::datetime                        AS Scan_Date,
    REPLACE("projectName"::string, 'ADO_', '')   AS Project,
    "branch"::string                             AS Branch,
    "sourceOrigin"::string                       AS Scan_Origin,
    "sourceType"::string                         AS Source,
    "initiator"::string                          AS Initiator,
    -- Scans without a metadata row (NULL flag) fall into the ELSE branch.
    CASE
        WHEN csm."isIncremental" = TRUE THEN 'Incremental Scan'
        ELSE 'Full Scan'
    END                                          AS Scan_Type,
    csm."loc"                                    AS LOC,
    -- Text after the last underscore of the project name;
    -- the whole name when it contains no underscore.
    SPLIT_PART("projectName"::string, '_', -1)   AS PROJ_REPO_KEY
FROM prod.raw.checkmarx_scans AS cs
-- JOIN LATERAL FLATTEN(input => parse_json(cs."statusDetails")) sd
LEFT JOIN prod.raw.checkmarx_scans_metadata AS csm
    ON csm."scanId" = cs."id";

-- Preview a few rows of the result.
SELECT TOP 5 * FROM prod.silver.checkmarx_scans


In [None]:
# Download all applications page by page into a DataFrame and display it.
# NOTE(review): this variable shares its name with the CheckmarxAPI.get_applications method.
get_applications = api.get_applications()
get_applications

In [None]:
# Name of the temporary table that receives the applications DataFrame.
python_applications_table = 'APPLICATIONS_FROM_PYTHON'

# Upload the DataFrame, creating the temporary table if needed and replacing its contents.
# `session` is only assigned when the Snowflake branch of the setup cell succeeded.
session.write_pandas(get_applications,
                         table_name=python_applications_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")

In [None]:
-- Persist the temporary APPLICATIONS_FROM_PYTHON upload as the raw applications table.
create or replace table prod.raw.checkmarx_applications as 
select * from APPLICATIONS_FROM_PYTHON