# API Documentation

https://sonarcloud.io/web_api/api/metrics?deprecated=false


In [None]:
-- Set this session's TIMEZONE parameter to Europe/London.
ALTER SESSION SET TIMEZONE = 'Europe/London';

The production database is "prod". Data pulled directly from the API is stored in its "raw" schema.
Each load query is preceded by a commented-out "CREATE OR REPLACE" statement for its target table, so the table definition is easy to update if modifications are needed.

In [None]:
# Modules used by the rest of the notebook are imported up front so they are
# available no matter which branch below supplies the credentials.
import datetime
import os
import time

import pandas as pd
import requests

try:
    # Inside a Snowflake notebook: reuse the active session and read the API
    # key from the PROD.RAW.sonarcloud_api_key() function.
    from snowflake.snowpark.context import get_active_session
    session = get_active_session()
    sonarcloud_api_key = session.sql("SELECT PROD.RAW.sonarcloud_api_key()").collect()[0][0]
except Exception:
    # Snowpark is missing or no session/secret is available: fall back to
    # values loaded from a local .env file. `session` stays undefined on this
    # path, so the cells that call session.write_pandas() cannot run here.
    from dotenv import load_dotenv
    load_dotenv()
    # NOTE(review): the environment variable name is assumed to match the
    # Python variable name, like the entries below - confirm.
    sonarcloud_api_key = os.getenv('sonarcloud_api_key')
    tenant_id = os.getenv('tenant_id')
    client_id = os.getenv('client_id')
    client_secret = os.getenv('client_secret')
    subscription_id = os.getenv('subscription_id')
    billing_account_id = os.getenv('billing_account_id')



In [None]:
class SonarcloudApi:
    """Client for the SonarCloud Web API that returns each endpoint as a DataFrame.

    Every ``get_sonarcloud_*`` method queries one endpoint for the configured
    organization and returns a pandas DataFrame holding a fixed selection of
    API fields, in a fixed order, with upper-cased column names.
    """

    # First year scanned by get_sonarcloud_issues().
    FIRST_ISSUE_YEAR = 2017
    # Seconds to wait on a request before `requests` raises a timeout error.
    REQUEST_TIMEOUT = 60
    # NOTE(review): SonarCloud's issue search is understood to serve only the
    # first 10,000 results of a query - confirm against the Web API docs.
    MAX_ISSUE_RESULTS = 10000

    def __init__(self):
        """Read the module-level API key and set the default query settings."""
        self.sonarcloud_api_key = sonarcloud_api_key
        self.headers = {"Authorization": f"Bearer {self.sonarcloud_api_key}"}
        self.organization = 'phlexglobal'
        self.page_size = 500   # items requested per page
        self.page_number = 1   # first page requested by every paginated call
        self.months = range(1, 13)
        self.current_year = datetime.datetime.now().year

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _month_window(year, month):
        """Return ``(first day of month, first day of next month)`` as YYYY-MM-DD.

        December rolls over to January of the following year, so the end date
        is always a valid calendar date.
        """
        if month == 12:
            next_year, next_month = year + 1, 1
        else:
            next_year, next_month = year, month + 1
        return f"{year}-{month:02d}-01", f"{next_year}-{next_month:02d}-01"

    @staticmethod
    def _page_count(total, page_size):
        """Return how many pages of ``page_size`` are needed for ``total`` items."""
        return (total + page_size - 1) // page_size

    @staticmethod
    def _to_frame(records, columns):
        """Build a DataFrame with exactly ``columns``, upper-cased, in that order.

        Fields missing from every record (or an empty ``records`` list) yield
        empty/NaN columns instead of a ``KeyError``.
        """
        frame = pd.DataFrame(records).reindex(columns=columns)
        frame.columns = [name.upper() for name in columns]
        return frame

    def _get_json(self, url, params):
        """GET ``url`` with the auth headers and return the decoded JSON body.

        Raises:
            requests.HTTPError: if the API answers with a 4xx/5xx status, so
                failures are not mistaken for "no data".
        """
        response = requests.get(
            url,
            headers=self.headers,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def _fetch_all_pages(self, url, items_key, extra_params=None, max_results=None):
        """Collect ``items_key`` from every page of a paginated endpoint.

        Args:
            url: Endpoint URL.
            items_key: Key of the JSON response that holds the list of items.
            extra_params: Query parameters added to organization/ps/p.
            max_results: Optional cap on how many results are paged through;
                a warning is emitted when the reported total exceeds it.

        Returns:
            A list with the items of all fetched pages, in page order.
        """
        params = {
            "organization": self.organization,
            "ps": self.page_size,
            "p": self.page_number,
        }
        if extra_params:
            params.update(extra_params)

        payload = self._get_json(url, params)
        records = list(payload.get(items_key, []))

        # The total is read from the "paging" object when present, otherwise
        # from the top level of the response; a missing total means one page.
        total = payload.get('paging', payload).get('total', 0)
        last_page = self._page_count(total, params["ps"])

        if max_results is not None and total > max_results:
            import warnings
            warnings.warn(
                f"{url} reports {total} results for {extra_params}; "
                f"only the first {max_results} are fetched."
            )
            last_page = min(last_page, max_results // params["ps"])

        for page in range(params["p"] + 1, last_page + 1):
            params["p"] = page
            records.extend(self._get_json(url, params).get(items_key, []))

        return records

    # ------------------------------------------------------------------
    # Public endpoints
    # ------------------------------------------------------------------
    def get_sonarcloud_issues(self):
        """Return all issues of the organization, fetched month by month.

        Issues are queried in one-month creation-date windows from
        ``FIRST_ISSUE_YEAR`` up to and including the current year; each window
        is paginated separately.
        """
        url_issues = "https://sonarcloud.io/api/issues/search"
        all_issues = []

        for year in range(self.FIRST_ISSUE_YEAR, self.current_year + 1):
            for month in self.months:
                created_after, created_before = self._month_window(year, month)
                all_issues.extend(
                    self._fetch_all_pages(
                        url_issues,
                        'issues',
                        extra_params={
                            "createdBefore": created_before,  # first day of next month
                            "createdAfter": created_after,    # first day of this month
                        },
                        max_results=self.MAX_ISSUE_RESULTS,
                    )
                )

        # Fixed column order expected by the downstream load.
        return self._to_frame(all_issues, [
            'key', 'rule', 'severity', 'component', 'project', 'line', 'hash',
            'textRange', 'flows', 'status', 'message', 'effort', 'debt',
            'assignee', 'author', 'tags', 'creationDate', 'updateDate', 'type',
            'organization', 'cleanCodeAttribute', 'cleanCodeAttributeCategory',
            'impacts', 'issueStatus', 'externalRuleEngine', 'resolution',
            'closeDate',
        ])

    def get_sonarcloud_components(self):
        """Return all components of the organization."""
        all_components = self._fetch_all_pages(
            "https://sonarcloud.io/api/components/search", 'components'
        )
        return self._to_frame(
            all_components,
            ['organization', 'key', 'name', 'qualifier', 'project'],
        )

    def get_unique_sonarcloud_components(self):
        """Return the distinct component keys as a list."""
        unique = self.get_sonarcloud_components()['KEY'].unique().tolist()
        return unique

    def get_sonarcloud_metrics(self):
        """Return the metric definitions exposed by the API."""
        all_metrics = self._fetch_all_pages(
            "https://sonarcloud.io/api/metrics/search", 'metrics'
        )
        return self._to_frame(all_metrics, [
            'id', 'key', 'name', 'type', 'domain', 'direction', 'description',
            'qualitative', 'hidden', 'decimalScale',
        ])

    def get_sonarcloud_projects(self):
        """Return all projects of the organization."""
        all_projects = self._fetch_all_pages(
            "https://sonarcloud.io/api/projects/search", 'components'
        )
        return self._to_frame(all_projects, [
            'organization', 'key', 'name', 'qualifier', 'visibility',
            'lastAnalysisDate', 'revision',
        ])

    def get_sonarcloud_measures_component(self):
        """Return one row per (component, metric) with that metric's measures.

        Each unique component key is queried once per metric key, with a short
        pause between requests.
        """
        metrics_keys = [
            'accepted_issues', 'files', 'ncloc', 'maintainability_issues',
            'reliability_issues', 'security_hotspots', 'security_issues',
            'line_coverage', 'duplicated_lines', 'duplicated_lines_density',
        ]
        fields = ['id', 'key', 'name', 'qualifier', 'measures']
        url_measures = "https://sonarcloud.io/api/measures/component"

        all_measures_component = []
        for comp_id in self.get_unique_sonarcloud_components():
            for metric_key in metrics_keys:
                component = self._get_json(
                    url_measures,
                    {
                        "organization": self.organization,
                        "metricKeys": metric_key,
                        'component': comp_id,
                    },
                )['component']
                all_measures_component.append(
                    {field: component[field] for field in fields}
                )
                # Add delay to avoid API rate limiting
                time.sleep(0.1)

        return self._to_frame(all_measures_component, fields)
    
# Client instance shared by the cells below.
api = SonarcloudApi()


In [None]:
# Fetch the organization's components and display the resulting DataFrame.
components = api.get_sonarcloud_components()
components

In [None]:
# Stage the components DataFrame in a temporary Snowflake table (created if
# needed, overwritten if present) so the SQL cell below can load it.
temp_components_table = 'TEMP_COMPONENTS'
session.write_pandas(components,
                         table_name=temp_components_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")

In [None]:
-- CREATE OR REPLACE TABLE prod.raw.sonarcloud_components (
--     ORGANIZATION VARCHAR(16777216),
--     KEY VARCHAR(16777216),
--     NAME VARCHAR(16777216),
--     QUALIFIER VARCHAR(16777216),
--     PROJECT VARCHAR(16777216),
--  timestamp timestamp_tz
-- );

-- TRUNCATE TABLE prod.raw.sonarcloud_components;

-- Replace the contents of prod.raw.sonarcloud_components with the rows staged
-- in TEMP_COMPONENTS, adding the load time as the last column.
-- Columns are inserted positionally, so the SELECT order must match the table.
insert overwrite into prod.raw.sonarcloud_components
SELECT
    "ORGANIZATION",
    "KEY",
    "NAME",
    "QUALIFIER",
    "PROJECT",
    current_timestamp as timestamp
FROM TEMP_COMPONENTS;


In [None]:
# Fetch the organization's issues and display the resulting DataFrame.
issues = api.get_sonarcloud_issues()
issues

In [None]:
# Stage the issues DataFrame in a temporary Snowflake table (created if
# needed, overwritten if present) so the SQL cell below can load it.
temp_issues_table = 'TEMP_ISSUES'
session.write_pandas(issues,
                         table_name=temp_issues_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary") # staging data only, so a temporary table is sufficient

In [None]:
-- CREATE OR REPLACE TABLE prod.raw.sonarcloud_issues (
--     KEY VARCHAR(16777216),
--     RULE VARCHAR(16777216),
--     SEVERITY VARCHAR(16777216),
--     COMPONENT VARCHAR(16777216),
--     PROJECT VARCHAR(16777216),
--     LINE FLOAT,
--     HASH VARCHAR(16777216),
--     TEXTRANGE OBJECT,
--     FLOWS ARRAY,
--     STATUS VARCHAR(16777216),
--     MESSAGE VARCHAR(16777216),
--     EFFORT VARCHAR(16777216),
--     DEBT VARCHAR(16777216),
--     ASSIGNEE VARCHAR(16777216),
--     AUTHOR VARCHAR(16777216),
--     TAGS ARRAY,
--     CREATIONDATE VARCHAR(16777216),
--     UPDATEDATE VARCHAR(16777216),
--     TYPE VARCHAR(16777216),
--     ORGANIZATION VARCHAR(16777216),
--     CLEANCODEATTRIBUTE VARCHAR(16777216),
--     CLEANCODEATTRIBUTECATEGORY VARCHAR(16777216),
--     IMPACTS ARRAY,
--     ISSUESTATUS VARCHAR(16777216),
--     EXTERNALRULEENGINE VARCHAR(16777216),
--     RESOLUTION VARCHAR(16777216),
--     CLOSEDATE VARCHAR(16777216),
--  timestamp timestamp_tz
-- );

-- TRUNCATE TABLE prod.raw.sonarcloud_issues;

-- Replace the contents of prod.raw.sonarcloud_issues with the rows staged in
-- TEMP_ISSUES, adding the load time as the last column.
-- Columns are inserted positionally, so the SELECT order must match the table.
insert overwrite into prod.raw.sonarcloud_issues
SELECT
    "KEY",
    "RULE",
    "SEVERITY",
    "COMPONENT",
    "PROJECT",
    "LINE",
    "HASH",
    "TEXTRANGE",
    "FLOWS",
    "STATUS",
    "MESSAGE",
    "EFFORT",
    "DEBT",
    "ASSIGNEE",
    "AUTHOR",
    "TAGS",
    "CREATIONDATE",
    "UPDATEDATE",
    "TYPE",
    "ORGANIZATION",
    "CLEANCODEATTRIBUTE",
    "CLEANCODEATTRIBUTECATEGORY",
    "IMPACTS",
    "ISSUESTATUS",
    "EXTERNALRULEENGINE",
    "RESOLUTION",
    "CLOSEDATE",
    current_timestamp as timestamp
FROM TEMP_ISSUES;

In [None]:
# Fetch the metric definitions and display the resulting DataFrame.
metrics = api.get_sonarcloud_metrics()
metrics

In [None]:
# Stage the metrics DataFrame in a temporary Snowflake table (created if
# needed, overwritten if present) so the SQL cell below can load it.
temp_measures_metrics_table = 'TEMP_METRICS_COMPONENT'
session.write_pandas(metrics,
                         table_name=temp_measures_metrics_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")


In [None]:
-- CREATE OR REPLACE TABLE prod.raw.sonarcloud_metrics (
--     ID VARCHAR(16777216),
--     KEY VARCHAR(16777216),
--     NAME VARCHAR(16777216),
--     TYPE VARCHAR(16777216),
--     DOMAIN VARCHAR(16777216),
--     HIDDEN BOOLEAN,
--     DIRECTION FLOAT,
--     DESCRIPTION VARCHAR(16777216),
--     QUALITATIVE BOOLEAN,
--     DECIMALSCALE FLOAT,
--  timestamp timestamp_tz
-- );

-- TRUNCATE TABLE prod.raw.sonarcloud_metrics;

-- Replace the contents of prod.raw.sonarcloud_metrics with the rows staged in
-- TEMP_METRICS_COMPONENT, adding the load time as the last column.
-- Columns are inserted positionally, so the SELECT order must match the table.
insert overwrite into prod.raw.sonarcloud_metrics
SELECT
    "ID",
    "KEY",
    "NAME",
    "TYPE",
    "DOMAIN",
    "HIDDEN",
    "DIRECTION",
    "DESCRIPTION",
    "QUALITATIVE",
    "DECIMALSCALE",
    current_timestamp as timestamp
FROM TEMP_METRICS_COMPONENT;

In [None]:
# Fetch the organization's projects and display the resulting DataFrame.
projects = api.get_sonarcloud_projects()
projects

In [None]:
# Stage the projects DataFrame in a temporary Snowflake table (created if
# needed, overwritten if present) so the SQL cell below can load it.
temp_measures_projects_table = 'TEMP_PROJECTS'
session.write_pandas(projects,
                         table_name=temp_measures_projects_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")


In [None]:
-- CREATE OR REPLACE TABLE prod.raw.sonarcloud_projects (
--     ORGANIZATION VARCHAR(16777216),
--     KEY VARCHAR(16777216),
--     NAME VARCHAR(16777216),
--     REVISION VARCHAR(16777216),
--     QUALIFIER VARCHAR(16777216),
--     VISIBILITY VARCHAR(16777216),
--     LASTANALYSISDATE VARCHAR(16777216),
--  timestamp timestamp_tz
-- );

-- TRUNCATE TABLE prod.raw.sonarcloud_projects;

-- Replace the contents of prod.raw.sonarcloud_projects with the rows staged in
-- TEMP_PROJECTS, adding the load time as the last column.
-- Columns are inserted positionally, so the SELECT order must match the table.
insert overwrite into prod.raw.sonarcloud_projects
SELECT
    "ORGANIZATION",
    "KEY",
    "NAME",
    "REVISION",
    "QUALIFIER",
    "VISIBILITY",
    "LASTANALYSISDATE",
    current_timestamp as timestamp
FROM TEMP_PROJECTS;

In [None]:
# Fetch the per-component measures and display the resulting DataFrame.
measures_component = api.get_sonarcloud_measures_component()
measures_component

In [None]:
# Stage the measures DataFrame in a temporary Snowflake table (created if
# needed, overwritten if present) so the SQL cell below can load it.
temp_measures_components_table = 'TEMP_MEASURES_COMPONENTS'

session.write_pandas(measures_component,
                         table_name=temp_measures_components_table,
                         auto_create_table=True,
                         overwrite=True,
                         table_type="temporary")



In [None]:
-- CREATE OR REPLACE TABLE prod.raw.sonarcloud_measures_component (
--     ID VARCHAR(16777216),
--     KEY VARCHAR(16777216),
--     NAME VARCHAR(16777216),
--     MEASURES ARRAY,
--     QUALIFIER VARCHAR(16777216),
--  timestamp timestamp_tz
-- );

-- TRUNCATE TABLE prod.raw.sonarcloud_measures_component;

-- Replace the contents of prod.raw.sonarcloud_measures_component with the rows
-- staged in TEMP_MEASURES_COMPONENTS, adding the load time as the last column.
-- Columns are inserted positionally, so the SELECT order must match the table.
insert overwrite into prod.raw.sonarcloud_measures_component
SELECT
    "ID",
    "KEY",
    "NAME",
    "MEASURES",
    "QUALIFIER",
    current_timestamp as timestamp
FROM TEMP_MEASURES_COMPONENTS;