In [1]:
import os
import requests
import pandas as pd

# from .manager import validate_project_id, get_dataframe_query
# from . import connection_manager


In [2]:
def get_dataframe(ds_name, project_id="172f5711-3ff4-4640-a68e-858cae300d01", row_count=-1, strategy="top", user_id="1001",
                  filter_condition=None):
    """
    Fetch the connection config for a data source and read it into a dataframe.

    Param:
        ds_name: data-source name, placed in the getConnConfig URL
        project_id: project identifier, placed in the getConnConfig URL
        row_count: number of rows to fetch, forwarded to the selected reader (default -1)
        strategy: forwarded to the selected reader (default "top")
        user_id: forwarded to the Hive reader only (default "1001")
        filter_condition: forwarded to the native RDBMS readers (default None)
    Return:
        The data frame produced by the selected reader, or None when anything
        fails. Failures are printed, never raised (best-effort behaviour).

    Need to install mosaic-connector-python for reading dataframe using connector backend,
    git+https://<deploy-token-user>:<deploy-token>@git.lti-aiq.in/mosaic-decisions-2-0/mosaic-connector-python.git@1.0.29.3
    (credentials redacted on purpose; never commit a deploy token to a notebook).
    """
    # Bound before the try block so the except handler can always print it,
    # even when the HTTP request or the JSON decoding is what failed.
    connection_details = None
    try:
        connection_manager = "http://fdc-project-manager:80/project-manager"
#         project_id = validate_project_id(project_id)
        # user id hard coded, as it's not being used in API code.
        url = f"{connection_manager}/connections/api/External/v2/external/getConnConfig/" \
              f"{ds_name}/fdcuser/{project_id}"
        print(url)
        connection_details = requests.get(url, verify=False).json()

        params = connection_details["params"]
        reader = params["READER"]
        reader_type = reader["type"]

        if reader_type == "RDBMS":
            sub_type = reader["sub_type"]
            if sub_type == "SNOWFLAKE":
                data_frame = get_snowflake_df(connection_details, row_count, strategy, filter_condition)
            elif sub_type == "MYSQL":
                data_frame = get_mysql_df(connection_details, row_count, strategy, filter_condition)
            elif sub_type == "HIVE":
                data_frame = get_hive_df(connection_details, project_id, row_count, strategy, user_id, filter_condition)
            elif sub_type == "SQLSERVER":
                data_frame = get_sqlserver_df(connection_details, row_count, strategy, filter_condition)
            elif sub_type == "POSTGRES":
                data_frame = get_postgres_df(connection_details, row_count, strategy, filter_condition)
            else:
                # Any other RDBMS sub type falls back to the generic connector backend.
                print("Reading dataframe using connector backend")
                # Need to install mosaic-connector-python (see docstring; credentials redacted).
                from connector.mosaicio import MosaicioConnector
                connector = MosaicioConnector()
                data_frame = connector.getPandasDataFrame(
                    param=params,
                    row_count=row_count,
                    strategy=strategy
                )
        elif reader_type == "FILE":
            storage_type = params["READER_STORAGE"]["type"]
            if storage_type == "AMAZONS3":
                data_frame = get_s3_df(connection_details, row_count, strategy)
            elif storage_type == "AZURE":
                data_frame = get_azureblob_df(connection_details, row_count, strategy)
            elif storage_type == "SFTP":
                data_frame = get_sftp_df(connection_details, row_count, strategy)
            else:
                # Previously fell through with data_frame unbound.
                raise ValueError(f"Unsupported file storage type: {storage_type}")
        else:
            # Previously fell through with data_frame unbound.
            raise ValueError(f"Unsupported reader type: {reader_type}")
        return data_frame
    except Exception as ex:
        print(f'project_id: {project_id}')
        # NOTE(review): the fetched config may carry reader credentials (the
        # Snowflake reader reads a "password" key from it); consider masking
        # before printing into shared notebook output.
        print(f"Connection details fetched: {connection_details}")
        print(f"Exception occurred in get_dataframe: {ex}")
        return None


In [3]:
def get_snowflake_df(connection_details, row_count, strategy, filter_condition):
    """
    Read a data frame from a Snowflake connection using the native connector.

    Param:
        connection_details: connection details dict; values are read from
            connection_details["params"]["READER"]
        row_count: number of rows to be fetched, forwarded to get_dataframe_query
        strategy: top/bottom; accepted but not used by this reader
        filter_condition: forwarded to get_dataframe_query
    Return:
        The result of cursor.fetch_pandas_all() for the generated query.

    The cursor and the connection are always closed, including when building
    or executing the query raises; the exception then propagates to the caller.
    """
    print("Reading dataframe from snowflake native connector")

    import snowflake.connector

    reader = connection_details["params"]["READER"]

    # AWS regions that are passed to the connector without the
    # ".<cloudPlatform>" suffix; every other case gets the suffix appended.
    ignore_cloudplatform = ["us-east-1", "eu-west-1", "eu-central-1", "ap-southeast-1", "ap-southeast-2"]
    if reader["cloudPlatform"] == "aws" and reader["region"] in ignore_cloudplatform:
        region = reader["region"]
    else:
        region = reader["region"] + "." + reader["cloudPlatform"]

    # Connect to Snowflake
    # NOTE(review): `cloudPlatform` and `wareHouse` are passed through exactly
    # as before; confirm the connector honours these keyword spellings. The
    # explicit "use warehouse" below selects the warehouse regardless.
    con = snowflake.connector.connect(
        user=reader["user"],
        password=reader["password"],
        account=reader["accountId"],
        database=reader["database"],
        role=reader["role"],
        cloudPlatform=reader["cloudPlatform"],
        schema=reader["schema"],
        wareHouse=reader["wareHouse"],
        region=region
    )
    try:
        # Create cursor
        cur = con.cursor()
        try:
            # Get query to fetch details
            query = get_dataframe_query(reader['tables'],
                                        row_count, filter_condition, double_quotes=True)

            # Setting up warehouse, it is needed in new snowflake gcp connections.
            cur.execute(f"use warehouse {reader['wareHouse']};")

            cur.execute(query)  # Execute query

            # Read results into a pandas DataFrame
            return cur.fetch_pandas_all()
        finally:
            cur.close()
    finally:
        con.close()


In [4]:
def get_dataframe_query(table_name, row_count, filter_condition, top=None, double_quotes=None):
    """
    To get the query to fetch dataframe
    :param table_name: table to select from; interpolated into the query as given
    :param row_count: maximum number of rows; values <= 0 (or None) mean no limit
    :param filter_condition: text appended after the FROM clause, separated by a
        single space (the caller supplies the complete clause); falsy to skip
    :param top: truthy to limit with "SELECT TOP n" instead of a trailing "LIMIT n"
    :param double_quotes: truthy to wrap the table name in double quotes
    :return: query
    """
    # Quote once so both the TOP and the LIMIT forms honour double_quotes
    # (the TOP form used to drop the quoting).
    table_ref = f'"{table_name}"' if double_quotes else f"{table_name}"

    # None is treated as "no limit" instead of raising from int(None).
    limit = int(row_count) if row_count is not None else 0
    has_limit = limit > 0
    if has_limit:
        print(f"fetching {limit} records!")

    if top and has_limit:
        query = f"SELECT TOP {limit} * FROM {table_ref}"
    else:
        query = f"SELECT * FROM {table_ref}"

    # NOTE(review): filter_condition and table_name are concatenated verbatim;
    # they must come from trusted configuration, not from end-user input.
    if filter_condition:
        query = query + " " + filter_condition

    if has_limit and not top:
        query = f"{query} LIMIT {limit}"
    return query


In [5]:
# Load the "AIRLINE_DEP_DELAY_10K" data source with the default project id and no row limit.
# get_dataframe prints the error and returns None on failure, so df may be None here.
df = get_dataframe("AIRLINE_DEP_DELAY_10K")

http://fdc-project-manager:80/project-manager/connections/api/External/v2/external/getConnConfig/AIRLINE_DEP_DELAY_10K/fdcuser/172f5711-3ff4-4640-a68e-858cae300d01
Reading dataframe from snowflake native connector


In [6]:
# Display the loaded frame; the recorded output below is the truncated view (first/last rows, elided columns).
df

Unnamed: 0,MONTH,DAY_OF_WEEK,DEP_DEL15,DEP_TIME_BLK,DISTANCE_GROUP,SEGMENT_NUMBER,CONCURRENT_FLIGHTS,NUMBER_OF_SEATS,CARRIER_NAME,AIRPORT_FLIGHTS_MONTH,...,PREVIOUS_AIRPORT,PRCP,SNOW,SNWD,TMAX,AWND,CARRIER_HISTORICAL,DEP_AIRPORT_HIST,DAY_HISTORICAL,DEP_BLOCK_HIST
0,4,6,0,1000-1059,3,3,5,110,Delta Air Lines Inc.,6837,...,Atlanta Municipal,0.0,0.0,0.0,64.0,13.87,0.139558,0.180307,0.177124,0.147773
1,3,2,0,1000-1059,2,2,31,50,Endeavor Air Inc.,14450,...,Bangor International,0.0,0.0,0.0,48.0,12.08,0.155571,0.187883,0.132868,0.156045
2,2,5,0,1000-1059,8,2,37,173,United Air Lines Inc.,16530,...,Los Angeles International,0.0,0.0,0.0,54.0,10.07,0.189496,0.240316,0.236965,0.187484
3,4,6,0,0900-0959,11,2,37,181,Alaska Airlines Inc.,17522,...,Portland International,0.0,0.0,0.0,64.0,7.38,0.132326,0.112840,0.177124,0.135374
4,8,7,0,0900-0959,1,2,27,143,Southwest Airlines Co.,14220,...,Spokane International,0.0,0.0,0.0,104.0,4.92,0.192550,0.123487,0.196519,0.138067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,5,5,0,1800-1859,3,3,71,157,Delta Air Lines Inc.,25360,...,Atlanta Municipal,0.0,0.0,0.0,89.0,16.78,0.141341,0.172771,0.206977,0.309452
9996,8,3,0,2200-2259,1,5,12,110,Delta Air Lines Inc.,14049,...,San Francisco International,0.0,0.0,0.0,89.0,7.16,0.168586,0.238865,0.221672,0.283996
9997,2,5,0,1300-1359,5,3,19,181,Alaska Airlines Inc.,9017,...,Phoenix Sky Harbor International,0.0,0.0,0.0,47.0,6.93,0.233099,0.230231,0.236965,0.246487
9998,1,3,0,0700-0759,2,2,24,76,SkyWest Airlines Inc.,11784,...,Rochester Monroe County,0.0,0.0,5.1,1.0,19.46,0.208051,0.187883,0.180607,0.088475
