In [1]:
# 1. Najpierw zainstaluj wymagane biblioteki
# pip install delta-sharing pandas

import delta_sharing
import pandas as pd
import json

# 2. Utwórz plik konfiguracyjny lub użyj danych bezpośrednio
# Opcja A: Zapisz dane do pliku JSON (zalecane)
config_data = {
    "shareCredentialsVersion": 1,
    "bearerToken": "tUnaIS-csD1HRRzwEqqgrWBEIJdeJFMTivNN8ZF2Aw7wLgnzdZVTK2qwkyDsc0Lc",
    "endpoint": "https://northeurope-c2.azuredatabricks.net/api/2.0/delta-sharing/metastores/4c67fa35-b334-4439-9527-d140d0ffdbe1",
    "expirationTime": "9999-12-30T23:59:59.433Z"
}

# Zapisz do pliku
with open('delta_config.json', 'w') as f:
    json.dump(config_data, f)   

# 3. Połącz się z Delta Share
# Opcja A: Używając pliku konfiguracyjnego
client = delta_sharing.SharingClient('delta_config.json')

# Opcja B: Używając danych bezpośrednio (alternatywnie)
# client = delta_sharing.SharingClient(config_data)

# 4. Sprawdź dostępne share'y
try:
    shares = client.list_shares()
    print("Dostępne shares:")
    for share in shares:
        print(f"- {share.name}")
except Exception as e:
    print(f"Błąd podczas pobierania shares: {e}")

# 5. Sprawdź schematy w konkretnym share (zastąp 'share_name' rzeczywistą nazwą)
try:
    # Najpierw pobierz nazwę pierwszego share
    if shares:
        share_name = shares[0].name
        schemas = client.list_schemas(delta_sharing.Share(share_name))
        print(f"\nSchematy w share '{share_name}':")
        for schema in schemas:
            print(f"- {schema.name}")
            
            # 6. Sprawdź tabele w schemacie
            tables = client.list_tables(schema)
            print(f"  Tabele w schemacie '{schema.name}':")
            for table in tables:
                print(f"    - {table.name}")
                
                # 7. Pobierz dane z tabeli (przykład)
                # Uwaga: to może być duża tabela, więc używaj limit
                try:
                    # Pobierz próbkę danych
                    df = delta_sharing.load_as_pandas(
                        f"{client.profile_file}#{share_name}.{schema.name}.{table.name}",
                        limit=10  # Pobierz tylko 10 wierszy jako przykład
                    )
                    print(f"    Próbka danych z tabeli {table.name}:")
                    print(df.head())
                    print(f"    Wymiary tabeli (próbka): {df.shape}")
                    print()
                except Exception as e:
                    print(f"    Błąd podczas pobierania danych z {table.name}: {e}")
                    
except Exception as e:
    print(f"Błąd: {e}")

# 8. Przykład bardziej zaawansowanego użycia - POPRAWIONA WERSJA
def load_table_data(config_file, share_name, schema_name, table_name, limit=None):
    """
    Funkcja do ładowania danych z konkretnej tabeli
    """
    try:
        table_path = f"{config_file}#{share_name}.{schema_name}.{table_name}"
        
        if limit:
            df = delta_sharing.load_as_pandas(table_path, limit=limit)
        else:
            df = delta_sharing.load_as_pandas(table_path)
        
        return df
    except Exception as e:
        print(f"Błąd podczas ładowania tabeli: {e}")
        return None

# 9. Przykład z filtrowaniem (jeśli obsługiwane) - POPRAWIONA WERSJA
def load_table_with_filter(config_file, share_name, schema_name, table_name, filter_condition=None):
    """
    Funkcja do ładowania danych z filtrowaniem
    """
    try:
        table_path = f"{config_file}#{share_name}.{schema_name}.{table_name}"
        
        # Niektóre implementacje mogą obsługiwać predicate pushdown
        df = delta_sharing.load_as_pandas(table_path)
        
        # Filtrowanie po stronie klienta (jeśli predicate pushdown nie jest dostępny)
        if filter_condition:
            df = df.query(filter_condition)
        
        return df
    except Exception as e:
        print(f"Błąd podczas ładowania z filtrem: {e}")
        return None

# 10. Konkretny przykład dla Twoich danych
print("\n=== KONKRETNE PRZYKŁADY DLA TWOICH DANYCH ===")

# Załaduj dane z tabeli quality_review2
print("Ładowanie danych z quality_review2...")
df_review2 = load_table_data('delta_config.json', 'icon_export', 'icon_export', 'quality_review2', limit=10)
if df_review2 is not None:
    print(f"Załadowano {len(df_review2)} wierszy z quality_review2")
    print(df_review2.head())
    print(f"Kolumny: {list(df_review2.columns)}")
    print()

# Załaduj dane z tabeli quality_review
print("Ładowanie danych z quality_review...")
df_review = load_table_data('delta_config.json', 'icon_export', 'icon_export', 'quality_review', limit=10)
if df_review is not None:
    print(f"Załadowano {len(df_review)} wierszy z quality_review")
    print(df_review.head())
    print(f"Kolumny: {list(df_review.columns)}")
    print()

# Alternatywna metoda - bezpośrednie użycie ścieżki
print("=== ALTERNATYWNA METODA - BEZPOŚREDNIE ŁADOWANIE ===")
try:
    # Ładowanie quality_review2
    df_alt1 = delta_sharing.load_as_pandas(
        "delta_config.json#icon_export.icon_export.quality_review2", 
        limit=5
    )
    print("quality_review2 (alternatywna metoda):")
    print(df_alt1.head())
    print(f"Wymiary: {df_alt1.shape}")
    print()
    
    # Ładowanie quality_review
    df_alt2 = delta_sharing.load_as_pandas(
        "delta_config.json#icon_export.icon_export.quality_review", 
        limit=5
    )
    print("quality_review (alternatywna metoda):")
    print(df_alt2.head())
    print(f"Wymiary: {df_alt2.shape}")
    
except Exception as e:
    print(f"Błąd w alternatywnej metodzie: {e}")

print("\nSkrypt gotowy do użycia!")
print("Pamiętaj o zastąpieniu nazw share/schema/table rzeczywistymi wartościami.")


# Szybki sposób na załadowanie danych:
df1 = delta_sharing.load_as_pandas(
    "delta_config.json#icon_export.icon_export.quality_review", 
    limit=100
)

df2 = delta_sharing.load_as_pandas(
    "delta_config.json#icon_export.icon_export.quality_review2", 
    limit=100
)

df2

Dostępne shares:
- icon_export

Schematy w share 'icon_export':
- icon_export
  Tabele w schemacie 'icon_export':
    - quality_review2
    Błąd podczas pobierania danych z quality_review2: 'SharingClient' object has no attribute 'profile_file'
    - quality_review
    Błąd podczas pobierania danych z quality_review: 'SharingClient' object has no attribute 'profile_file'

=== KONKRETNE PRZYKŁADY DLA TWOICH DANYCH ===
Ładowanie danych z quality_review2...
Załadowano 10 wierszy z quality_review2
               Sponsor               Product        Protocol FileType  \
0  Harmony Biosciences  HBS-101 (Pitolisant)  HBS-101-CL-002     SITE   
1  Harmony Biosciences  HBS-101 (Pitolisant)  HBS-101-CL-002    TRIAL   
2  Harmony Biosciences  HBS-101 (Pitolisant)  HBS-101-CL-002    TRIAL   
3  Harmony Biosciences  HBS-101 (Pitolisant)  HBS-101-CL-002    TRIAL   
4  Harmony Biosciences  HBS-101 (Pitolisant)  HBS-101-CL-002    TRIAL   

         Country                          Site Cycle PackageTy

Unnamed: 0,Sponsor,Product,Protocol,FileType,Country,Site,Cycle,PackageType,PackageName,PackageOwner,...,LastAssignmentCycleCompleteDate,NextCycleScheduledDate,QualityReviewStatus,Assignee,DaysOpen,PercentageComplete,NoDocuments,Remaining,PackageId,AssignmentId
0,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,SITE,UNITED STATES,"002001:Bhattacharjee, Rakesh",1,Adhoc,"Adhoc- Site- 002001:Bhattacharjee, Rakesh",Jimena.Oblea@iconplc.com,...,2021-05-13 20:29:17.875137+00:00,NaT,QC Complete,,0,100,1,0,3102,11565
1,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,1,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2021-11-23 15:34:04.028004+00:00,2022-05-17 16:23:29.077867+00:00,QC Complete,Sandrine.Cayez@iconplc.com,6,100,11,0,5840,52093
2,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,2,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2022-12-05 10:03:11.686305+00:00,2023-05-22 17:14:09.688516+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,13,100,91,0,5840,215266
3,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,3,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2023-12-28 07:42:19.546545+00:00,2024-04-30 18:26:59.653778+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,58,100,189,0,5840,365028
4,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,4,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2024-02-28 10:59:15.483197+00:00,2024-08-08 21:51:43.412082+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,20,100,3,0,5840,409741
5,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,5,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2024-04-05 07:35:10.940508+00:00,2024-09-22 14:17:33.857816+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,14,100,3,0,5840,432518
6,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,6,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2024-04-10 00:25:33.191926+00:00,2024-10-09 16:51:59.364046+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,1,100,1,0,5840,442278
7,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,7,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2024-10-22 05:54:09.030023+00:00,2025-04-09 19:21:23.954117+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,13,100,76,0,5840,539358
8,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,COUNTRY,UNITED STATES,,1,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2021-11-17 16:56:54.230410+00:00,2022-05-17 16:55:50.668022+00:00,Quality Review,Sandrine.Cayez@iconplc.com,0,100,0,0,5840,52279
9,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,COUNTRY,UNITED STATES,,2,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2022-11-22 17:13:53.887567+00:00,2023-05-22 17:13:53.823564+00:00,Quality Review,,0,100,0,0,5840,215265


In [2]:
# Szybki sposób na załadowanie danych:
df1 = delta_sharing.load_as_pandas(
    "delta_config.json#icon_export.icon_export.quality_review", 
    limit=100
)

df2 = delta_sharing.load_as_pandas(
    "delta_config.json#icon_export.icon_export.quality_review2", 
    limit=100
)

In [3]:
df2

Unnamed: 0,Sponsor,Product,Protocol,FileType,Country,Site,Cycle,PackageType,PackageName,PackageOwner,...,LastAssignmentCycleCompleteDate,NextCycleScheduledDate,QualityReviewStatus,Assignee,DaysOpen,PercentageComplete,NoDocuments,Remaining,PackageId,AssignmentId
0,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,SITE,UNITED STATES,"002001:Bhattacharjee, Rakesh",1,Adhoc,"Adhoc- Site- 002001:Bhattacharjee, Rakesh",Jimena.Oblea@iconplc.com,...,2021-05-13 20:29:17.875137+00:00,NaT,QC Complete,,0,100,1,0,3102,11565
1,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,1,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2021-11-23 15:34:04.028004+00:00,2022-05-17 16:23:29.077867+00:00,QC Complete,Sandrine.Cayez@iconplc.com,6,100,11,0,5840,52093
2,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,2,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2022-12-05 10:03:11.686305+00:00,2023-05-22 17:14:09.688516+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,13,100,91,0,5840,215266
3,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,3,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2023-12-28 07:42:19.546545+00:00,2024-04-30 18:26:59.653778+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,58,100,189,0,5840,365028
4,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,4,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2024-02-28 10:59:15.483197+00:00,2024-08-08 21:51:43.412082+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,20,100,3,0,5840,409741
5,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,5,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2024-04-05 07:35:10.940508+00:00,2024-09-22 14:17:33.857816+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,14,100,3,0,5840,432518
6,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,6,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2024-04-10 00:25:33.191926+00:00,2024-10-09 16:51:59.364046+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,1,100,1,0,5840,442278
7,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,TRIAL,,,7,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2024-10-22 05:54:09.030023+00:00,2025-04-09 19:21:23.954117+00:00,QC Complete,Basawarajgouda.Patil@iconplc.com,13,100,76,0,5840,539358
8,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,COUNTRY,UNITED STATES,,1,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2021-11-17 16:56:54.230410+00:00,2022-05-17 16:55:50.668022+00:00,Quality Review,Sandrine.Cayez@iconplc.com,0,100,0,0,5840,52279
9,Harmony Biosciences,HBS-101 (Pitolisant),HBS-101-CL-002,COUNTRY,UNITED STATES,,2,BioStats,BioStats,Jimena.Oblea@iconplc.com,...,2022-11-22 17:13:53.887567+00:00,2023-05-22 17:13:53.823564+00:00,Quality Review,,0,100,0,0,5840,215265


In [None]:
# ALTERNATYWNE ROZWIĄZANIE: Implementacja Delta Sharing API bez biblioteki delta-sharing
# To można użyć w Snowflake UDF z external access

import requests
import json
import pandas as pd
from typing import Dict, List, Optional

class SnowflakeDeltaSharing:
    """
    Implementacja Delta Sharing API bez użycia biblioteki delta-sharing
    Dostosowana do użycia w Snowflake UDF
    """
    
    def __init__(self, endpoint: str, bearer_token: str, metastore_id: str):
        self.endpoint = endpoint
        self.bearer_token = bearer_token
        self.metastore_id = metastore_id
        self.base_url = f"{endpoint}/metastores/{metastore_id}"
        self.headers = {
            'Authorization': f'Bearer {bearer_token}',
            'Content-Type': 'application/json',
            'User-Agent': 'Snowflake-Delta-Sharing-Client/1.0'
        }
    
    def list_shares(self) -> Dict:
        """Lista dostępnych shares"""
        try:
            url = f"{self.base_url}/shares"
            response = requests.get(url, headers=self.headers, timeout=30)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": f"Failed to list shares: {str(e)}"}
    
    def list_schemas(self, share_name: str) -> Dict:
        """Lista schematów w share"""
        try:
            url = f"{self.base_url}/shares/{share_name}/schemas"
            response = requests.get(url, headers=self.headers, timeout=30)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": f"Failed to list schemas: {str(e)}"}
    
    def list_tables(self, share_name: str, schema_name: str) -> Dict:
        """Lista tabel w schemacie"""
        try:
            url = f"{self.base_url}/shares/{share_name}/schemas/{schema_name}/tables"
            response = requests.get(url, headers=self.headers, timeout=30)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": f"Failed to list tables: {str(e)}"}
    
    def get_table_metadata(self, share_name: str, schema_name: str, table_name: str) -> Dict:
        """Pobierz metadane tabeli"""
        try:
            url = f"{self.base_url}/shares/{share_name}/schemas/{schema_name}/tables/{table_name}/metadata"
            response = requests.get(url, headers=self.headers, timeout=30)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": f"Failed to get table metadata: {str(e)}"}
    
    def query_table(self, share_name: str, schema_name: str, table_name: str, 
                   limit: Optional[int] = None, predicates: Optional[List] = None) -> Dict:
        """Zapytanie do tabeli"""
        try:
            url = f"{self.base_url}/shares/{share_name}/schemas/{schema_name}/tables/{table_name}/query"
            
            payload = {
                "predicateHints": predicates or [],
                "limitHint": limit
            }
            
            # Usuń None values
            payload = {k: v for k, v in payload.items() if v is not None}
            
            response = requests.post(url, headers=self.headers, json=payload, timeout=60)
            response.raise_for_status()
            
            # Delta Sharing zwraca dane w różnych formatach
            # Tutaj obsługujemy podstawowe przypadki
            content_type = response.headers.get('content-type', '')
            
            if 'application/json' in content_type:
                return response.json()
            elif 'application/x-ndjson' in content_type:
                # Newline-delimited JSON
                lines = response.text.strip().split('\n')
                return [json.loads(line) for line in lines if line.strip()]
            else:
                # Prawdopodobnie format binarny (Parquet)
                return {
                    "data_format": content_type,
                    "data_size": len(response.content),
                    "data_preview": response.content[:100].hex()  # Hex preview
                }
                
        except Exception as e:
            return {"error": f"Failed to query table: {str(e)}"}
    
    def download_table_files(self, share_name: str, schema_name: str, table_name: str) -> Dict:
        """Pobierz informacje o plikach tabeli (dla zaawansowanego użycia)"""
        try:
            url = f"{self.base_url}/shares/{share_name}/schemas/{schema_name}/tables/{table_name}/files"
            response = requests.get(url, headers=self.headers, timeout=30)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": f"Failed to get table files: {str(e)}"}

# FUNKCJE POMOCNICZE DO UŻYCIA W SNOWFLAKE UDF

def create_delta_client_from_config(config_dict: Dict) -> SnowflakeDeltaSharing:
    """Utwórz klienta z konfiguracji"""
    return SnowflakeDeltaSharing(
        endpoint=config_dict['endpoint'],
        bearer_token=config_dict['bearerToken'],
        metastore_id=config_dict['endpoint'].split('/')[-1]  # Wyciągnij z URL
    )

def get_quality_review_data(limit: int = 100) -> Dict:
    """
    Funkcja specyficzna dla Twoich danych
    Ta funkcja może być bezpośrednio użyta w Snowflake UDF
    """
    config = {
        "endpoint": "https://northeurope-c2.azuredatabricks.net/api/2.0/delta-sharing/metastores/4c67fa35-b334-4339-9527-d140d0ffdbe1",
        "bearerToken": "tUnaIS-csD1HRRzwEqqgrWBEIJdeJFMTivNN8ZF2Aw7wLgnzdZVTK2qwkyDsc0Lc"
    }
    
    client = SnowflakeDeltaSharing(
        endpoint="https://northeurope-c2.azuredatabricks.net/api/2.0/delta-sharing",
        bearer_token=config["bearerToken"],
        metastore_id="4c67fa35-b334-4339-9527-d140d0ffdbe1"
    )
    
    # Pobierz dane z quality_review2
    result = client.query_table(
        share_name="icon_export",
        schema_name="icon_export", 
        table_name="quality_review2",
        limit=limit
    )
    
    return result

def explore_delta_sharing_structure() -> Dict:
    """Eksploruj strukturę Delta Sharing"""
    config = {
        "endpoint": "https://northeurope-c2.azuredatabricks.net/api/2.0/delta-sharing",
        "bearerToken": "tUnaIS-csD1HRRzwEqqgrWBEIJdeJFMTivNN8ZF2Aw7wLgnzdZVTK2qwkyDsc0Lc",
        "metastore_id": "4c67fa35-b334-4339-9527-d140d0ffdbe1"
    }
    
    client = SnowflakeDeltaSharing(**config)
    
    result = {
        "shares": client.list_shares(),
        "schemas": client.list_schemas("icon_export"),
        "tables": client.list_tables("icon_export", "icon_export"),
        "quality_review2_metadata": client.get_table_metadata("icon_export", "icon_export", "quality_review2")
    }
    
    return result

# PRZYKŁAD IMPLEMENTACJI BATCH PROCESSING
def batch_sync_delta_tables(tables_config: List[Dict], batch_size: int = 1000) -> Dict:
    """
    Synchronizacja wielu tabel w trybie batch
    Może być użyte w Snowflake Stored Procedure
    """
    config = {
        "endpoint": "https://northeurope-c2.azuredatabricks.net/api/2.0/delta-sharing",
        "bearerToken": "tUnaIS-csD1HRRzwEqqgrWBEIJdeJFMTivNN8ZF2Aw7wLgnzdZVTK2qwkyDsc0Lc",
        "metastore_id": "4c67fa35-b334-4339-9527-d140d0ffdbe1"
    }
    
    client = SnowflakeDeltaSharing(**config)
    results = {}
    
    for table_config in tables_config:
        share_name = table_config.get('share_name')
        schema_name = table_config.get('schema_name') 
        table_name = table_config.get('table_name')
        
        try:
            # Pobierz dane
            data = client.query_table(
                share_name=share_name,
                schema_name=schema_name,
                table_name=table_name,
                limit=batch_size
            )
            
            results[f"{share_name}.{schema_name}.{table_name}"] = {
                "status": "success",
                "data": data
            }
            
        except Exception as e:
            results[f"{share_name}.{schema_name}.{table_name}"] = {
                "status": "error",
                "error": str(e)
            }
    
    return results

# TESTOWANIE LOKALNIE (przed wdrożeniem do Snowflake)
if __name__ == "__main__":
    # Test podstawowej funkcjonalności
    print("Testing Delta Sharing connection...")
    
    try:
        result = get_quality_review_data(5)
        print("Success!")
        print(json.dumps(result, indent=2))
    except Exception as e:
        print(f"Error: {e}")
    
    # Test eksploracji struktury
    print("\nExploring structure...")
    try:
        structure = explore_delta_sharing_structure()
        print(json.dumps(structure, indent=2))
    except Exception as e:
        print(f"Error: {e}")