From dc582b31c749cfa41210cf9e93502a1ba9c94a6e Mon Sep 17 00:00:00 2001 From: geekwhocodes Date: Wed, 5 Mar 2025 08:52:01 +0000 Subject: [PATCH 01/10] restructure --- src/source_msgraph/client.py | 103 ------------------ .../{ => core}/async_interator.py | 0 src/source_msgraph/core/base_client.py | 97 +++++++++++++++++ src/source_msgraph/{ => core}/constants.py | 0 src/source_msgraph/{ => core}/models.py | 3 +- src/source_msgraph/core/resource_provider.py | 50 +++++++++ src/source_msgraph/core/source.py | 53 +++++++++ src/source_msgraph/{ => core}/utils.py | 0 src/source_msgraph/generate_docs.py | 84 -------------- src/source_msgraph/resources.py | 30 ----- src/source_msgraph/resources/list_items.py | 22 ++++ src/source_msgraph/source.py | 63 ----------- 12 files changed, 223 insertions(+), 282 deletions(-) delete mode 100644 src/source_msgraph/client.py rename src/source_msgraph/{ => core}/async_interator.py (100%) create mode 100644 src/source_msgraph/core/base_client.py rename src/source_msgraph/{ => core}/constants.py (100%) rename src/source_msgraph/{ => core}/models.py (98%) create mode 100644 src/source_msgraph/core/resource_provider.py create mode 100644 src/source_msgraph/core/source.py rename src/source_msgraph/{ => core}/utils.py (100%) delete mode 100644 src/source_msgraph/generate_docs.py delete mode 100644 src/source_msgraph/resources.py create mode 100644 src/source_msgraph/resources/list_items.py delete mode 100644 src/source_msgraph/source.py diff --git a/src/source_msgraph/client.py b/src/source_msgraph/client.py deleted file mode 100644 index b429073..0000000 --- a/src/source_msgraph/client.py +++ /dev/null @@ -1,103 +0,0 @@ -from msgraph import GraphServiceClient -from kiota_abstractions.base_request_configuration import RequestConfiguration -from msgraph.generated.models.o_data_errors.o_data_error import ODataError -from azure.identity import ClientSecretCredential -from source_msgraph.async_interator import AsyncToSyncIterator -from source_msgraph.models import ConnectorOptions -from source_msgraph.utils import get_python_schema, to_json, to_pyspark_schema -from typing import Dict, Any - -class GraphClient: - def __init__(self, options: ConnectorOptions): - """ - Initializes the fetcher with the Graph client, resource path, and query parameters. - - - :param options: Connector options. - """ - credentials = ClientSecretCredential(options.tenant_id, options.client_id, options.client_secret) - self.graph_client = GraphServiceClient(credentials=credentials) - self.options: ConnectorOptions = options - - - async def fetch_data(self): - """ - Fetches data from Microsoft Graph using the dynamically built request. - Handles pagination automatically. 
- """ - query_parameters_cls = self.options.resource.get_query_parameters_cls() - - if query_parameters_cls: - try: - query_parameters_instance = query_parameters_cls() # Ensure it can be instantiated without arguments - except TypeError as e: - raise ValueError(f"Failed to instantiate {query_parameters_cls.__name__}: {e}") - - if self.options.resource.query_params: - for k, v in self.options.resource.query_params.items(): - k = k.removeprefix("%24") - if hasattr(query_parameters_instance, k): - setattr(query_parameters_instance, k, v) # Set attributes dynamically - else: - raise AttributeError(f"{query_parameters_cls.__name__} has no attribute '{k}'") - - request_configuration = RequestConfiguration( - query_parameters=query_parameters_instance - ) - - try: - builder = self.options.resource.get_request_builder_cls()(self.graph_client.request_adapter, self.options.resource.resource_params) - items = await builder.get(request_configuration=request_configuration) - while True: - for item in items.value: - yield item - if not items.odata_next_link: - break - items = await builder.with_url(items.odata_next_link).get() - - except ODataError as e: - raise Exception(f"Graph API Error: {e.error.message}") - - -def iter_records(options: ConnectorOptions): - """ - Iterates over records from the Microsoft Graph API. - - :param options: Connector options containing authentication credentials and resource details. - :return: A synchronous iterator over the fetched data. - :raises ValueError: If any required credentials or resource parameters are missing. - :raises GraphAPIError: If the API request fails. - """ - fetcher = GraphClient(options) - async_gen = fetcher.fetch_data() - return AsyncToSyncIterator(async_gen) - - - -def get_resource_schema(options: ConnectorOptions) -> Dict[str, Any]: - """ - Retrieves the schema of a Microsoft Graph API resource by fetching a single record. - - :param options: Connector options containing authentication credentials and resource details. - :return: A dictionary representing the schema of the resource. - :raises ValueError: If no records are found or if required options are missing. - :raises GraphAPIError: If the API request fails. - """ - fetcher = GraphClient(options) - async_gen = fetcher.fetch_data() - - try: - record = next(AsyncToSyncIterator(async_gen), None) - if not record: - raise ValueError(f"No records found for resource: {options.resource.resource_name}") - record = to_json(record) - schema = to_pyspark_schema(get_python_schema(record)) - return record, schema - - except StopIteration: - raise ValueError(f"No records available for {options.resource.resource_name}") - -# Example usage -# options = ConnectorOptions(...) 
-# schema = get_resource_schema(options) -# print(json.dumps(schema, indent=2)) diff --git a/src/source_msgraph/async_interator.py b/src/source_msgraph/core/async_interator.py similarity index 100% rename from src/source_msgraph/async_interator.py rename to src/source_msgraph/core/async_interator.py diff --git a/src/source_msgraph/core/base_client.py b/src/source_msgraph/core/base_client.py new file mode 100644 index 0000000..6fceb1e --- /dev/null +++ b/src/source_msgraph/core/base_client.py @@ -0,0 +1,97 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict +from msgraph import GraphServiceClient +from kiota_abstractions.base_request_configuration import RequestConfiguration +from msgraph.generated.models.o_data_errors.o_data_error import ODataError +from source_msgraph.core.async_interator import AsyncToSyncIterator +from source_msgraph.core.models import BaseResource +from source_msgraph.core.utils import get_python_schema, to_json, to_pyspark_schema + +from azure.identity import DefaultAzureCredential, EnvironmentCredential + +class BaseResourceProvider(ABC): + def __init__(self, options: Dict[str, Any]): + """ + Initializes the fetcher with the Graph client, resource path, and query parameters. + + :param options: Connector options. + """ + self.options = options + credentials = DefaultAzureCredential() + self.graph_client = GraphServiceClient(credentials=credentials) + + async def fetch_data(self): + """ + Fetches data from Microsoft Graph using the dynamically built request. + Handles pagination automatically. + """ + query_parameters_cls = self.resource.get_query_parameters_cls() + + if query_parameters_cls: + try: + query_parameters_instance = query_parameters_cls() # Ensure it can be instantiated without arguments + except TypeError as e: + raise ValueError(f"Failed to instantiate {query_parameters_cls.__name__}: {e}") + + if self.resource.query_params: + for k, v in self.resource.query_params.items(): + k = k.removeprefix("%24") + if hasattr(query_parameters_instance, k): + setattr(query_parameters_instance, k, v) # Set attributes dynamically + else: + raise AttributeError(f"{query_parameters_cls.__name__} has no attribute '{k}'") + + request_configuration = RequestConfiguration( + query_parameters=query_parameters_instance + ) + + try: + builder = self.resource.get_request_builder_cls()(self.graph_client.request_adapter, self.resource.resource_params) + items = await builder.get(request_configuration=request_configuration) + while True: + for item in items.value: + yield item + if not items.odata_next_link: + break + items = await builder.with_url(items.odata_next_link).get() + + except ODataError as e: + raise Exception(f"Graph API Error: {e.error.message}") + + def iter_records(self): + """ + Iterates over records from the Microsoft Graph API. + + :param options: Connector options containing authentication credentials and resource details. + :return: A synchronous iterator over the fetched data. + :raises ValueError: If any required credentials or resource parameters are missing. + :raises GraphAPIError: If the API request fails. + """ + async_gen = self.fetch_data() + return AsyncToSyncIterator(async_gen) + + def get_resource_schema(self) -> Dict[str, Any]: + """ + Retrieves the schema of a Microsoft Graph API resource by fetching a single record. + + :param options: Connector options containing authentication credentials and resource details. + :return: A dictionary representing the schema of the resource. 
+        :raises ValueError: If no records are found or if required options are missing.
+        :raises GraphAPIError: If the API request fails.
+        """
+        async_gen = self.fetch_data()
+
+        try:
+            record = next(AsyncToSyncIterator(async_gen), None)
+            if not record:
+                raise ValueError(f"No records found for resource: {self.resource.resource_name}")
+            record = to_json(record)
+            schema = to_pyspark_schema(get_python_schema(record))
+            return record, schema
+
+        except StopIteration:
+            raise ValueError(f"No records available for {self.resource.resource_name}")
+
+    @abstractmethod
+    def resource(self) -> BaseResource:
+        ...
\ No newline at end of file
diff --git a/src/source_msgraph/constants.py b/src/source_msgraph/core/constants.py
similarity index 100%
rename from src/source_msgraph/constants.py
rename to src/source_msgraph/core/constants.py
diff --git a/src/source_msgraph/models.py b/src/source_msgraph/core/models.py
similarity index 98%
rename from src/source_msgraph/models.py
rename to src/source_msgraph/core/models.py
index 1f5c046..964dc7e 100644
--- a/src/source_msgraph/models.py
+++ b/src/source_msgraph/core/models.py
@@ -3,7 +3,7 @@
 import inspect
 import re
 from typing import Any, Dict
-from source_msgraph.constants import MSGRAPH_SDK_PACKAGE
+from source_msgraph.core.constants import MSGRAPH_SDK_PACKAGE
 from urllib.parse import unquote
 from kiota_abstractions.base_request_builder import BaseRequestBuilder
 
@@ -157,7 +157,6 @@ class ConnectorOptions:
     tenant_id: str
     client_id: str
     client_secret: str
-    resource: BaseResource
 
     def __post_init__(self):
         ...
diff --git a/src/source_msgraph/core/resource_provider.py b/src/source_msgraph/core/resource_provider.py
new file mode 100644
index 0000000..460d156
--- /dev/null
+++ b/src/source_msgraph/core/resource_provider.py
@@ -0,0 +1,50 @@
+from functools import lru_cache
+import importlib
+import pkgutil
+from typing import Dict, Type
+from source_msgraph.core.base_client import BaseResourceProvider
+
+
+def load_resource_providers() -> Dict[str, Type[BaseResourceProvider]]:
+    """
+    Dynamically loads all resource providers from the resources package
+    """
+    providers = {}
+    package = 'source_msgraph.resources'
+
+    # Import the resources package
+    resources_pkg = importlib.import_module(package)
+
+    # Iterate through all submodules
+    for _, name, _ in pkgutil.iter_modules(resources_pkg.__path__):
+        if name != 'base': # Skip the base module
+            try:
+                # Import the module
+                module = importlib.import_module(f'{package}.{name}')
+                # Look for *ResourceProvider class
+                for attr_name in dir(module):
+                    if attr_name.endswith('ResourceProvider'):
+                        provider_class = getattr(module, attr_name)
+                        if (isinstance(provider_class, type) and
+                            issubclass(provider_class, BaseResourceProvider) and
+                            provider_class != BaseResourceProvider):
+                            providers[name] = provider_class
+            except ImportError as e:
+                print(f"Warning: Could not load resource provider {name}: {e}")
+
+    return providers
+
+def get_resource_provider(resource_name: str, options: Dict[str, str]) -> BaseResourceProvider:
+    """
+    Factory method to get the appropriate resource provider
+    """
+    providers = load_resource_providers()
+    provider_class: BaseResourceProvider = providers.get(resource_name)
+
+    if not provider_class:
+        available = ', '.join(providers.keys())
+        raise ValueError(
+            f"Unsupported resource name: '{resource_name}'. "
+            f"Available resources: {available}"
+        )
+    return provider_class(options)
\ No newline at end of file
diff --git a/src/source_msgraph/core/source.py b/src/source_msgraph/core/source.py
new file mode 100644
index 0000000..1a79af3
--- /dev/null
+++ b/src/source_msgraph/core/source.py
@@ -0,0 +1,53 @@
+import logging
+from typing import Any, Dict, Union
+from pyspark.sql.datasource import DataSource, DataSourceReader
+from pyspark.sql.types import StructType
+from source_msgraph.core.base_client import BaseResourceProvider
+
+from source_msgraph.core.resource_provider import get_resource_provider
+
+# Reference https://learn.microsoft.com/en-us/azure/databricks/pyspark/datasources
+
+logger = logging.getLogger(__name__)
+
+class MSGraphDataSource(DataSource):
+    """
+    PySpark DataSource for reading Microsoft Graph API resources.
+    """
+    def __init__(self, options: Dict[str, Any]):
+
+        self.resource_name = options.pop("resource")
+        if not self.resource_name:
+            raise ValueError("resource is missing, please provide a valid resource name.")
+        self.options = options
+
+    @classmethod
+    def name(cls):
+        return "msgraph"
+
+    def schema(self):
+        logger.info("Schema not provided, inferring from the source.")
+        resource_provider:BaseResourceProvider = get_resource_provider(self.resource_name, self.options)
+        _, schema = resource_provider.get_resource_schema()
+        logger.debug(f"Inferred schema: {schema}")
+        return schema
+
+    def reader(self, schema: StructType):
+        return MSGraphDataSourceReader(self.resource_name, self.options, schema)
+
+
+class MSGraphDataSourceReader(DataSourceReader):
+
+    def __init__(self, resource_name :str, options: Dict[str, Any], schema: Union[StructType, str]):
+        self.schema: StructType = schema
+        self.options = options
+        self.resource_name = resource_name
+
+    def read(self, partition):
+        from source_msgraph.core.utils import to_json
+        from pyspark.sql import Row
+        resource_provider:BaseResourceProvider = get_resource_provider(self.resource_name, self.options)
+        for row in resource_provider.iter_records():
+            row = to_json(row)
+            row_data = {f.name: row.get(f.name, None) for f in self.schema.fields}
+            yield Row(**row_data)
diff --git a/src/source_msgraph/utils.py b/src/source_msgraph/core/utils.py
similarity index 100%
rename from src/source_msgraph/utils.py
rename to src/source_msgraph/core/utils.py
diff --git a/src/source_msgraph/generate_docs.py b/src/source_msgraph/generate_docs.py
deleted file mode 100644
index 0903c63..0000000
--- a/src/source_msgraph/generate_docs.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import os
-from urllib.parse import unquote
-from source_msgraph.models import BaseResource
-from source_msgraph.resources import RESOURCE_CONFIGS
-
-def generate_markdown(resource: BaseResource) -> str:
-    """
-    Generates sophisticated markdown documentation for a given BaseResource.
- """ - md_content = [f"# {resource.name.capitalize()} Resource", ""] - md_content.append(f"**Resource Name:** `{resource.name.lower()}`") - - - md_content.append("\n## Overview") - md_content.append(f"The `{resource.name}` resource provides a structured way to interact with Microsoft Graph API.") - md_content.append("This resource supports operations such as retrieval and filtering of data.") - - md_content.append("\n## Resource Parameters") - if len(resource.resource_params.keys()) > 0: - md_content.append("| Parameter | Type | Required | Description |") - md_content.append("|-----------|------|----------|-------------|") - for param in resource.resource_params or {}: - md_content.append(f"| `{unquote(param)}` | `str` | ✅ | Required path parameter for resource access. |") - else: - md_content.append(f"> No parameters required for `{resource.name.lower()}` resource.") - - - md_content.append("\n## Query Parameters") - if len(resource.query_params.keys()) > 0: - md_content.append("| Parameter | Type | Required | Description |") - md_content.append("|-----------|------|----------|-------------|") - for param in resource.query_params or {}: - md_content.append(f"| `{unquote(param)}` | `str` | ❌ | Optional query parameter to refine the API request. |") - else: - md_content.append(f">> No query parameters are required for `{resource.name.lower()}` resource.") - - md_content.append("---") - - md_content.append("Tip: Please refer [Microsoft Graph API]() documentation if you don't see a field. This can be resolved by provising `expand` option.") - - md_content.append("\n## Example Usage") - md_content.append("```python") - md_content.append("from source_msgraph.source import MSGraphDataSource") - md_content.append("spark.dataSource.register(MSGraphDataSource)") - md_content.append("") - md_content.append("# Read data using Microsoft Graph") - md_content.append("df = spark.read.format(\"msgraph\") ") - md_content.append(" .option(\"tenant_id\", tenant_id)") - md_content.append(" .option(\"client_id\", client_id)") - md_content.append(" .option(\"client_secret\", client_secret)") - md_content.append(f" .option(\"resource\", \"{resource.name}\")") - for param in resource.resource_params or {}: - md_content.append(f" .option(\"{param}\", \"\")") - for param in resource.query_params or {}: - md_content.append(f" .option(\"{param}\", \"\")") - md_content.append(" .schema(\"id string, eTag string\")") - md_content.append(" .load()") - md_content.append("") - md_content.append("df.show()") - md_content.append("```") - - return "\n".join(md_content) - -def generate_docs(output_dir: str = "docs"): - """ - Generates sophisticated markdown documentation for all configured resources. 
- """ - os.makedirs(output_dir, exist_ok=True) - - for config in RESOURCE_CONFIGS: - resource = BaseResource( - name=config["name"], - resource_name=config["resource_name"], - request_builder_module=config["request_builder_module"] - ) - - md_content = generate_markdown(resource) - file_path = os.path.join(output_dir, f"{resource.name}.md") - with open(file_path, "w", encoding="utf-8") as f: - f.write(md_content) - print(f"Generated documentation: {file_path}") - -if __name__ == "__main__": - generate_docs() \ No newline at end of file diff --git a/src/source_msgraph/resources.py b/src/source_msgraph/resources.py deleted file mode 100644 index 43e5b08..0000000 --- a/src/source_msgraph/resources.py +++ /dev/null @@ -1,30 +0,0 @@ -# Define the resources to generate -from source_msgraph.models import BaseResource - - -RESOURCE_CONFIGS = [ - {"name": "sites", "resource_name": "sites", "request_builder_module": "sites.sites_request_builder"}, - {"name": "lists", "resource_name": "lists", "request_builder_module": "sites.item.lists.lists_request_builder"}, - {"name": "list_items", "resource_name": "items", "request_builder_module": "sites.item.lists.item.items.items_request_builder"}, -] - - - -def get_resource(name: str): - """ - Generates a list of BaseResource instances for specified Microsoft Graph resources. - """ - config = next((config for config in RESOURCE_CONFIGS if config["name"] == name), None) - if not config: - raise ValueError(f"Resource '{name}' is not supported yet. stay tuned!") - - # Create and store the BaseResource instance - resource = BaseResource( - name=config["name"], - resource_name=config["resource_name"], - request_builder_module=config["request_builder_module"] - ) - return resource - - - diff --git a/src/source_msgraph/resources/list_items.py b/src/source_msgraph/resources/list_items.py new file mode 100644 index 0000000..6302d53 --- /dev/null +++ b/src/source_msgraph/resources/list_items.py @@ -0,0 +1,22 @@ +from functools import cached_property +from typing import Dict + +from source_msgraph.core.base_client import BaseResourceProvider +from source_msgraph.core.models import BaseResource + + +class ListItemsResourceProvider(BaseResourceProvider): + + def __init__(self, options: Dict[str, str]): + self.options = options + super().__init__(options) + + @cached_property + def resource(self) -> BaseResource: + return BaseResource( + name="list_items", + resource_name="items", + request_builder_module="sites.item.lists.item.items.items_request_builder" + ).map_options_to_params(self.options) + + diff --git a/src/source_msgraph/source.py b/src/source_msgraph/source.py deleted file mode 100644 index 7366e3d..0000000 --- a/src/source_msgraph/source.py +++ /dev/null @@ -1,63 +0,0 @@ -import logging -from typing import Any, Dict, Union -from pyspark.sql.datasource import DataSource, DataSourceReader -from pyspark.sql.types import StructType -from source_msgraph.client import get_resource_schema, iter_records -from source_msgraph.models import ConnectorOptions - -from source_msgraph.resources import get_resource -# Reference https://learn.microsoft.com/en-us/azure/databricks/pyspark/datasources - -logger = logging.getLogger(__name__) - -class MSGraphDataSource(DataSource): - """ - - """ - def __init__(self, options: Dict[str, Any]): - - tenant_id=options.pop("tenant_id") - client_id=options.pop("client_id") - client_secret=options.pop("client_secret") - - resource_name = options.pop("resource") - if not resource_name: - raise ValueError("resource is missing, please provide 
a valid resource name.") - - resource = get_resource(resource_name).map_options_to_params(options) - - self.connector_options: ConnectorOptions = ConnectorOptions( - tenant_id=tenant_id, - client_id=client_id, - client_secret=client_secret, - resource=resource - ) - - - @classmethod - def name(cls): - return "msgraph" - - def schema(self): - logger.info("Schema not provided, infering from the source.") - _, schema = get_resource_schema(self.connector_options) - logger.debug(f"Infered schema : {schema}") - return schema - - def reader(self, schema: StructType): - return MSGraphDataSourceReader(self.connector_options, schema) - - -class MSGraphDataSourceReader(DataSourceReader): - - def __init__(self, options: ConnectorOptions, schema: Union[StructType, str]): - self.schema: StructType = schema - self.options:ConnectorOptions = options - - def read(self, partition): - from source_msgraph.utils import to_json - from pyspark.sql import Row - for row in iter_records(self.options): - row = to_json(row) - row_data = {f.name: row.get(f.name, None) for f in self.schema.fields} - yield Row(**row_data) From d25e920bc344771f2cd47dec50a28f141ee0e51f Mon Sep 17 00:00:00 2001 From: geekwhocodes Date: Wed, 5 Mar 2025 11:29:21 +0000 Subject: [PATCH 02/10] add extras support --- poetry.lock | 2 +- pyproject.toml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 6d15189..74e07f5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3012,4 +3012,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.12,<4" -content-hash = "55fe3e2bccd32c0c86b24ee1f86e76a6137c76af323fb7d50cc53b9b5d5ca1f3" +content-hash = "0cdc9d351347552e7a8e246e9d9663b1e1c11fa0b87a86cdafdcecfeff21fb83" diff --git a/pyproject.toml b/pyproject.toml index b497866..cc94c72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,8 @@ dependencies = [ [tool.poetry] packages = [{include = "source_msgraph", from = "src"}] +[tool.poetry.extras] +list_items= [] [tool.poetry.group.dev.dependencies] pytest = "^8.3.4" From 0b76852ffc1e014ea25c239c9f0ba7c5cf88992b Mon Sep 17 00:00:00 2001 From: geekwhocodes Date: Wed, 5 Mar 2025 11:29:51 +0000 Subject: [PATCH 03/10] add support for extras --- src/source_msgraph/core/models.py | 5 +++++ src/source_msgraph/core/resource_provider.py | 14 ++++++-------- src/source_msgraph/core/source.py | 4 ++-- src/source_msgraph/resources/__init__.py | 1 + src/source_msgraph/resources/list_items.py | 1 + 5 files changed, 15 insertions(+), 10 deletions(-) create mode 100644 src/source_msgraph/resources/__init__.py diff --git a/src/source_msgraph/core/models.py b/src/source_msgraph/core/models.py index 964dc7e..2ab1145 100644 --- a/src/source_msgraph/core/models.py +++ b/src/source_msgraph/core/models.py @@ -1,6 +1,7 @@ from dataclasses import dataclass import importlib import inspect +import logging import re from typing import Any, Dict from source_msgraph.core.constants import MSGRAPH_SDK_PACKAGE @@ -134,6 +135,8 @@ def map_options_to_params(self, options: Dict[str, Any]) -> 'BaseResource': raise ValueError(f"Missing required resource parameters: {', '.join(missing_params)}") # TODO: add max $top value validation. 
+ if int(options.get("top", 1)) <= 100: + logging.warning("Setting a low `top` value in Microsoft Graph queries can cause high latency and increase throttling risk.") mapped_query_params = {"%24"+k: v for k, v in options.items() if k in self.query_params} mapped_resource_params = {k.replace("-", "%2D"): v for k, v in options.items() if k in self.resource_params} @@ -147,6 +150,8 @@ def map_options_to_params(self, options: Dict[str, Any]) -> 'BaseResource': self.resource_params = mapped_resource_params return self + + GUID_PATTERN = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$") diff --git a/src/source_msgraph/core/resource_provider.py b/src/source_msgraph/core/resource_provider.py index 460d156..5f2a9d0 100644 --- a/src/source_msgraph/core/resource_provider.py +++ b/src/source_msgraph/core/resource_provider.py @@ -4,7 +4,7 @@ from typing import Dict, Type from source_msgraph.core.base_client import BaseResourceProvider - +# @lru_cache(maxsize=10) def load_resource_providers() -> Dict[str, Type[BaseResourceProvider]]: """ Dynamically loads all resource providers from the resources package @@ -15,13 +15,10 @@ def load_resource_providers() -> Dict[str, Type[BaseResourceProvider]]: # Import the resources package resources_pkg = importlib.import_module(package) - # Iterate through all submodules for _, name, _ in pkgutil.iter_modules(resources_pkg.__path__): if name != 'base': # Skip the base module try: - # Import the module module = importlib.import_module(f'{package}.{name}') - # Look for *ResourceProvider class for attr_name in dir(module): if attr_name.endswith('ResourceProvider'): provider_class = getattr(module, attr_name) @@ -32,13 +29,14 @@ def load_resource_providers() -> Dict[str, Type[BaseResourceProvider]]: except ImportError as e: print(f"Warning: Could not load resource provider {name}: {e}") - return providers + return frozenset(providers.items()) -def get_resource_provider(resource_name: str, options: Dict[str, str]) -> BaseResourceProvider: +# @lru_cache(maxsize=10) +def get_resource_provider(resource_name: str, options: frozenset) -> BaseResourceProvider: """ Factory method to get the appropriate resource provider """ - providers = load_resource_providers() + providers = dict(load_resource_providers()) provider_class: BaseResourceProvider = providers.get(resource_name) if not provider_class: @@ -47,4 +45,4 @@ def get_resource_provider(resource_name: str, options: Dict[str, str]) -> BaseRe f"Unsupported resource name: '{resource_name}'. 
" f"Available resources: {available}" ) - return provider_class(options) \ No newline at end of file + return provider_class(dict(options)) \ No newline at end of file diff --git a/src/source_msgraph/core/source.py b/src/source_msgraph/core/source.py index 1a79af3..b23466a 100644 --- a/src/source_msgraph/core/source.py +++ b/src/source_msgraph/core/source.py @@ -19,7 +19,7 @@ def __init__(self, options: Dict[str, Any]): self.resource_name = options.pop("resource") if not self.resource_name: raise ValueError("resource is missing, please provide a valid resource name.") - self.options = options + self.options = frozenset(options.items()) @classmethod def name(cls): @@ -38,7 +38,7 @@ def reader(self, schema: StructType): class MSGraphDataSourceReader(DataSourceReader): - def __init__(self, resource_name :str, options: Dict[str, Any], schema: Union[StructType, str]): + def __init__(self, resource_name :str, options: frozenset, schema: Union[StructType, str]): self.schema: StructType = schema self.options = options self.resource_name = resource_name diff --git a/src/source_msgraph/resources/__init__.py b/src/source_msgraph/resources/__init__.py new file mode 100644 index 0000000..5ee7122 --- /dev/null +++ b/src/source_msgraph/resources/__init__.py @@ -0,0 +1 @@ +from .list_items import * # type: ignore \ No newline at end of file diff --git a/src/source_msgraph/resources/list_items.py b/src/source_msgraph/resources/list_items.py index 6302d53..b9927d5 100644 --- a/src/source_msgraph/resources/list_items.py +++ b/src/source_msgraph/resources/list_items.py @@ -1,4 +1,5 @@ from functools import cached_property +import logging from typing import Dict from source_msgraph.core.base_client import BaseResourceProvider From 8a8059acde676e63b5d5389059a13764a121d0e4 Mon Sep 17 00:00:00 2001 From: geekwhocodes Date: Wed, 5 Mar 2025 12:36:24 +0000 Subject: [PATCH 04/10] change package name --- pyproject.toml | 8 +- src/source_msgraph/__init__.py | 0 src/source_msgraph/core/async_interator.py | 69 -------- src/source_msgraph/core/base_client.py | 97 ---------- src/source_msgraph/core/constants.py | 2 - src/source_msgraph/core/models.py | 177 ------------------- src/source_msgraph/core/resource_provider.py | 48 ----- src/source_msgraph/core/source.py | 53 ------ src/source_msgraph/core/utils.py | 91 ---------- src/source_msgraph/resources/__init__.py | 1 - src/source_msgraph/resources/list_items.py | 23 --- tests/test_source.py | 2 +- 12 files changed, 6 insertions(+), 565 deletions(-) delete mode 100644 src/source_msgraph/__init__.py delete mode 100644 src/source_msgraph/core/async_interator.py delete mode 100644 src/source_msgraph/core/base_client.py delete mode 100644 src/source_msgraph/core/constants.py delete mode 100644 src/source_msgraph/core/models.py delete mode 100644 src/source_msgraph/core/resource_provider.py delete mode 100644 src/source_msgraph/core/source.py delete mode 100644 src/source_msgraph/core/utils.py delete mode 100644 src/source_msgraph/resources/__init__.py delete mode 100644 src/source_msgraph/resources/list_items.py diff --git a/pyproject.toml b/pyproject.toml index cc94c72..0838f00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,13 @@ [project] -name = "source-msgraph" +name = "pyspark_msgraph_source" version = "0.1.0" -description = "" +description = "Pyspark custom data source for Microsoft Graph APIs, including path and query parameters, with PySpark read examples." 
authors = [ {name = "geekwhocodes",email = "ganeshraskar@outlook.com"} ] readme = "README.md" +homepage = "https://github.com/geekwhocodes/pyspark-msgraph-source" +repository = "https://github.com/geekwhocodes/pyspark-msgraph-source" requires-python = ">=3.12,<4" dependencies = [ "pyspark (==4.0.0.dev2)", @@ -15,7 +17,7 @@ dependencies = [ ] [tool.poetry] -packages = [{include = "source_msgraph", from = "src"}] +packages = [{include = "pyspark_msgraph_source", from = "src"}] [tool.poetry.extras] list_items= [] diff --git a/src/source_msgraph/__init__.py b/src/source_msgraph/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/source_msgraph/core/async_interator.py b/src/source_msgraph/core/async_interator.py deleted file mode 100644 index b2c121a..0000000 --- a/src/source_msgraph/core/async_interator.py +++ /dev/null @@ -1,69 +0,0 @@ -import asyncio - -import asyncio -from typing import AsyncGenerator, Iterator, Any - -import asyncio -from typing import AsyncGenerator, Iterator, Any - -class AsyncToSyncIterator: - """ - Converts an async generator into a synchronous iterator while ensuring proper event loop handling. - """ - - def __init__(self, async_gen: AsyncGenerator[Any, None]): - """ - Initializes the iterator by consuming an async generator synchronously. - - Args: - async_gen (AsyncGenerator): The async generator yielding results. - """ - self.async_gen = async_gen - self.iterator = self._to_iterator() - - def _to_iterator(self) -> Iterator: - """ - Ensures that the async generator is consumed using the correct event loop. - Uses streaming (does not load all results into memory). - """ - try: - loop = asyncio.get_running_loop() - return self._sync_generator(loop) # Works inside Jupyter - except RuntimeError: - return iter(asyncio.run(self._collect_results())) # Works in scripts - - def _sync_generator(self, loop: asyncio.AbstractEventLoop) -> Iterator: - """ - Streams async results into a sync generator while inside a running event loop. 
- """ - queue = asyncio.Queue() - - async def _producer(): - """Fills the queue with async results.""" - async for item in self.async_gen: - await queue.put(item) - await queue.put(None) # Sentinel to signal completion - - async def _consumer(): - """Yields items from the queue in sync mode.""" - task = loop.create_task(_producer()) - while True: - item = await queue.get() - if item is None: - break - yield item - await task # Ensure producer task completes - - return iter(loop.run_until_complete(self._collect_results())) - - async def _collect_results(self): - """Collects async generator results into a list (safe for asyncio.run).""" - return [item async for item in self.async_gen] - - def __iter__(self) -> Iterator: - """Returns the synchronous iterator.""" - return self.iterator - - def __next__(self) -> Any: - """Returns the next item from the iterator.""" - return next(self.iterator) \ No newline at end of file diff --git a/src/source_msgraph/core/base_client.py b/src/source_msgraph/core/base_client.py deleted file mode 100644 index 6fceb1e..0000000 --- a/src/source_msgraph/core/base_client.py +++ /dev/null @@ -1,97 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, Dict -from msgraph import GraphServiceClient -from kiota_abstractions.base_request_configuration import RequestConfiguration -from msgraph.generated.models.o_data_errors.o_data_error import ODataError -from source_msgraph.core.async_interator import AsyncToSyncIterator -from source_msgraph.core.models import BaseResource -from source_msgraph.core.utils import get_python_schema, to_json, to_pyspark_schema - -from azure.identity import DefaultAzureCredential, EnvironmentCredential - -class BaseResourceProvider(ABC): - def __init__(self, options: Dict[str, Any]): - """ - Initializes the fetcher with the Graph client, resource path, and query parameters. - - :param options: Connector options. - """ - self.options = options - credentials = DefaultAzureCredential() - self.graph_client = GraphServiceClient(credentials=credentials) - - async def fetch_data(self): - """ - Fetches data from Microsoft Graph using the dynamically built request. - Handles pagination automatically. - """ - query_parameters_cls = self.resource.get_query_parameters_cls() - - if query_parameters_cls: - try: - query_parameters_instance = query_parameters_cls() # Ensure it can be instantiated without arguments - except TypeError as e: - raise ValueError(f"Failed to instantiate {query_parameters_cls.__name__}: {e}") - - if self.resource.query_params: - for k, v in self.resource.query_params.items(): - k = k.removeprefix("%24") - if hasattr(query_parameters_instance, k): - setattr(query_parameters_instance, k, v) # Set attributes dynamically - else: - raise AttributeError(f"{query_parameters_cls.__name__} has no attribute '{k}'") - - request_configuration = RequestConfiguration( - query_parameters=query_parameters_instance - ) - - try: - builder = self.resource.get_request_builder_cls()(self.graph_client.request_adapter, self.resource.resource_params) - items = await builder.get(request_configuration=request_configuration) - while True: - for item in items.value: - yield item - if not items.odata_next_link: - break - items = await builder.with_url(items.odata_next_link).get() - - except ODataError as e: - raise Exception(f"Graph API Error: {e.error.message}") - - def iter_records(self): - """ - Iterates over records from the Microsoft Graph API. - - :param options: Connector options containing authentication credentials and resource details. 
- :return: A synchronous iterator over the fetched data. - :raises ValueError: If any required credentials or resource parameters are missing. - :raises GraphAPIError: If the API request fails. - """ - async_gen = self.fetch_data() - return AsyncToSyncIterator(async_gen) - - def get_resource_schema(self) -> Dict[str, Any]: - """ - Retrieves the schema of a Microsoft Graph API resource by fetching a single record. - - :param options: Connector options containing authentication credentials and resource details. - :return: A dictionary representing the schema of the resource. - :raises ValueError: If no records are found or if required options are missing. - :raises GraphAPIError: If the API request fails. - """ - async_gen = self.fetch_data() - - try: - record = next(AsyncToSyncIterator(async_gen), None) - if not record: - raise ValueError(f"No records found for resource: {self.resource.resource_name}") - record = to_json(record) - schema = to_pyspark_schema(get_python_schema(record)) - return record, schema - - except StopIteration: - raise ValueError(f"No records available for {self.resource.resource_name}") - - @abstractmethod - def resource(self) -> BaseResource: - ... \ No newline at end of file diff --git a/src/source_msgraph/core/constants.py b/src/source_msgraph/core/constants.py deleted file mode 100644 index 6a42334..0000000 --- a/src/source_msgraph/core/constants.py +++ /dev/null @@ -1,2 +0,0 @@ -# Base generated package for Microsoft Graph SDK -MSGRAPH_SDK_PACKAGE = "msgraph.generated" diff --git a/src/source_msgraph/core/models.py b/src/source_msgraph/core/models.py deleted file mode 100644 index 2ab1145..0000000 --- a/src/source_msgraph/core/models.py +++ /dev/null @@ -1,177 +0,0 @@ -from dataclasses import dataclass -import importlib -import inspect -import logging -import re -from typing import Any, Dict -from source_msgraph.core.constants import MSGRAPH_SDK_PACKAGE -from urllib.parse import unquote -from kiota_abstractions.base_request_builder import BaseRequestBuilder - -@dataclass -class BaseResource: - name: str # User friendly name for Spark reader - resource_name: str # Microsoft Graph leaf resource name - request_builder_module: str - query_params: Dict[str, Any] = None - resource_params: Dict[str, Any] = None - request_builder_cls_name: str = None - request_builder_query_cls_name: str = None - - def __post_init__(self): - if not self.name: - raise ValueError("name is required") - - self.request_builder_cls_name = self._pascal_case(f"{self.resource_name}_request_builder") - #self.request_builder_cls = self.get_request_builder_cls() - self.request_builder_query_cls_name = self._pascal_case(f"{self.resource_name}_request_builder_get_query_parameters") - #self.query_parameters_cls = self.get_query_parameters_cls() - self.parse_url_template() - - - @classmethod - def _pascal_case(cls, snake_str: str) -> str: - """ - Converts snake_case to PascalCase. - Example: "items_request_builder" -> "ItemsRequestBuilder" - """ - return "".join(word.title() for word in snake_str.split("_")) - - def get_query_parameters_cls(self): - """ - Retrieves the query parameters class from the request builder module. 
- """ - try: - module = importlib.import_module(f"{MSGRAPH_SDK_PACKAGE}.{self.request_builder_module}") - request_builder_cls = getattr(module, self.request_builder_cls_name, None) - - if not request_builder_cls or not issubclass(request_builder_cls, BaseRequestBuilder): - raise AttributeError(f"{self.request_builder_cls_name} not found in {module.__name__}") - - # Inspect the attributes to find the query parameters class - - for attr in dir(request_builder_cls): - if attr == self.request_builder_query_cls_name: - return getattr(request_builder_cls, attr) - raise AttributeError(f"{self.request_builder_query_cls_name} not found in {module.__name__}") - - except ModuleNotFoundError: - raise ImportError(f"Module {self.request_builder_module} not found in {MSGRAPH_SDK_PACKAGE}") - - def get_request_builder_cls(self) -> BaseRequestBuilder: - """ - Dynamically imports a module and finds the RequestBuilder class. - """ - try: - module = importlib.import_module(f"{MSGRAPH_SDK_PACKAGE}.{self.request_builder_module}") - for attr in dir(module): - if attr == self.request_builder_cls_name: - cls = getattr(module, attr) - if not issubclass(cls, BaseRequestBuilder): - raise AttributeError(f"{attr} is not a subclass of BaseRequestBuilder") - return cls - except ImportError: - raise ImportError(f"Module {self.request_builder_module} not found in {MSGRAPH_SDK_PACKAGE}") - - def get_request_builder_url_template(self): - """ - Extracts the `url_template` by analyzing the source code of the class. - """ - try: - cls = self.get_request_builder_cls() - if inspect.isclass(cls) and hasattr(cls, "__init__"): - # Extract the __init__ function source code - init_source = inspect.getsource(cls.__init__) - if "super().__init__(" in init_source: - lines = init_source.split("\n") - for line in lines: - if "super().__init__(" in line: - match = re.search(r'super\(\).__init__\s*\([^,]+,\s*"([^"]+)"', line) - if match: - url_template = match.group(1).replace('"', "").replace("'", "") - return url_template - - except TypeError: - raise TypeError(f"Error extracting URL template from {cls.__name__}") - - def parse_url_template(self): - """ - Parses the `url_template` string to extract path parameters and query parameters. - """ - url_template = self.get_request_builder_url_template() - if not url_template: - raise ValueError("URL template not found in request builder class") - - # Extract path parameters (decode %2Did → _id) - path_parameters = [ - unquote(match.group(1)).replace("%2D", "_") - for match in re.finditer(r"\{([^?}]+)\}", url_template) - if match.group(1).lower() != "+baseurl" - ] - - # Extract query parameters (decode %24expand → $expand) - query_match = re.search(r"\{\?([^}]+)\}", url_template) - query_parameters = ( - [unquote(q).replace("%24", "$") for q in query_match.group(1).split(",")] - if query_match else [] - ) - - self.resource_params = {k:None for k in path_parameters} - self.query_params = {qp.strip().replace("$", ""): None for qp in query_parameters} - - - def map_options_to_params(self, options: Dict[str, Any]) -> 'BaseResource': - """ - Maps the provided options to either query parameters or resource parameters. - - :param options: Dictionary of options provided by the user. - :param query_params: List of valid query parameter names. - :param resource_params: List of valid resource parameter names. 
- :return: A tuple (mapped_query_params, mapped_resource_params, invalid_params) - """ - missing_params = [param for param in self.resource_params if param not in options] - - if missing_params: - raise ValueError(f"Missing required resource parameters: {', '.join(missing_params)}") - - # TODO: add max $top value validation. - if int(options.get("top", 1)) <= 100: - logging.warning("Setting a low `top` value in Microsoft Graph queries can cause high latency and increase throttling risk.") - - mapped_query_params = {"%24"+k: v for k, v in options.items() if k in self.query_params} - mapped_resource_params = {k.replace("-", "%2D"): v for k, v in options.items() if k in self.resource_params} - - invalid_params = {k: v for k, v in options.items() if k not in self.query_params and k not in self.resource_params} - - if len(invalid_params) > 0: - raise ValueError(f"Extra parameters {invalid_params} not allowed.") - - self.query_params = mapped_query_params - self.resource_params = mapped_resource_params - - return self - - - -GUID_PATTERN = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$") - - -@dataclass -class ConnectorOptions: - """Options for Microsoft Graph API requests with strict resource_path validation.""" - tenant_id: str - client_id: str - client_secret: str - def __post_init__(self): - ... - - def _validate_credentials(self): - """Validates the format and presence of credentials.""" - if not self.tenant_id or not GUID_PATTERN.match(self.tenant_id): - raise ValueError("Invalid tenant_id: must be a valid GUID.") - - if not self.client_id or not GUID_PATTERN.match(self.client_id): - raise ValueError("Invalid client_id: must be a valid GUID.") - - if not self.client_secret or not isinstance(self.client_secret, str): - raise ValueError("Invalid client_secret: must be a non-empty string.") \ No newline at end of file diff --git a/src/source_msgraph/core/resource_provider.py b/src/source_msgraph/core/resource_provider.py deleted file mode 100644 index 5f2a9d0..0000000 --- a/src/source_msgraph/core/resource_provider.py +++ /dev/null @@ -1,48 +0,0 @@ -from functools import lru_cache -import importlib -import pkgutil -from typing import Dict, Type -from source_msgraph.core.base_client import BaseResourceProvider - -# @lru_cache(maxsize=10) -def load_resource_providers() -> Dict[str, Type[BaseResourceProvider]]: - """ - Dynamically loads all resource providers from the resources package - """ - providers = {} - package = 'source_msgraph.resources' - - # Import the resources package - resources_pkg = importlib.import_module(package) - - for _, name, _ in pkgutil.iter_modules(resources_pkg.__path__): - if name != 'base': # Skip the base module - try: - module = importlib.import_module(f'{package}.{name}') - for attr_name in dir(module): - if attr_name.endswith('ResourceProvider'): - provider_class = getattr(module, attr_name) - if (isinstance(provider_class, type) and - issubclass(provider_class, BaseResourceProvider) and - provider_class != BaseResourceProvider): - providers[name] = provider_class - except ImportError as e: - print(f"Warning: Could not load resource provider {name}: {e}") - - return frozenset(providers.items()) - -# @lru_cache(maxsize=10) -def get_resource_provider(resource_name: str, options: frozenset) -> BaseResourceProvider: - """ - Factory method to get the appropriate resource provider - """ - providers = dict(load_resource_providers()) - provider_class: BaseResourceProvider = providers.get(resource_name) - - if not provider_class: 
-        available = ', '.join(providers.keys())
-        raise ValueError(
-            f"Unsupported resource name: '{resource_name}'. "
-            f"Available resources: {available}"
-        )
-    return provider_class(dict(options))
\ No newline at end of file
diff --git a/src/source_msgraph/core/source.py b/src/source_msgraph/core/source.py
deleted file mode 100644
index b23466a..0000000
--- a/src/source_msgraph/core/source.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import logging
-from typing import Any, Dict, Union
-from pyspark.sql.datasource import DataSource, DataSourceReader
-from pyspark.sql.types import StructType
-from source_msgraph.core.base_client import BaseResourceProvider
-
-from source_msgraph.core.resource_provider import get_resource_provider
-
-# Reference https://learn.microsoft.com/en-us/azure/databricks/pyspark/datasources
-
-logger = logging.getLogger(__name__)
-
-class MSGraphDataSource(DataSource):
-    """
-    PySpark DataSource for reading Microsoft Graph API resources.
-    """
-    def __init__(self, options: Dict[str, Any]):
-
-        self.resource_name = options.pop("resource")
-        if not self.resource_name:
-            raise ValueError("resource is missing, please provide a valid resource name.")
-        self.options = frozenset(options.items())
-
-    @classmethod
-    def name(cls):
-        return "msgraph"
-
-    def schema(self):
-        logger.info("Schema not provided, inferring from the source.")
-        resource_provider:BaseResourceProvider = get_resource_provider(self.resource_name, self.options)
-        _, schema = resource_provider.get_resource_schema()
-        logger.debug(f"Inferred schema: {schema}")
-        return schema
-
-    def reader(self, schema: StructType):
-        return MSGraphDataSourceReader(self.resource_name, self.options, schema)
-
-
-class MSGraphDataSourceReader(DataSourceReader):
-
-    def __init__(self, resource_name :str, options: frozenset, schema: Union[StructType, str]):
-        self.schema: StructType = schema
-        self.options = options
-        self.resource_name = resource_name
-
-    def read(self, partition):
-        from source_msgraph.core.utils import to_json
-        from pyspark.sql import Row
-        resource_provider:BaseResourceProvider = get_resource_provider(self.resource_name, self.options)
-        for row in resource_provider.iter_records():
-            row = to_json(row)
-            row_data = {f.name: row.get(f.name, None) for f in self.schema.fields}
-            yield Row(**row_data)
diff --git a/src/source_msgraph/core/utils.py b/src/source_msgraph/core/utils.py
deleted file mode 100644
index b878c2a..0000000
--- a/src/source_msgraph/core/utils.py
+++ /dev/null
@@ -1,91 +0,0 @@
-from typing import Any
-from kiota_serialization_json.json_serialization_writer_factory import JsonSerializationWriterFactory
-import json
-
-from pyspark.sql.types import (
-    StructType, StructField, StringType, IntegerType, DoubleType, BooleanType,
-    MapType, ArrayType, TimestampType, DateType, LongType, BinaryType, DecimalType
-)
-
-from datetime import datetime, date
-from decimal import Decimal
-
-# Convert to JSON using Kiota
-writer_factory = JsonSerializationWriterFactory()
-writer = writer_factory.get_serialization_writer("application/json")
-
-def to_json(value):
-    value.serialize(writer)
-    # Get JSON string
-    return json.loads((writer.get_serialized_content().decode("utf-8")))
-
-def to_jsonValue(value):
-    value.serialize(writer)
-    # Get JSON string
-    return str(json.loads((writer.get_serialized_content().decode("utf-8"))))
-
-
-
-def get_python_schema(obj:Any):
-    """
-    Recursively extracts the schema from a Python object.
-
-    :param obj: The Python object (dict, list, int, str, etc.).
-    :return: A schema dictionary representing field types.
-    """
-    if isinstance(obj, bool):
-        return "bool"
-    elif isinstance(obj, dict):
-        return {key: get_python_schema(value) for key, value in obj.items()}
-    elif isinstance(obj, list):
-        if obj: # Assume first element type (homogeneous lists)
-            return [get_python_schema(obj[0])]
-        return ["any"] # Empty lists default to "any"
-    elif isinstance(obj, str):
-        return "str"
-    elif isinstance(obj, int):
-        return "int"
-    elif isinstance(obj, float):
-        return "float"
-    elif isinstance(obj, datetime):
-        return "datetime"
-    elif isinstance(obj, date):
-        return "date"
-    elif isinstance(obj, Decimal):
-        return "decimal"
-    elif obj is None:
-        return "null"
-    return "unknown" # Fallback for unrecognized types
-
-def to_pyspark_schema(schema_dict):
-    """
-    Recursively converts a nested Python schema dictionary to a PySpark StructType schema.
-
-    :param schema_dict: Dictionary with field names as keys and data types as values.
-    :return: PySpark StructType schema.
-    """
-    type_mapping = {
-        "str": StringType(),
-        "int": IntegerType(),
-        "float": DoubleType(),
-        "bool": BooleanType(),
-        "datetime": TimestampType(),
-        "date": DateType(),
-        "long": LongType(),
-        "binary": BinaryType(),
-        "decimal": DecimalType(38, 18),
-        "unknown": StringType()
-    }
-
-    def convert_type(value):
-        """Recursively converts types, handling nested dicts and lists."""
-        if isinstance(value, dict): # Nested structure
-            return StructType([StructField(k, convert_type(v), True) for k, v in value.items()])
-        elif isinstance(value, list): # List of elements (assume first element type)
-            if not value:
-                return ArrayType(StringType()) # Default to list of strings if empty
-            return ArrayType(convert_type(value[0]))
-        return type_mapping.get(value, StringType()) # Default to StringType
-
-    struct_fields = [StructField(field, convert_type(dtype), True) for field, dtype in schema_dict.items()]
-    return StructType(struct_fields)
\ No newline at end of file
diff --git a/src/source_msgraph/resources/__init__.py b/src/source_msgraph/resources/__init__.py
deleted file mode 100644
index 5ee7122..0000000
--- a/src/source_msgraph/resources/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .list_items import * # type: ignore
\ No newline at end of file
diff --git a/src/source_msgraph/resources/list_items.py b/src/source_msgraph/resources/list_items.py
deleted file mode 100644
index b9927d5..0000000
--- a/src/source_msgraph/resources/list_items.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from functools import cached_property
-import logging
-from typing import Dict
-
-from source_msgraph.core.base_client import BaseResourceProvider
-from source_msgraph.core.models import BaseResource
-
-
-class ListItemsResourceProvider(BaseResourceProvider):
-
-    def __init__(self, options: Dict[str, str]):
-        self.options = options
-        super().__init__(options)
-
-    @cached_property
-    def resource(self) -> BaseResource:
-        return BaseResource(
-            name="list_items",
-            resource_name="items",
-            request_builder_module="sites.item.lists.item.items.items_request_builder"
-        ).map_options_to_params(self.options)
-
-
diff --git a/tests/test_source.py b/tests/test_source.py
index 17cd168..777690e 100644
--- a/tests/test_source.py
+++ b/tests/test_source.py
@@ -1,6 +1,6 @@
 import pytest
 from pyspark.sql import SparkSession
-from source_msgraph.source import FakeDataSource
+from pyspark_msgraph_source.core.source import FakeDataSource
 
 
 # @pytest.fixture
From 0df8c829fb8e3bf27c318d5871591a1f4b75cea1 Mon Sep 17 00:00:00 2001
From: geekwhocodes
Date: Wed, 5 Mar 2025 12:36:35 +0000
Subject: [PATCH 05/10] change
package name --- src/pyspark_msgraph_source/__init__.py | 0 .../core/async_interator.py | 69 +++++++ .../core/base_client.py | 97 ++++++++++ src/pyspark_msgraph_source/core/constants.py | 2 + src/pyspark_msgraph_source/core/models.py | 177 ++++++++++++++++++ .../core/resource_provider.py | 51 +++++ src/pyspark_msgraph_source/core/source.py | 53 ++++++ src/pyspark_msgraph_source/core/utils.py | 91 +++++++++ .../resources/__init__.py | 1 + .../resources/list_items.py | 23 +++ 10 files changed, 564 insertions(+) create mode 100644 src/pyspark_msgraph_source/__init__.py create mode 100644 src/pyspark_msgraph_source/core/async_interator.py create mode 100644 src/pyspark_msgraph_source/core/base_client.py create mode 100644 src/pyspark_msgraph_source/core/constants.py create mode 100644 src/pyspark_msgraph_source/core/models.py create mode 100644 src/pyspark_msgraph_source/core/resource_provider.py create mode 100644 src/pyspark_msgraph_source/core/source.py create mode 100644 src/pyspark_msgraph_source/core/utils.py create mode 100644 src/pyspark_msgraph_source/resources/__init__.py create mode 100644 src/pyspark_msgraph_source/resources/list_items.py diff --git a/src/pyspark_msgraph_source/__init__.py b/src/pyspark_msgraph_source/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pyspark_msgraph_source/core/async_interator.py b/src/pyspark_msgraph_source/core/async_interator.py new file mode 100644 index 0000000..b2c121a --- /dev/null +++ b/src/pyspark_msgraph_source/core/async_interator.py @@ -0,0 +1,69 @@ +import asyncio + +import asyncio +from typing import AsyncGenerator, Iterator, Any + +import asyncio +from typing import AsyncGenerator, Iterator, Any + +class AsyncToSyncIterator: + """ + Converts an async generator into a synchronous iterator while ensuring proper event loop handling. + """ + + def __init__(self, async_gen: AsyncGenerator[Any, None]): + """ + Initializes the iterator by consuming an async generator synchronously. + + Args: + async_gen (AsyncGenerator): The async generator yielding results. + """ + self.async_gen = async_gen + self.iterator = self._to_iterator() + + def _to_iterator(self) -> Iterator: + """ + Ensures that the async generator is consumed using the correct event loop. + Uses streaming (does not load all results into memory). + """ + try: + loop = asyncio.get_running_loop() + return self._sync_generator(loop) # Works inside Jupyter + except RuntimeError: + return iter(asyncio.run(self._collect_results())) # Works in scripts + + def _sync_generator(self, loop: asyncio.AbstractEventLoop) -> Iterator: + """ + Streams async results into a sync generator while inside a running event loop. 
+ """ + queue = asyncio.Queue() + + async def _producer(): + """Fills the queue with async results.""" + async for item in self.async_gen: + await queue.put(item) + await queue.put(None) # Sentinel to signal completion + + async def _consumer(): + """Yields items from the queue in sync mode.""" + task = loop.create_task(_producer()) + while True: + item = await queue.get() + if item is None: + break + yield item + await task # Ensure producer task completes + + return iter(loop.run_until_complete(self._collect_results())) + + async def _collect_results(self): + """Collects async generator results into a list (safe for asyncio.run).""" + return [item async for item in self.async_gen] + + def __iter__(self) -> Iterator: + """Returns the synchronous iterator.""" + return self.iterator + + def __next__(self) -> Any: + """Returns the next item from the iterator.""" + return next(self.iterator) \ No newline at end of file diff --git a/src/pyspark_msgraph_source/core/base_client.py b/src/pyspark_msgraph_source/core/base_client.py new file mode 100644 index 0000000..56bc709 --- /dev/null +++ b/src/pyspark_msgraph_source/core/base_client.py @@ -0,0 +1,97 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict +from msgraph import GraphServiceClient +from kiota_abstractions.base_request_configuration import RequestConfiguration +from msgraph.generated.models.o_data_errors.o_data_error import ODataError +from pyspark_msgraph_source.core.async_interator import AsyncToSyncIterator +from pyspark_msgraph_source.core.models import BaseResource +from pyspark_msgraph_source.core.utils import get_python_schema, to_json, to_pyspark_schema + +from azure.identity import DefaultAzureCredential, EnvironmentCredential + +class BaseResourceProvider(ABC): + def __init__(self, options: Dict[str, Any]): + """ + Initializes the fetcher with the Graph client, resource path, and query parameters. + + :param options: Connector options. + """ + self.options = options + credentials = DefaultAzureCredential() + self.graph_client = GraphServiceClient(credentials=credentials) + + async def fetch_data(self): + """ + Fetches data from Microsoft Graph using the dynamically built request. + Handles pagination automatically. + """ + query_parameters_cls = self.resource.get_query_parameters_cls() + + if query_parameters_cls: + try: + query_parameters_instance = query_parameters_cls() # Ensure it can be instantiated without arguments + except TypeError as e: + raise ValueError(f"Failed to instantiate {query_parameters_cls.__name__}: {e}") + + if self.resource.query_params: + for k, v in self.resource.query_params.items(): + k = k.removeprefix("%24") + if hasattr(query_parameters_instance, k): + setattr(query_parameters_instance, k, v) # Set attributes dynamically + else: + raise AttributeError(f"{query_parameters_cls.__name__} has no attribute '{k}'") + + request_configuration = RequestConfiguration( + query_parameters=query_parameters_instance + ) + + try: + builder = self.resource.get_request_builder_cls()(self.graph_client.request_adapter, self.resource.resource_params) + items = await builder.get(request_configuration=request_configuration) + while True: + for item in items.value: + yield item + if not items.odata_next_link: + break + items = await builder.with_url(items.odata_next_link).get() + + except ODataError as e: + raise Exception(f"Graph API Error: {e.error.message}") + + def iter_records(self): + """ + Iterates over records from the Microsoft Graph API. 
+ + :param options: Connector options containing authentication credentials and resource details. + :return: A synchronous iterator over the fetched data. + :raises ValueError: If any required credentials or resource parameters are missing. + :raises GraphAPIError: If the API request fails. + """ + async_gen = self.fetch_data() + return AsyncToSyncIterator(async_gen) + + def get_resource_schema(self) -> Dict[str, Any]: + """ + Retrieves the schema of a Microsoft Graph API resource by fetching a single record. + + :param options: Connector options containing authentication credentials and resource details. + :return: A dictionary representing the schema of the resource. + :raises ValueError: If no records are found or if required options are missing. + :raises GraphAPIError: If the API request fails. + """ + async_gen = self.fetch_data() + + try: + record = next(AsyncToSyncIterator(async_gen), None) + if not record: + raise ValueError(f"No records found for resource: {self.resource.resource_name}") + record = to_json(record) + schema = to_pyspark_schema(get_python_schema(record)) + return record, schema + + except StopIteration: + raise ValueError(f"No records available for {self.resource.resource_name}") + + @abstractmethod + def resource(self) -> BaseResource: + ... \ No newline at end of file diff --git a/src/pyspark_msgraph_source/core/constants.py b/src/pyspark_msgraph_source/core/constants.py new file mode 100644 index 0000000..6a42334 --- /dev/null +++ b/src/pyspark_msgraph_source/core/constants.py @@ -0,0 +1,2 @@ +# Base generated package for Microsoft Graph SDK +MSGRAPH_SDK_PACKAGE = "msgraph.generated" diff --git a/src/pyspark_msgraph_source/core/models.py b/src/pyspark_msgraph_source/core/models.py new file mode 100644 index 0000000..7651816 --- /dev/null +++ b/src/pyspark_msgraph_source/core/models.py @@ -0,0 +1,177 @@ +from dataclasses import dataclass +import importlib +import inspect +import logging +import re +from typing import Any, Dict +from pyspark_msgraph_source.core.constants import MSGRAPH_SDK_PACKAGE +from urllib.parse import unquote +from kiota_abstractions.base_request_builder import BaseRequestBuilder + +@dataclass +class BaseResource: + name: str # User friendly name for Spark reader + resource_name: str # Microsoft Graph leaf resource name + request_builder_module: str + query_params: Dict[str, Any] = None + resource_params: Dict[str, Any] = None + request_builder_cls_name: str = None + request_builder_query_cls_name: str = None + + def __post_init__(self): + if not self.name: + raise ValueError("name is required") + + self.request_builder_cls_name = self._pascal_case(f"{self.resource_name}_request_builder") + #self.request_builder_cls = self.get_request_builder_cls() + self.request_builder_query_cls_name = self._pascal_case(f"{self.resource_name}_request_builder_get_query_parameters") + #self.query_parameters_cls = self.get_query_parameters_cls() + self.parse_url_template() + + + @classmethod + def _pascal_case(cls, snake_str: str) -> str: + """ + Converts snake_case to PascalCase. + Example: "items_request_builder" -> "ItemsRequestBuilder" + """ + return "".join(word.title() for word in snake_str.split("_")) + + def get_query_parameters_cls(self): + """ + Retrieves the query parameters class from the request builder module. 
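+
+        :return: The query parameters class exposed by the request builder.
+        :raises AttributeError: If the request builder or its query parameters class cannot be found.
+        :raises ImportError: If the request builder module is not present in the SDK package.
+
+        Example (a hypothetical sketch for the SharePoint list-items resource;
+        the class name follows the PascalCase convention derived below)::
+
+            qp_cls = resource.get_query_parameters_cls()
+            params = qp_cls()   # e.g. ItemsRequestBuilderGetQueryParameters
+            params.top = 999    # standard OData paging hint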
+ """ + try: + module = importlib.import_module(f"{MSGRAPH_SDK_PACKAGE}.{self.request_builder_module}") + request_builder_cls = getattr(module, self.request_builder_cls_name, None) + + if not request_builder_cls or not issubclass(request_builder_cls, BaseRequestBuilder): + raise AttributeError(f"{self.request_builder_cls_name} not found in {module.__name__}") + + # Inspect the attributes to find the query parameters class + + for attr in dir(request_builder_cls): + if attr == self.request_builder_query_cls_name: + return getattr(request_builder_cls, attr) + raise AttributeError(f"{self.request_builder_query_cls_name} not found in {module.__name__}") + + except ModuleNotFoundError: + raise ImportError(f"Module {self.request_builder_module} not found in {MSGRAPH_SDK_PACKAGE}") + + def get_request_builder_cls(self) -> BaseRequestBuilder: + """ + Dynamically imports a module and finds the RequestBuilder class. + """ + try: + module = importlib.import_module(f"{MSGRAPH_SDK_PACKAGE}.{self.request_builder_module}") + for attr in dir(module): + if attr == self.request_builder_cls_name: + cls = getattr(module, attr) + if not issubclass(cls, BaseRequestBuilder): + raise AttributeError(f"{attr} is not a subclass of BaseRequestBuilder") + return cls + except ImportError: + raise ImportError(f"Module {self.request_builder_module} not found in {MSGRAPH_SDK_PACKAGE}") + + def get_request_builder_url_template(self): + """ + Extracts the `url_template` by analyzing the source code of the class. + """ + try: + cls = self.get_request_builder_cls() + if inspect.isclass(cls) and hasattr(cls, "__init__"): + # Extract the __init__ function source code + init_source = inspect.getsource(cls.__init__) + if "super().__init__(" in init_source: + lines = init_source.split("\n") + for line in lines: + if "super().__init__(" in line: + match = re.search(r'super\(\).__init__\s*\([^,]+,\s*"([^"]+)"', line) + if match: + url_template = match.group(1).replace('"', "").replace("'", "") + return url_template + + except TypeError: + raise TypeError(f"Error extracting URL template from {cls.__name__}") + + def parse_url_template(self): + """ + Parses the `url_template` string to extract path parameters and query parameters. + """ + url_template = self.get_request_builder_url_template() + if not url_template: + raise ValueError("URL template not found in request builder class") + + # Extract path parameters (decode %2Did → _id) + path_parameters = [ + unquote(match.group(1)).replace("%2D", "_") + for match in re.finditer(r"\{([^?}]+)\}", url_template) + if match.group(1).lower() != "+baseurl" + ] + + # Extract query parameters (decode %24expand → $expand) + query_match = re.search(r"\{\?([^}]+)\}", url_template) + query_parameters = ( + [unquote(q).replace("%24", "$") for q in query_match.group(1).split(",")] + if query_match else [] + ) + + self.resource_params = {k:None for k in path_parameters} + self.query_params = {qp.strip().replace("$", ""): None for qp in query_parameters} + + + def map_options_to_params(self, options: Dict[str, Any]) -> 'BaseResource': + """ + Maps the provided options to either query parameters or resource parameters. + + :param options: Dictionary of options provided by the user. + :param query_params: List of valid query parameter names. + :param resource_params: List of valid resource parameter names. 
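+        :note: Query parameter keys are re-encoded with a ``%24`` prefix (the
+            escaped ``$``) and hyphens in path parameter names are escaped as
+            ``%2D`` before the request is built.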
+ :return: A tuple (mapped_query_params, mapped_resource_params, invalid_params) + """ + missing_params = [param for param in self.resource_params if param not in options] + + if missing_params: + raise ValueError(f"Missing required resource parameters: {', '.join(missing_params)}") + + # TODO: add max $top value validation. + if int(options.get("top", 1)) <= 100: + logging.warning("Setting a low `top` value in Microsoft Graph queries can cause high latency and increase throttling risk.") + + mapped_query_params = {"%24"+k: v for k, v in options.items() if k in self.query_params} + mapped_resource_params = {k.replace("-", "%2D"): v for k, v in options.items() if k in self.resource_params} + + invalid_params = {k: v for k, v in options.items() if k not in self.query_params and k not in self.resource_params} + + if len(invalid_params) > 0: + raise ValueError(f"Extra parameters {invalid_params} not allowed.") + + self.query_params = mapped_query_params + self.resource_params = mapped_resource_params + + return self + + + +GUID_PATTERN = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$") + + +@dataclass +class ConnectorOptions: + """Options for Microsoft Graph API requests with strict resource_path validation.""" + tenant_id: str + client_id: str + client_secret: str + def __post_init__(self): + ... + + def _validate_credentials(self): + """Validates the format and presence of credentials.""" + if not self.tenant_id or not GUID_PATTERN.match(self.tenant_id): + raise ValueError("Invalid tenant_id: must be a valid GUID.") + + if not self.client_id or not GUID_PATTERN.match(self.client_id): + raise ValueError("Invalid client_id: must be a valid GUID.") + + if not self.client_secret or not isinstance(self.client_secret, str): + raise ValueError("Invalid client_secret: must be a non-empty string.") \ No newline at end of file diff --git a/src/pyspark_msgraph_source/core/resource_provider.py b/src/pyspark_msgraph_source/core/resource_provider.py new file mode 100644 index 0000000..bc72ac6 --- /dev/null +++ b/src/pyspark_msgraph_source/core/resource_provider.py @@ -0,0 +1,51 @@ +from functools import lru_cache +import importlib +import logging +import pkgutil +from typing import Dict, Type +from pyspark_msgraph_source.core.base_client import BaseResourceProvider + +# @lru_cache(maxsize=10) +def load_resource_providers() -> Dict[str, Type[BaseResourceProvider]]: + """ + Dynamically loads all resource providers from the resources package + """ + providers = {} + root_package = __package__.split('.')[0] + logging.debug(f"Current root package {root_package}.") + + package = f'{root_package}.resources' + + resources_pkg = importlib.import_module(package) + + for _, name, _ in pkgutil.iter_modules(resources_pkg.__path__): + if name != 'base': # Skip the base module + try: + module = importlib.import_module(f'{package}.{name}') + for attr_name in dir(module): + if attr_name.endswith('ResourceProvider'): + provider_class = getattr(module, attr_name) + if (isinstance(provider_class, type) and + issubclass(provider_class, BaseResourceProvider) and + provider_class != BaseResourceProvider): + providers[name] = provider_class + except ImportError as e: + print(f"Warning: Could not load resource provider {name}: {e}") + + return frozenset(providers.items()) + +# @lru_cache(maxsize=10) +def get_resource_provider(resource_name: str, options: frozenset) -> BaseResourceProvider: + """ + Factory method to get the appropriate resource provider + """ + providers = 
dict(load_resource_providers()) + provider_class: BaseResourceProvider = providers.get(resource_name) + + if not provider_class: + available = ', '.join(providers.keys()) + raise ValueError( + f"Unsupported resource name: '{resource_name}'. " + f"Available resources: {available}" + ) + return provider_class(dict(options)) \ No newline at end of file diff --git a/src/pyspark_msgraph_source/core/source.py b/src/pyspark_msgraph_source/core/source.py new file mode 100644 index 0000000..efa4660 --- /dev/null +++ b/src/pyspark_msgraph_source/core/source.py @@ -0,0 +1,53 @@ +import logging +from typing import Any, Dict, Union +from pyspark.sql.datasource import DataSource, DataSourceReader +from pyspark.sql.types import StructType +from pyspark_msgraph_source.core.base_client import BaseResourceProvider + +from pyspark_msgraph_source.core.resource_provider import get_resource_provider + +# Reference https://learn.microsoft.com/en-us/azure/databricks/pyspark/datasources + +logger = logging.getLogger(__name__) + +class MSGraphDataSource(DataSource): + """ + + """ + def __init__(self, options: Dict[str, Any]): + + self.resource_name = options.pop("resource") + if not self.resource_name: + raise ValueError("resource is missing, please provide a valid resource name.") + self.options = frozenset(options.items()) + + @classmethod + def name(cls): + return "msgraph" + + def schema(self): + logger.info("Schema not provided, infering from the source.") + resource_provider:BaseResourceProvider = get_resource_provider(self.resource_name, self.options) + _, schema = resource_provider.get_resource_schema() + logger.debug(f"Infered schema : {schema}") + return schema + + def reader(self, schema: StructType): + return MSGraphDataSourceReader(self.resource_name, self.options, schema) + + +class MSGraphDataSourceReader(DataSourceReader): + + def __init__(self, resource_name :str, options: frozenset, schema: Union[StructType, str]): + self.schema: StructType = schema + self.options = options + self.resource_name = resource_name + + def read(self, partition): + from pyspark_msgraph_source.core.utils import to_json + from pyspark.sql import Row + resource_provider:BaseResourceProvider = get_resource_provider(self.resource_name, self.options) + for row in resource_provider.iter_records(): + row = to_json(row) + row_data = {f.name: row.get(f.name, None) for f in self.schema.fields} + yield Row(**row_data) diff --git a/src/pyspark_msgraph_source/core/utils.py b/src/pyspark_msgraph_source/core/utils.py new file mode 100644 index 0000000..b878c2a --- /dev/null +++ b/src/pyspark_msgraph_source/core/utils.py @@ -0,0 +1,91 @@ +from typing import Any +from kiota_serialization_json.json_serialization_writer_factory import JsonSerializationWriterFactory +import json + +from pyspark.sql.types import ( + StructType, StructField, StringType, IntegerType, DoubleType, BooleanType, + MapType, ArrayType, TimestampType, DateType, LongType, BinaryType, DecimalType +) + +from datetime import datetime, date +from decimal import Decimal + +# Convert to JSON using Kiota +writer_factory = JsonSerializationWriterFactory() +writer = writer_factory.get_serialization_writer("application/json") + +def to_json(value): + value.serialize(writer) + # Get JSON string + return json.loads((writer.get_serialized_content().decode("utf-8"))) + +def to_jsonValue(value): + value.serialize(writer) + # Get JSON string + return str(json.loads((writer.get_serialized_content().decode("utf-8")))) + + + +def get_python_schema(obj:Any): + """ + Recursively extracts 
the schema from a Python object. + + :param obj: The Python object (dict, list, int, str, etc.). + :return: A schema dictionary representing field types. + """ + if isinstance(obj, bool): + return "bool" + elif isinstance(obj, dict): + return {key: get_python_schema(value) for key, value in obj.items()} + elif isinstance(obj, list): + if obj: # Assume first element type (homogeneous lists) + return [get_python_schema(obj[0])] + return ["any"] # Empty lists default to "any" + elif isinstance(obj, str): + return "str" + elif isinstance(obj, int): + return "int" + elif isinstance(obj, float): + return "float" + elif isinstance(obj, datetime): + return "datetime" + elif isinstance(obj, date): + return "date" + elif isinstance(obj, Decimal): + return "decimal" + elif obj is None: + return "null" + return "unknown" # Fallback for unrecognized types + +def to_pyspark_schema(schema_dict): + """ + Recursively converts a nested Python schema dictionary to a PySpark StructType schema. + + :param schema_dict: Dictionary with field names as keys and data types as values. + :return: PySpark StructType schema. + """ + type_mapping = { + "str": StringType(), + "int": IntegerType(), + "float": DoubleType(), + "bool": BooleanType(), + "datetime": TimestampType(), + "date": DateType(), + "long": LongType(), + "binary": BinaryType(), + "decimal": DecimalType(38, 18), + "unknown": StringType() + } + + def convert_type(value): + """Recursively converts types, handling nested dicts and lists.""" + if isinstance(value, dict): # Nested structure + return StructType([StructField(k, convert_type(v), True) for k, v in value.items()]) + elif isinstance(value, list): # List of elements (assume first element type) + if not value: + return ArrayType(StringType()) # Default to list of strings if empty + return ArrayType(convert_type(value[0])) + return type_mapping.get(value, StringType()) # Default to StringType + + struct_fields = [StructField(field, convert_type(dtype), True) for field, dtype in schema_dict.items()] + return StructType(struct_fields) \ No newline at end of file diff --git a/src/pyspark_msgraph_source/resources/__init__.py b/src/pyspark_msgraph_source/resources/__init__.py new file mode 100644 index 0000000..5ee7122 --- /dev/null +++ b/src/pyspark_msgraph_source/resources/__init__.py @@ -0,0 +1 @@ +from .list_items import * # type: ignore \ No newline at end of file diff --git a/src/pyspark_msgraph_source/resources/list_items.py b/src/pyspark_msgraph_source/resources/list_items.py new file mode 100644 index 0000000..e3d1293 --- /dev/null +++ b/src/pyspark_msgraph_source/resources/list_items.py @@ -0,0 +1,23 @@ +from functools import cached_property +import logging +from typing import Dict + +from pyspark_msgraph_source.core.base_client import BaseResourceProvider +from pyspark_msgraph_source.core.models import BaseResource + + +class ListItemsResourceProvider(BaseResourceProvider): + + def __init__(self, options: Dict[str, str]): + self.options = options + super().__init__(options) + + @cached_property + def resource(self) -> BaseResource: + return BaseResource( + name="list_items", + resource_name="items", + request_builder_module="sites.item.lists.item.items.items_request_builder" + ).map_options_to_params(self.options) + + From 13bca5db3f700ca82d6f01d8ae9b1541df55620c Mon Sep 17 00:00:00 2001 From: geekwhocodes Date: Wed, 5 Mar 2025 13:36:07 +0000 Subject: [PATCH 06/10] init docs --- docs/api/core.md | 3 + docs/api/index.md | 14 + docs/index.md | 17 + mkdocs.yml | 18 + poetry.lock | 425 
+++++++++++++++++++- pyproject.toml | 3 + src/pyspark_msgraph_source/core/__init__.py | 0 7 files changed, 473 insertions(+), 7 deletions(-) create mode 100644 docs/api/core.md create mode 100644 docs/api/index.md create mode 100644 docs/index.md create mode 100644 mkdocs.yml create mode 100644 src/pyspark_msgraph_source/core/__init__.py diff --git a/docs/api/core.md b/docs/api/core.md new file mode 100644 index 0000000..b5c6a31 --- /dev/null +++ b/docs/api/core.md @@ -0,0 +1,3 @@ +# Core Engine + +::: pyspark_msgraph_source.core.async_interator diff --git a/docs/api/index.md b/docs/api/index.md new file mode 100644 index 0000000..86888cd --- /dev/null +++ b/docs/api/index.md @@ -0,0 +1,14 @@ +# API Reference + +Welcome to the API Reference of `your_package`. + +Below are the available modules and submodules: + +## Core +- [Core Overview](core.md) + +## Utils +- [Utils Helpers](utils.md) + +## API Client +- [API Client](api_client.md) diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..000ea34 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,17 @@ +# Welcome to MkDocs + +For full documentation visit [mkdocs.org](https://www.mkdocs.org). + +## Commands + +* `mkdocs new [dir-name]` - Create a new project. +* `mkdocs serve` - Start the live-reloading docs server. +* `mkdocs build` - Build the documentation site. +* `mkdocs -h` - Print help message and exit. + +## Project layout + + mkdocs.yml # The configuration file. + docs/ + index.md # The documentation homepage. + ... # Other markdown pages, images and other files. diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..8109506 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,18 @@ +site_name: Pyspark MSGraph Source +theme: + name: material + +plugins: + - search + - mkdocstrings: + handlers: + python: + paths: ["src/"] # or wherever your package code is + options: + show_source: true + +nav: + - Home: index.md + - API Reference: + - Overview: api/index.md + - Core: api/core.md diff --git a/poetry.lock b/poetry.lock index 74e07f5..77f6987 100644 --- a/poetry.lock +++ b/poetry.lock @@ -240,6 +240,40 @@ msal = ">=1.30.0" msal-extensions = ">=1.2.0" typing-extensions = ">=4.0.0" +[[package]] +name = "babel" +version = "2.17.0" +description = "Internationalization utilities" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"}, + {file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"}, +] + +[package.extras] +dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata ; sys_platform == \"win32\""] + +[[package]] +name = "backrefs" +version = "5.8" +description = "A wrapper around re and regex that adds additional back references." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "backrefs-5.8-py310-none-any.whl", hash = "sha256:c67f6638a34a5b8730812f5101376f9d41dc38c43f1fdc35cb54700f6ed4465d"}, + {file = "backrefs-5.8-py311-none-any.whl", hash = "sha256:2e1c15e4af0e12e45c8701bd5da0902d326b2e200cafcd25e49d9f06d44bb61b"}, + {file = "backrefs-5.8-py312-none-any.whl", hash = "sha256:bbef7169a33811080d67cdf1538c8289f76f0942ff971222a16034da88a73486"}, + {file = "backrefs-5.8-py313-none-any.whl", hash = "sha256:e3a63b073867dbefd0536425f43db618578528e3896fb77be7141328642a1585"}, + {file = "backrefs-5.8-py39-none-any.whl", hash = "sha256:a66851e4533fb5b371aa0628e1fee1af05135616b86140c9d787a2ffdf4b8fdc"}, + {file = "backrefs-5.8.tar.gz", hash = "sha256:2cab642a205ce966af3dd4b38ee36009b31fa9502a35fd61d59ccc116e40a6bd"}, +] + +[package.extras] +extras = ["regex"] + [[package]] name = "black" version = "25.1.0" @@ -291,7 +325,7 @@ version = "2025.1.31" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, @@ -396,7 +430,7 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -514,7 +548,6 @@ description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["dev"] -markers = "sys_platform == \"win32\" or platform_system == \"Windows\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -821,6 +854,24 @@ files = [ {file = "frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817"}, ] +[[package]] +name = "ghp-import" +version = "2.1.0" +description = "Copy your docs directly to the gh-pages branch." +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, + {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, +] + +[package.dependencies] +python-dateutil = ">=2.8.1" + +[package.extras] +dev = ["flake8", "markdown", "twine", "wheel"] + [[package]] name = "googleapis-common-protos" version = "1.67.0" @@ -839,6 +890,21 @@ protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4 [package.extras] grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] +[[package]] +name = "griffe" +version = "1.6.0" +description = "Signatures for entire Python programs. 
Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "griffe-1.6.0-py3-none-any.whl", hash = "sha256:9f1dfe035d4715a244ed2050dfbceb05b1f470809ed4f6bb10ece5a7302f8dd1"}, + {file = "griffe-1.6.0.tar.gz", hash = "sha256:eb5758088b9c73ad61c7ac014f3cdfb4c57b5c2fcbfca69996584b702aefa354"}, +] + +[package.dependencies] +colorama = ">=0.4" + [[package]] name = "grpcio" version = "1.70.0" @@ -1045,7 +1111,7 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -1197,6 +1263,24 @@ docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alab qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"] +[[package]] +name = "jinja2" +version = "3.1.5" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"}, + {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + [[package]] name = "jupyter-client" version = "8.6.3" @@ -1257,6 +1341,77 @@ files = [ docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] testing = ["coverage", "pyyaml"] +[[package]] +name = "markupsafe" +version = "3.0.2" +description = "Safely add untrusted strings to HTML/XML markup." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225"}, + {file = 
"MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d"}, + {file = 
"MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-win32.whl", hash = "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a"}, + {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, +] + [[package]] name = "matplotlib-inline" version = "0.1.7" @@ -1284,6 +1439,18 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "mergedeep" +version = "1.3.4" +description = "A deep merge function for 🐍." +optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"}, + {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, +] + [[package]] name = "microsoft-kiota-abstractions" version = "1.9.2" @@ -1398,6 +1565,157 @@ files = [ [package.dependencies] microsoft-kiota-abstractions = ">=1.9.2,<1.10.0" +[[package]] +name = "mkdocs" +version = "1.6.1" +description = "Project documentation with Markdown." 
+optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"}, + {file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"}, +] + +[package.dependencies] +click = ">=7.0" +colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} +ghp-import = ">=1.0" +jinja2 = ">=2.11.1" +markdown = ">=3.3.6" +markupsafe = ">=2.0.1" +mergedeep = ">=1.3.4" +mkdocs-get-deps = ">=0.2.0" +packaging = ">=20.5" +pathspec = ">=0.11.1" +pyyaml = ">=5.1" +pyyaml-env-tag = ">=0.1" +watchdog = ">=2.0" + +[package.extras] +i18n = ["babel (>=2.9.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4) ; platform_system == \"Windows\"", "ghp-import (==1.0)", "importlib-metadata (==4.4) ; python_version < \"3.10\"", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] + +[[package]] +name = "mkdocs-autorefs" +version = "1.4.0" +description = "Automatically link across pages in MkDocs." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocs_autorefs-1.4.0-py3-none-any.whl", hash = "sha256:bad19f69655878d20194acd0162e29a89c3f7e6365ffe54e72aa3fd1072f240d"}, + {file = "mkdocs_autorefs-1.4.0.tar.gz", hash = "sha256:a9c0aa9c90edbce302c09d050a3c4cb7c76f8b7b2c98f84a7a05f53d00392156"}, +] + +[package.dependencies] +Markdown = ">=3.3" +markupsafe = ">=2.0.1" +mkdocs = ">=1.1" + +[[package]] +name = "mkdocs-get-deps" +version = "0.2.0" +description = "MkDocs extension that lists all dependencies according to a mkdocs.yml file" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"}, + {file = "mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c"}, +] + +[package.dependencies] +mergedeep = ">=1.3.4" +platformdirs = ">=2.2.0" +pyyaml = ">=5.1" + +[[package]] +name = "mkdocs-material" +version = "9.6.7" +description = "Documentation that simply works" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mkdocs_material-9.6.7-py3-none-any.whl", hash = "sha256:8a159e45e80fcaadd9fbeef62cbf928569b93df954d4dc5ba76d46820caf7b47"}, + {file = "mkdocs_material-9.6.7.tar.gz", hash = "sha256:3e2c1fceb9410056c2d91f334a00cdea3215c28750e00c691c1e46b2a33309b4"}, +] + +[package.dependencies] +babel = ">=2.10,<3.0" +backrefs = ">=5.7.post1,<6.0" +colorama = ">=0.4,<1.0" +jinja2 = ">=3.0,<4.0" +markdown = ">=3.2,<4.0" +mkdocs = ">=1.6,<2.0" +mkdocs-material-extensions = ">=1.3,<2.0" +paginate = ">=0.5,<1.0" +pygments = ">=2.16,<3.0" +pymdown-extensions = ">=10.2,<11.0" +requests = ">=2.26,<3.0" + +[package.extras] +git = ["mkdocs-git-committers-plugin-2 (>=1.1,<3)", "mkdocs-git-revision-date-localized-plugin (>=1.2.4,<2.0)"] +imaging = ["cairosvg (>=2.6,<3.0)", "pillow (>=10.2,<11.0)"] +recommended = ["mkdocs-minify-plugin (>=0.7,<1.0)", "mkdocs-redirects (>=1.2,<2.0)", "mkdocs-rss-plugin (>=1.6,<2.0)"] + +[[package]] +name = "mkdocs-material-extensions" +version = "1.3.1" +description = "Extension pack for Python Markdown and MkDocs Material." 
+optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31"}, + {file = "mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443"}, +] + +[[package]] +name = "mkdocstrings" +version = "0.28.2" +description = "Automatic documentation from sources, for MkDocs." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocstrings-0.28.2-py3-none-any.whl", hash = "sha256:57f79c557e2718d217d6f6a81bf75a0de097f10e922e7e5e00f085c3f0ff6895"}, + {file = "mkdocstrings-0.28.2.tar.gz", hash = "sha256:9b847266d7a588ea76a8385eaebe1538278b4361c0d1ce48ed005be59f053569"}, +] + +[package.dependencies] +Jinja2 = ">=2.11.1" +Markdown = ">=3.6" +MarkupSafe = ">=1.1" +mkdocs = ">=1.4" +mkdocs-autorefs = ">=1.4" +mkdocs-get-deps = ">=0.2" +mkdocstrings-python = {version = ">=0.5.2", optional = true, markers = "extra == \"python\""} +pymdown-extensions = ">=6.3" + +[package.extras] +crystal = ["mkdocstrings-crystal (>=0.3.4)"] +python = ["mkdocstrings-python (>=0.5.2)"] +python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] + +[[package]] +name = "mkdocstrings-python" +version = "1.16.2" +description = "A Python handler for mkdocstrings." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocstrings_python-1.16.2-py3-none-any.whl", hash = "sha256:ff7e719404e59ad1a72f1afbe854769984c889b8fa043c160f6c988e1ad9e966"}, + {file = "mkdocstrings_python-1.16.2.tar.gz", hash = "sha256:942ec1a2e0481d28f96f93be3d6e343cab92a21e5baf01c37dd2d7236c4d0bd7"}, +] + +[package.dependencies] +griffe = ">=0.49" +mkdocs-autorefs = ">=1.4" +mkdocstrings = ">=0.28.2" + [[package]] name = "msal" version = "1.31.1" @@ -1795,6 +2113,22 @@ files = [ {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] +[[package]] +name = "paginate" +version = "0.5.7" +description = "Divides large result sets into pages for easier browsing" +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591"}, + {file = "paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945"}, +] + +[package.extras] +dev = ["pytest", "tox"] +lint = ["black"] + [[package]] name = "pandas" version = "2.2.3" @@ -2330,6 +2664,25 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] +[[package]] +name = "pymdown-extensions" +version = "10.14.3" +description = "Extension pack for Python Markdown." 
+optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pymdown_extensions-10.14.3-py3-none-any.whl", hash = "sha256:05e0bee73d64b9c71a4ae17c72abc2f700e8bc8403755a00580b49a4e9f189e9"}, + {file = "pymdown_extensions-10.14.3.tar.gz", hash = "sha256:41e576ce3f5d650be59e900e4ceff231e0aed2a88cf30acaee41e02f063a061b"}, +] + +[package.dependencies] +markdown = ">=3.6" +pyyaml = "*" + +[package.extras] +extra = ["pygments (>=2.19.1)"] + [[package]] name = "pyspark" version = "4.0.0.dev2" @@ -2491,6 +2844,21 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "pyyaml-env-tag" +version = "0.1" +description = "A custom YAML tag for referencing environment variables in YAML files. " +optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"}, + {file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"}, +] + +[package.dependencies] +pyyaml = "*" + [[package]] name = "pyzmq" version = "26.2.1" @@ -2619,7 +2987,7 @@ version = "2.32.3" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -2758,7 +3126,7 @@ version = "2.3.0" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, @@ -2791,6 +3159,49 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +[[package]] +name = "watchdog" +version = "6.0.0" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26"}, + {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112"}, + {file = "watchdog-6.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c897ac1b55c5a1461e16dae288d22bb2e412ba9807df8397a635d88f671d36c3"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e6f0e77c9417e7cd62af82529b10563db3423625c5fce018430b249bf977f9e8"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:90c8e78f3b94014f7aaae121e6b909674df5b46ec24d6bebc45c44c56729af2a"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e7631a77ffb1f7d2eefa4445ebbee491c720a5661ddf6df3498ebecae5ed375c"}, + {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c7ac31a19f4545dd92fc25d200694098f42c9a8e391bc00bdd362c5736dbf881"}, + {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9513f27a1a582d9808cf21a07dae516f0fab1cf2d7683a742c498b93eedabb11"}, + {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7a0e56874cfbc4b9b05c60c8a1926fedf56324bb08cfbc188969777940aef3aa"}, + {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e6439e374fc012255b4ec786ae3c4bc838cd7309a540e5fe0952d03687d8804e"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2"}, + {file = "watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a"}, + {file = "watchdog-6.0.0-py3-none-win_amd64.whl", hash = 
"sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680"}, + {file = "watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f"}, + {file = "watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + [[package]] name = "wcwidth" version = "0.2.13" @@ -3012,4 +3423,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.12,<4" -content-hash = "0cdc9d351347552e7a8e246e9d9663b1e1c11fa0b87a86cdafdcecfeff21fb83" +content-hash = "24c46e7ab41949a8b9dd45260a7c6725f13c1133550548e3010cf1bd30f5a2e6" diff --git a/pyproject.toml b/pyproject.toml index 0838f00..b37169b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,9 @@ grpcio-status = "^1.60.1" pandas = "^2.2.0" ipykernel = "^6.29.5" markdown = "^3.7" +mkdocs = "^1.6.1" +mkdocs-material = "^9.6.7" +mkdocstrings = {extras = ["python"], version = "^0.28.2"} [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] diff --git a/src/pyspark_msgraph_source/core/__init__.py b/src/pyspark_msgraph_source/core/__init__.py new file mode 100644 index 0000000..e69de29 From 446257ada06efedf63949eec4bb6e49f43903c41 Mon Sep 17 00:00:00 2001 From: geekwhocodes Date: Wed, 5 Mar 2025 14:22:06 +0000 Subject: [PATCH 07/10] add core docs --- docs/api/core.md | 3 - docs/api/core/async-iterator.md | 3 + docs/api/core/client.md | 3 + docs/api/core/models.md | 3 + docs/api/core/resource-provider.md | 3 + docs/api/core/utils.md | 3 + mkdocs.yml | 14 +- .../{async_interator.py => async_iterator.py} | 2 + .../core/base_client.py | 113 +++++++++++--- src/pyspark_msgraph_source/core/models.py | 146 ++++++++++++------ .../core/resource_provider.py | 59 +++++-- src/pyspark_msgraph_source/core/source.py | 107 +++++++++++-- src/pyspark_msgraph_source/core/utils.py | 89 ++++++++--- 13 files changed, 415 insertions(+), 133 deletions(-) delete mode 100644 docs/api/core.md create mode 100644 docs/api/core/async-iterator.md create mode 100644 docs/api/core/client.md create mode 100644 docs/api/core/models.md create mode 100644 docs/api/core/resource-provider.md create mode 100644 docs/api/core/utils.md rename src/pyspark_msgraph_source/core/{async_interator.py => async_iterator.py} (94%) diff --git a/docs/api/core.md b/docs/api/core.md deleted file mode 100644 index b5c6a31..0000000 --- a/docs/api/core.md +++ /dev/null @@ -1,3 +0,0 @@ -# Core Engine - -::: pyspark_msgraph_source.core.async_interator diff --git a/docs/api/core/async-iterator.md b/docs/api/core/async-iterator.md new file mode 100644 index 0000000..d280bce --- /dev/null +++ b/docs/api/core/async-iterator.md @@ -0,0 +1,3 @@ +# Core Engine + +::: pyspark_msgraph_source.core.async_iterator diff --git a/docs/api/core/client.md b/docs/api/core/client.md new file mode 100644 index 0000000..1b406a9 --- /dev/null +++ b/docs/api/core/client.md @@ -0,0 +1,3 @@ +# Core Engine + +::: pyspark_msgraph_source.core.base_client diff --git a/docs/api/core/models.md b/docs/api/core/models.md new file mode 100644 index 0000000..396ba36 --- /dev/null +++ b/docs/api/core/models.md @@ -0,0 +1,3 @@ +# Core Engine + +::: pyspark_msgraph_source.core.models diff --git a/docs/api/core/resource-provider.md b/docs/api/core/resource-provider.md new file mode 100644 index 0000000..ba837e5 --- /dev/null +++ b/docs/api/core/resource-provider.md @@ -0,0 +1,3 @@ +# Core Engine + +::: pyspark_msgraph_source.core.resource_provider diff --git 
a/docs/api/core/utils.md b/docs/api/core/utils.md new file mode 100644 index 0000000..231bd80 --- /dev/null +++ b/docs/api/core/utils.md @@ -0,0 +1,3 @@ +# Core Engine + +::: pyspark_msgraph_source.core.utils diff --git a/mkdocs.yml b/mkdocs.yml index 8109506..ae842f2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,4 +1,4 @@ -site_name: Pyspark MSGraph Source +site_name: PySpark MSGraph Source theme: name: material @@ -7,12 +7,18 @@ plugins: - mkdocstrings: handlers: python: - paths: ["src/"] # or wherever your package code is + paths: ["src/"] options: - show_source: true + show_source: false nav: - Home: index.md - API Reference: - Overview: api/index.md - - Core: api/core.md + - Core: + - Source: api/core/source.md + - Base Client: api/core/client.md + - Resource Provider: api/core/resource-provider.md + - Models: api/core/models.md + - Async Iterator: api/core/async-iterator.md + - Utils: api/core/utils.md diff --git a/src/pyspark_msgraph_source/core/async_interator.py b/src/pyspark_msgraph_source/core/async_iterator.py similarity index 94% rename from src/pyspark_msgraph_source/core/async_interator.py rename to src/pyspark_msgraph_source/core/async_iterator.py index b2c121a..4216dd9 100644 --- a/src/pyspark_msgraph_source/core/async_interator.py +++ b/src/pyspark_msgraph_source/core/async_iterator.py @@ -9,6 +9,8 @@ class AsyncToSyncIterator: """ Converts an async generator into a synchronous iterator while ensuring proper event loop handling. + + This is required because Microsoft Graph SDK for Python(https://github.com/microsoftgraph/msgraph-sdk-python) is async first. """ def __init__(self, async_gen: AsyncGenerator[Any, None]): diff --git a/src/pyspark_msgraph_source/core/base_client.py b/src/pyspark_msgraph_source/core/base_client.py index 56bc709..ec4b162 100644 --- a/src/pyspark_msgraph_source/core/base_client.py +++ b/src/pyspark_msgraph_source/core/base_client.py @@ -3,18 +3,47 @@ from msgraph import GraphServiceClient from kiota_abstractions.base_request_configuration import RequestConfiguration from msgraph.generated.models.o_data_errors.o_data_error import ODataError -from pyspark_msgraph_source.core.async_interator import AsyncToSyncIterator +from pyspark_msgraph_source.core.async_iterator import AsyncToSyncIterator from pyspark_msgraph_source.core.models import BaseResource from pyspark_msgraph_source.core.utils import get_python_schema, to_json, to_pyspark_schema -from azure.identity import DefaultAzureCredential, EnvironmentCredential +from azure.identity import DefaultAzureCredential + class BaseResourceProvider(ABC): + """ + Abstract base class to handle fetching data from Microsoft Graph API and + provide schema extraction for resources. + """ + def __init__(self, options: Dict[str, Any]): - """ - Initializes the fetcher with the Graph client, resource path, and query parameters. + """ + Initializes the resource provider with Graph client and options. + + This sets up the Microsoft Graph client using `DefaultAzureCredential`, + which automatically handles Azure Active Directory (AAD) authentication + by trying multiple credential types in a fixed order, such as: + + - Environment variables + - Managed Identity (for Azure-hosted environments) + - Azure CLI credentials + - Visual Studio Code login + - Interactive browser login (if applicable) + + This allows seamless local development and production deployments + without code changes to the authentication mechanism. 
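+
+        Note:
+            For non-interactive environments, setting the standard
+            ``AZURE_TENANT_ID``, ``AZURE_CLIENT_ID`` and ``AZURE_CLIENT_SECRET``
+            environment variables lets the environment-variable step of the
+            chain authenticate without any prompt.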
+ + See Also: + defaultazurecredential: + https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential - :param options: Connector options. + Args: + options (Dict[str, Any]): Connector options including authentication + details and resource configurations. + + Raises: + CredentialUnavailableError: If no valid credentials are found during + authentication. """ self.options = options credentials = DefaultAzureCredential() @@ -22,14 +51,26 @@ def __init__(self, options: Dict[str, Any]): async def fetch_data(self): """ - Fetches data from Microsoft Graph using the dynamically built request. - Handles pagination automatically. + Asynchronously fetches data from Microsoft Graph API with automatic + pagination handling. + + Yields: + Any: Each record fetched from the API. + + Raises: + ValueError: If the resource query parameters cannot be instantiated. + AttributeError: If invalid query parameters are provided. + Exception: If a Graph API error occurs. + + Example: + async for record in provider.fetch_data(): + print(record) """ query_parameters_cls = self.resource.get_query_parameters_cls() if query_parameters_cls: try: - query_parameters_instance = query_parameters_cls() # Ensure it can be instantiated without arguments + query_parameters_instance = query_parameters_cls() except TypeError as e: raise ValueError(f"Failed to instantiate {query_parameters_cls.__name__}: {e}") @@ -37,16 +78,19 @@ async def fetch_data(self): for k, v in self.resource.query_params.items(): k = k.removeprefix("%24") if hasattr(query_parameters_instance, k): - setattr(query_parameters_instance, k, v) # Set attributes dynamically + setattr(query_parameters_instance, k, v) else: raise AttributeError(f"{query_parameters_cls.__name__} has no attribute '{k}'") - + request_configuration = RequestConfiguration( query_parameters=query_parameters_instance ) - + try: - builder = self.resource.get_request_builder_cls()(self.graph_client.request_adapter, self.resource.resource_params) + builder = self.resource.get_request_builder_cls()( + self.graph_client.request_adapter, + self.resource.resource_params + ) items = await builder.get(request_configuration=request_configuration) while True: for item in items.value: @@ -60,24 +104,36 @@ async def fetch_data(self): def iter_records(self): """ - Iterates over records from the Microsoft Graph API. + Provides a synchronous iterator over records from the Microsoft Graph API. + + Returns: + Iterator[Any]: Synchronous iterator over the fetched records. + + Raises: + ValueError: If required credentials or resource parameters are missing. + Exception: If the API request fails. - :param options: Connector options containing authentication credentials and resource details. - :return: A synchronous iterator over the fetched data. - :raises ValueError: If any required credentials or resource parameters are missing. - :raises GraphAPIError: If the API request fails. + Example: + for record in provider.iter_records(): + print(record) """ async_gen = self.fetch_data() return AsyncToSyncIterator(async_gen) def get_resource_schema(self) -> Dict[str, Any]: """ - Retrieves the schema of a Microsoft Graph API resource by fetching a single record. + Retrieves the schema of a Microsoft Graph API resource by sampling a record. + + Returns: + Tuple[Dict[str, Any], StructType]: A tuple containing the sample record + and its corresponding PySpark schema. + + Raises: + ValueError: If no records are found or required options are missing. 
+ Exception: If the API request fails. - :param options: Connector options containing authentication credentials and resource details. - :return: A dictionary representing the schema of the resource. - :raises ValueError: If no records are found or if required options are missing. - :raises GraphAPIError: If the API request fails. + Example: + record, schema = provider.get_resource_schema() """ async_gen = self.fetch_data() @@ -88,10 +144,17 @@ def get_resource_schema(self) -> Dict[str, Any]: record = to_json(record) schema = to_pyspark_schema(get_python_schema(record)) return record, schema - + except StopIteration: raise ValueError(f"No records available for {self.resource.resource_name}") - + @abstractmethod def resource(self) -> BaseResource: - ... \ No newline at end of file + """ + Abstract property that must be implemented to provide the resource + configuration. + + Returns: + BaseResource: The resource definition to use for fetching data. + """ + ... diff --git a/src/pyspark_msgraph_source/core/models.py b/src/pyspark_msgraph_source/core/models.py index 7651816..7ee6357 100644 --- a/src/pyspark_msgraph_source/core/models.py +++ b/src/pyspark_msgraph_source/core/models.py @@ -8,10 +8,24 @@ from urllib.parse import unquote from kiota_abstractions.base_request_builder import BaseRequestBuilder + @dataclass class BaseResource: - name: str # User friendly name for Spark reader - resource_name: str # Microsoft Graph leaf resource name + """ + Represents a resource from Microsoft Graph API, such as list_items, users, etc. + + Attributes: + name (str): User-friendly name for the Spark reader. + resource_name (str): Microsoft Graph leaf resource name (e.g., users, items). + request_builder_module (str): Module path of the request builder class from the MSGraph Python SDK. + query_params (Dict[str, Any], optional): Extracted query parameters from the URL template. + resource_params (Dict[str, Any], optional): Extracted path parameters from the URL template. + request_builder_cls_name (str, optional): PascalCase name of the request builder class. + request_builder_query_cls_name (str, optional): PascalCase name of the request builder's query parameters class. + """ + + name: str + resource_name: str request_builder_module: str query_params: Dict[str, Any] = None resource_params: Dict[str, Any] = None @@ -19,27 +33,42 @@ class BaseResource: request_builder_query_cls_name: str = None def __post_init__(self): + """ + Initializes derived attributes and parses the URL template. + + Raises: + ValueError: If the 'name' attribute is not provided. + """ if not self.name: raise ValueError("name is required") - + self.request_builder_cls_name = self._pascal_case(f"{self.resource_name}_request_builder") - #self.request_builder_cls = self.get_request_builder_cls() self.request_builder_query_cls_name = self._pascal_case(f"{self.resource_name}_request_builder_get_query_parameters") - #self.query_parameters_cls = self.get_query_parameters_cls() self.parse_url_template() - @classmethod def _pascal_case(cls, snake_str: str) -> str: """ - Converts snake_case to PascalCase. - Example: "items_request_builder" -> "ItemsRequestBuilder" + Converts a snake_case string to PascalCase. + + Args: + snake_str (str): The snake_case string to convert. + + Returns: + str: PascalCase formatted string. """ return "".join(word.title() for word in snake_str.split("_")) - + def get_query_parameters_cls(self): """ Retrieves the query parameters class from the request builder module. 
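The lookup described here reduces to `importlib` plus attribute inspection. A standalone sketch of the same technique, using the `list_items` module path that appears later in this patch series (the module and class names follow the msgraph-sdk-python generated layout and should be treated as assumptions):

```python
import importlib

# MSGRAPH_SDK_PACKAGE + request_builder_module, as combined in BaseResource
module = importlib.import_module(
    "msgraph.generated.sites.item.lists.item.items.items_request_builder"
)

# PascalCase names derived the same way _pascal_case() derives them
builder_cls = getattr(module, "ItemsRequestBuilder")
query_cls = getattr(builder_cls, "ItemsRequestBuilderGetQueryParameters")

# A dataclass of $-style query parameters, all defaulting to None
print(query_cls())
```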
+ + Returns: + Any: Query parameters class object. + + Raises: + ImportError: If the request builder module is not found. + AttributeError: If the required class is not found. """ try: module = importlib.import_module(f"{MSGRAPH_SDK_PACKAGE}.{self.request_builder_module}") @@ -48,19 +77,24 @@ def get_query_parameters_cls(self): if not request_builder_cls or not issubclass(request_builder_cls, BaseRequestBuilder): raise AttributeError(f"{self.request_builder_cls_name} not found in {module.__name__}") - # Inspect the attributes to find the query parameters class - for attr in dir(request_builder_cls): if attr == self.request_builder_query_cls_name: return getattr(request_builder_cls, attr) - raise AttributeError(f"{self.request_builder_query_cls_name} not found in {module.__name__}") - + raise AttributeError(f"{self.request_builder_query_cls_name} not found in {module.__name__}") + except ModuleNotFoundError: raise ImportError(f"Module {self.request_builder_module} not found in {MSGRAPH_SDK_PACKAGE}") def get_request_builder_cls(self) -> BaseRequestBuilder: """ - Dynamically imports a module and finds the RequestBuilder class. + Dynamically imports a module and retrieves the request builder class. + + Returns: + BaseRequestBuilder: The request builder class. + + Raises: + ImportError: If the module is not found. + AttributeError: If the class is not valid. """ try: module = importlib.import_module(f"{MSGRAPH_SDK_PACKAGE}.{self.request_builder_module}") @@ -72,85 +106,89 @@ def get_request_builder_cls(self) -> BaseRequestBuilder: return cls except ImportError: raise ImportError(f"Module {self.request_builder_module} not found in {MSGRAPH_SDK_PACKAGE}") - + def get_request_builder_url_template(self): """ - Extracts the `url_template` by analyzing the source code of the class. + Extracts the URL template from the request builder class's __init__ method. + + Returns: + str: URL template string. + + Raises: + TypeError: If the URL template cannot be extracted. """ try: cls = self.get_request_builder_cls() if inspect.isclass(cls) and hasattr(cls, "__init__"): - # Extract the __init__ function source code init_source = inspect.getsource(cls.__init__) if "super().__init__(" in init_source: - lines = init_source.split("\n") - for line in lines: + for line in init_source.split("\n"): if "super().__init__(" in line: match = re.search(r'super\(\).__init__\s*\([^,]+,\s*"([^"]+)"', line) if match: - url_template = match.group(1).replace('"', "").replace("'", "") - return url_template - + return match.group(1).replace('"', "").replace("'", "") except TypeError: raise TypeError(f"Error extracting URL template from {cls.__name__}") def parse_url_template(self): """ - Parses the `url_template` string to extract path parameters and query parameters. + Parses the URL template to extract path and query parameters. + + Raises: + ValueError: If the URL template is not found. 
""" url_template = self.get_request_builder_url_template() if not url_template: raise ValueError("URL template not found in request builder class") - # Extract path parameters (decode %2Did → _id) path_parameters = [ unquote(match.group(1)).replace("%2D", "_") for match in re.finditer(r"\{([^?}]+)\}", url_template) if match.group(1).lower() != "+baseurl" ] - # Extract query parameters (decode %24expand → $expand) query_match = re.search(r"\{\?([^}]+)\}", url_template) query_parameters = ( [unquote(q).replace("%24", "$") for q in query_match.group(1).split(",")] if query_match else [] ) - self.resource_params = {k:None for k in path_parameters} + self.resource_params = {k: None for k in path_parameters} self.query_params = {qp.strip().replace("$", ""): None for qp in query_parameters} - def map_options_to_params(self, options: Dict[str, Any]) -> 'BaseResource': """ - Maps the provided options to either query parameters or resource parameters. + Maps provided options to valid query and resource parameters. + + Args: + options (Dict[str, Any]): User-provided options. - :param options: Dictionary of options provided by the user. - :param query_params: List of valid query parameter names. - :param resource_params: List of valid resource parameter names. - :return: A tuple (mapped_query_params, mapped_resource_params, invalid_params) + Returns: + BaseResource: Updated instance with mapped parameters. + + Raises: + ValueError: If required resource parameters are missing or extra parameters are provided. """ missing_params = [param for param in self.resource_params if param not in options] if missing_params: raise ValueError(f"Missing required resource parameters: {', '.join(missing_params)}") - # TODO: add max $top value validation. if int(options.get("top", 1)) <= 100: logging.warning("Setting a low `top` value in Microsoft Graph queries can cause high latency and increase throttling risk.") - mapped_query_params = {"%24"+k: v for k, v in options.items() if k in self.query_params} + mapped_query_params = {"%24" + k: v for k, v in options.items() if k in self.query_params} mapped_resource_params = {k.replace("-", "%2D"): v for k, v in options.items() if k in self.resource_params} - + invalid_params = {k: v for k, v in options.items() if k not in self.query_params and k not in self.resource_params} - - if len(invalid_params) > 0: + + if invalid_params: raise ValueError(f"Extra parameters {invalid_params} not allowed.") - + self.query_params = mapped_query_params self.resource_params = mapped_resource_params - + return self - GUID_PATTERN = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$") @@ -158,20 +196,26 @@ def map_options_to_params(self, options: Dict[str, Any]) -> 'BaseResource': @dataclass class ConnectorOptions: - """Options for Microsoft Graph API requests with strict resource_path validation.""" + """ + Options for Microsoft Graph API requests with strict credential validation. + + Attributes: + tenant_id (str): Azure tenant ID (GUID). + client_id (str): Azure client ID (GUID). + client_secret (str): Azure client secret. + """ tenant_id: str client_id: str client_secret: str + def __post_init__(self): ... 
- + def _validate_credentials(self): - """Validates the format and presence of credentials.""" - if not self.tenant_id or not GUID_PATTERN.match(self.tenant_id): - raise ValueError("Invalid tenant_id: must be a valid GUID.") - - if not self.client_id or not GUID_PATTERN.match(self.client_id): - raise ValueError("Invalid client_id: must be a valid GUID.") - - if not self.client_secret or not isinstance(self.client_secret, str): - raise ValueError("Invalid client_secret: must be a non-empty string.") \ No newline at end of file + """ + Validates the format and presence of credentials. + + Raises: + ValueError: If any credential is invalid or missing. + """ + ... diff --git a/src/pyspark_msgraph_source/core/resource_provider.py b/src/pyspark_msgraph_source/core/resource_provider.py index bc72ac6..468e5d1 100644 --- a/src/pyspark_msgraph_source/core/resource_provider.py +++ b/src/pyspark_msgraph_source/core/resource_provider.py @@ -5,19 +5,34 @@ from typing import Dict, Type from pyspark_msgraph_source.core.base_client import BaseResourceProvider + # @lru_cache(maxsize=10) def load_resource_providers() -> Dict[str, Type[BaseResourceProvider]]: """ - Dynamically loads all resource providers from the resources package + Dynamically loads all resource providers from the `resources` package. + + This function scans the `resources` subpackage of the current root package, + discovers all modules (excluding `base.py`), and imports any classes ending + with `ResourceProvider` that are subclasses of `BaseResourceProvider`. + + This allows dynamic discovery and registration of new resource providers + without requiring explicit imports. + + Returns: + Dict[str, Type[BaseResourceProvider]]: A dictionary mapping resource + names (module names) to their corresponding resource provider classes. + + Example: + providers = load_resource_providers() + print(providers.keys()) """ providers = {} root_package = __package__.split('.')[0] logging.debug(f"Current root package {root_package}.") - + package = f'{root_package}.resources' - resources_pkg = importlib.import_module(package) - + for _, name, _ in pkgutil.iter_modules(resources_pkg.__path__): if name != 'base': # Skip the base module try: @@ -25,27 +40,49 @@ def load_resource_providers() -> Dict[str, Type[BaseResourceProvider]]: for attr_name in dir(module): if attr_name.endswith('ResourceProvider'): provider_class = getattr(module, attr_name) - if (isinstance(provider_class, type) and - issubclass(provider_class, BaseResourceProvider) and + if (isinstance(provider_class, type) and + issubclass(provider_class, BaseResourceProvider) and provider_class != BaseResourceProvider): providers[name] = provider_class except ImportError as e: print(f"Warning: Could not load resource provider {name}: {e}") - - return frozenset(providers.items()) + + return providers + # @lru_cache(maxsize=10) def get_resource_provider(resource_name: str, options: frozenset) -> BaseResourceProvider: """ - Factory method to get the appropriate resource provider + Factory method to retrieve the appropriate resource provider based on its name. + + This function looks up the resource provider class registered in + `load_resource_providers()`, instantiates it with the provided options, + and returns the instance. + + Args: + resource_name (str): The name of the resource (typically the module name). + options (frozenset): A frozenset of key-value pairs representing the + configuration options for the provider. 
+ + Returns: + BaseResourceProvider: An instance of the corresponding resource provider. + + Raises: + ValueError: If the requested resource name is not found in the + available providers. + + Example: + provider = get_resource_provider('users', frozenset({'tenant_id': 'xxx'}.items())) + for record in provider.iter_records(): + print(record) """ providers = dict(load_resource_providers()) provider_class: BaseResourceProvider = providers.get(resource_name) - + if not provider_class: available = ', '.join(providers.keys()) raise ValueError( f"Unsupported resource name: '{resource_name}'. " f"Available resources: {available}" ) - return provider_class(dict(options)) \ No newline at end of file + return provider_class(dict(options)) diff --git a/src/pyspark_msgraph_source/core/source.py b/src/pyspark_msgraph_source/core/source.py index efa4660..c5e85a6 100644 --- a/src/pyspark_msgraph_source/core/source.py +++ b/src/pyspark_msgraph_source/core/source.py @@ -1,52 +1,127 @@ import logging -from typing import Any, Dict, Union +from typing import Any, Dict, Iterator, Tuple, Union from pyspark.sql.datasource import DataSource, DataSourceReader from pyspark.sql.types import StructType from pyspark_msgraph_source.core.base_client import BaseResourceProvider - from pyspark_msgraph_source.core.resource_provider import get_resource_provider -# Reference https://learn.microsoft.com/en-us/azure/databricks/pyspark/datasources +# Reference: https://learn.microsoft.com/en-us/azure/databricks/pyspark/datasources logger = logging.getLogger(__name__) + class MSGraphDataSource(DataSource): """ + A custom PySpark DataSource implementation to read data from Microsoft Graph API. + + This datasource uses dynamic resource providers to connect to different + Microsoft Graph resources based on the `resource` option. + + If schema inference is required, it fetches sample data to infer the schema. + + See Also: + Databricks PySpark DataSource API: + https://learn.microsoft.com/en-us/azure/databricks/pyspark/datasources + + Args: + options (Dict[str, Any]): Connector options, including the required + `resource` name and authentication parameters. + + Raises: + ValueError: If the `resource` option is missing. + Example: + df = spark.read.format("msgraph") \ + .option("resource", "list_items") \ + .option("site-id", "") \ + .option("list-id", "") \ + .option("top", 999) \ + .option("expand", "fields") \ + .load() + + df.show() """ + def __init__(self, options: Dict[str, Any]): - - self.resource_name = options.pop("resource") + self.resource_name = options.pop("resource", None) if not self.resource_name: raise ValueError("resource is missing, please provide a valid resource name.") self.options = frozenset(options.items()) - + @classmethod - def name(cls): + def name(cls) -> str: + """ + Returns the registered name of the DataSource. + + Returns: + str: The name of the DataSource, "msgraph". + """ return "msgraph" - + def schema(self): - logger.info("Schema not provided, infering from the source.") - resource_provider:BaseResourceProvider = get_resource_provider(self.resource_name, self.options) + """ + Infers the schema of the Microsoft Graph resource. + + This will call the corresponding resource provider to fetch a sample + record and determine its schema. + + Returns: + StructType: The inferred schema of the resource. 
+ """ + logger.info("Schema not provided, inferring from the source.") + resource_provider: BaseResourceProvider = get_resource_provider(self.resource_name, self.options) _, schema = resource_provider.get_resource_schema() - logger.debug(f"Infered schema : {schema}") + logger.debug(f"Inferred schema: {schema}") return schema - def reader(self, schema: StructType): + def reader(self, schema: StructType) -> "MSGraphDataSourceReader": + """ + Provides the DataSourceReader to read data. + + Args: + schema (StructType): The schema to apply to the records. + + Returns: + MSGraphDataSourceReader: The configured reader for this resource. + """ return MSGraphDataSourceReader(self.resource_name, self.options, schema) class MSGraphDataSourceReader(DataSourceReader): + """ + A DataSourceReader to fetch records from a Microsoft Graph resource. + + This reader uses the resource provider to iterate over records and + yields rows compatible with the provided schema. + + Args: + resource_name (str): The name of the Microsoft Graph resource. + options (frozenset): Connector options. + schema (Union[StructType, str]): The schema to apply to the records. + """ - def __init__(self, resource_name :str, options: frozenset, schema: Union[StructType, str]): + def __init__(self, resource_name: str, options: frozenset, schema: Union[StructType, str]): self.schema: StructType = schema self.options = options self.resource_name = resource_name - - def read(self, partition): + + def read(self, partition) -> Union[Iterator[Tuple], Iterator["RecordBatch"]]: # type: ignore + """ + Reads records from the Microsoft Graph API. + + For each record fetched from the resource provider, it transforms + the record into a PySpark Row object matching the schema. + + Args: + partition: Unused in this implementation (for future partitioning support). + + Yields: + Row: A PySpark Row object for each record. 
+ """ from pyspark_msgraph_source.core.utils import to_json from pyspark.sql import Row - resource_provider:BaseResourceProvider = get_resource_provider(self.resource_name, self.options) + + resource_provider: BaseResourceProvider = get_resource_provider(self.resource_name, self.options) for row in resource_provider.iter_records(): row = to_json(row) row_data = {f.name: row.get(f.name, None) for f in self.schema.fields} diff --git a/src/pyspark_msgraph_source/core/utils.py b/src/pyspark_msgraph_source/core/utils.py index b878c2a..7f9d658 100644 --- a/src/pyspark_msgraph_source/core/utils.py +++ b/src/pyspark_msgraph_source/core/utils.py @@ -1,10 +1,10 @@ -from typing import Any +from typing import Any, Dict, List, Union from kiota_serialization_json.json_serialization_writer_factory import JsonSerializationWriterFactory import json from pyspark.sql.types import ( StructType, StructField, StringType, IntegerType, DoubleType, BooleanType, - MapType, ArrayType, TimestampType, DateType, LongType, BinaryType, DecimalType + ArrayType, TimestampType, DateType, LongType, BinaryType, DecimalType, DataType ) from datetime import datetime, date @@ -12,26 +12,37 @@ # Convert to JSON using Kiota writer_factory = JsonSerializationWriterFactory() -writer = writer_factory.get_serialization_writer("application/json") -def to_json(value): - value.serialize(writer) - # Get JSON string - return json.loads((writer.get_serialized_content().decode("utf-8"))) -def to_jsonValue(value): - value.serialize(writer) - # Get JSON string - return str(json.loads((writer.get_serialized_content().decode("utf-8")))) +def to_json(value: Any) -> Dict[str, Any]: + """ + Serializes a Kiota serializable object to a JSON-compatible dictionary. + Args: + value (Any): An object that implements the Kiota serialization interface. + + Returns: + dict: A dictionary representing the serialized JSON content. + """ + writer = writer_factory.get_serialization_writer("application/json") + value.serialize(writer) + return json.loads(writer.get_serialized_content().decode("utf-8")) -def get_python_schema(obj:Any): +def get_python_schema( + obj: Any +) -> Union[str, Dict[str, Any], List[Any]]: """ Recursively extracts the schema from a Python object. - :param obj: The Python object (dict, list, int, str, etc.). - :return: A schema dictionary representing field types. + Args: + obj (Any): The Python object (e.g., dict, list, int, str) to analyze. + + Returns: + Union[str, dict, list]: A nested schema representing the object's structure and field types. + - For dicts: a dict with key-value schemas. + - For lists: a list with the schema of the first element or "any" if empty. + - For primitives: a string indicating the type ("str", "int", etc.). """ if isinstance(obj, bool): return "bool" @@ -57,14 +68,35 @@ def get_python_schema(obj:Any): return "null" return "unknown" # Fallback for unrecognized types -def to_pyspark_schema(schema_dict): + +def to_pyspark_schema( + schema_dict: Dict[str, Any] +) -> StructType: """ Recursively converts a nested Python schema dictionary to a PySpark StructType schema. - :param schema_dict: Dictionary with field names as keys and data types as values. - :return: PySpark StructType schema. + Args: + schema_dict (dict): A dictionary with field names as keys and data types as values, + where types are represented as strings (e.g., "str", "int", "bool"). + Nested dictionaries represent nested StructTypes. + + Returns: + StructType: A PySpark StructType schema reflecting the provided structure. 
+ + Example: + Input: + {"name": "str", "age": "int", "scores": ["float"], "address": {"city": "str"}} + Output: + StructType([ + StructField("name", StringType(), True), + StructField("age", IntegerType(), True), + StructField("scores", ArrayType(DoubleType()), True), + StructField("address", StructType([ + StructField("city", StringType(), True) + ]), True) + ]) """ - type_mapping = { + type_mapping: Dict[str, DataType] = { "str": StringType(), "int": IntegerType(), "float": DoubleType(), @@ -74,11 +106,20 @@ def to_pyspark_schema(schema_dict): "long": LongType(), "binary": BinaryType(), "decimal": DecimalType(38, 18), - "unknown": StringType() + "null": StringType(), + "unknown": StringType(), } - def convert_type(value): - """Recursively converts types, handling nested dicts and lists.""" + def convert_type(value: Any) -> DataType: + """ + Recursively converts type descriptors to PySpark data types. + + Args: + value (Any): The type descriptor (str, dict, list). + + Returns: + DataType: The corresponding PySpark data type. + """ if isinstance(value, dict): # Nested structure return StructType([StructField(k, convert_type(v), True) for k, v in value.items()]) elif isinstance(value, list): # List of elements (assume first element type) @@ -87,5 +128,7 @@ def convert_type(value): return ArrayType(convert_type(value[0])) return type_mapping.get(value, StringType()) # Default to StringType - struct_fields = [StructField(field, convert_type(dtype), True) for field, dtype in schema_dict.items()] - return StructType(struct_fields) \ No newline at end of file + struct_fields: List[StructField] = [ + StructField(field, convert_type(dtype), True) for field, dtype in schema_dict.items() + ] + return StructType(struct_fields) From ce161bcdf312db67c672456e72b6a89bbbe712e2 Mon Sep 17 00:00:00 2001 From: geekwhocodes Date: Wed, 5 Mar 2025 15:06:38 +0000 Subject: [PATCH 08/10] add documentation --- README.md | 198 +++++++----------- docs/api/core/async-iterator.md | 2 +- docs/api/core/client.md | 2 +- docs/api/core/models.md | 2 +- docs/api/core/resource-provider.md | 2 +- docs/api/core/source.md | 3 + docs/api/core/utils.md | 2 +- docs/api/resources/index.md | 33 +++ docs/api/resources/list-items.md | 4 + docs/getting-started.md | 0 docs/guides/list-items.md | 94 +++++++++ docs/index.md | 88 ++++++-- mkdocs.yml | 5 + .../resources/list_items.py | 50 ++++- 14 files changed, 342 insertions(+), 143 deletions(-) create mode 100644 docs/api/core/source.md create mode 100644 docs/api/resources/index.md create mode 100644 docs/api/resources/list-items.md create mode 100644 docs/getting-started.md create mode 100644 docs/guides/list-items.md diff --git a/README.md b/README.md index 4ea60d4..ebd5e3e 100644 --- a/README.md +++ b/README.md @@ -1,163 +1,123 @@ -# Apache PySpark Custom Data Source Template -This repository provides a template for creating a custom data source for Apache PySpark. It is designed to help developers extend PySpark’s data source API to support custom data ingestion and storage mechanisms. +# pyspark-msgraph-source +A **PySpark DataSource** to seamlessly integrate and read data from **Microsoft Graph API**, enabling easy access to resources like **SharePoint List Items**, and more. -## Motivation - -When developing custom PySpark data sources, I encountered several challenges that made the development process frustrating: - -1. 
**Environment Setup Complexity**: Setting up a development environment for PySpark data source development was unnecessarily complex, with multiple dependencies and version conflicts. - -2. **Test Data Management**: Managing test data and maintaining consistent test environments across different machines was challenging. - -3. **Debugging Issues**: The default setup made it difficult to debug custom data source code effectively, especially when dealing with Spark's distributed nature. - -4. **Documentation Gaps**: Existing documentation for custom data source development was scattered and often incomplete. - -This template repository aims to solve these pain points and provide a streamlined development experience. - +--- ## Features +- Entra ID Authentication +Securely authenticate with Microsoft Graph using DefaultAzureCredential, supporting local development and production seamlessly. -- Pre-configured development environment -- Ready-to-use test infrastructure -- Example implementation -- Automated tests setup -- Debug-friendly configuration - -## Getting Started - -Follow these steps to set up and use this repository: +- Automatic Pagination Handling +Fetches all paginated data from Microsoft Graph without manual intervention. -### Prerequisites +- Dynamic Schema Inference +Automatically detects the schema of the resource by sampling data, so you don't need to define it manually. -- Docker -- Visual Studio Code -- Python 3.11 +- Simple Configuration with .option() +Easily configure resources and query parameters directly in your Spark read options, making it flexible and intuitive. -### Creating a Repository from This Template +- Zero External Ingestion Services +No additional services like Azure Data Factory or Logic Apps are needed—directly ingest data into Spark from Microsoft Graph. -To create a new repository based on this template: +- Extensible Resource Providers +Add custom resource providers to support more Microsoft Graph endpoints as needed. -1. Go to the [GitHub repository](https://github.com/geekwhocodes/pyspark-custom-datasource-template). -2. Click the **Use this template** button. -3. Select **Create a new repository**. -4. Choose a repository name, visibility (public or private), and click **Create repository from template**. -5. Clone your new repository: +- Pluggable Architecture +Dynamically load resource providers without modifying core logic. - ```sh - git clone https://github.com/your-username/your-new-repository.git - cd your-new-repository - ``` +- Optimized for PySpark +Designed to work natively with Spark's DataFrame API for big data processing. -### Setup +- Secure by Design +Credentials and secrets are handled using Azure Identity best practices, avoiding hardcoding sensitive data. -1. **Open the repository in Visual Studio Code:** +--- - ```sh - code . - ``` +## Installation -2. **Build and start the development container:** - - Open the command palette (Ctrl+Shift+P) and select `Remote-Containers: Reopen in Container`. +```bash +pip install pyspark-msgraph-source +``` -3. **Initialize the environment:** +--- - The environment will be initialized automatically by running the `init-env.sh` script defined in the `devcontainer.json` file. +## ⚡ Quickstart -### Project Structure +### 1. Authentication -The project follows this structure: +This package uses [DefaultAzureCredential](https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python#defaultazurecredential). +Ensure you're authenticated: +```bash +az login ``` -. 
-├── src/ -│ ├── fake_source/ # Default fake data source implementation -│ │ ├── __init__.py -│ │ ├── source.py # Implementation of the fake data source -│ │ ├── schema.py # Schema definitions (if applicable) -│ │ └── utils.py # Helper functions (if needed) -│ ├── tests/ # Unit tests for the custom data source -│ │ ├── __init__.py -│ │ ├── test_source.py # Tests for the data source -│ │ └── conftest.py # Test configuration and fixtures -├── .devcontainer/ # Development container setup files -│ ├── Dockerfile -│ ├── devcontainer.json -├── |── scripts -├── | ├── init-env.sh # Initialization script for setting up the environment -├── pyproject.toml # Project dependencies and build system configuration -├── README.md # Project documentation -├── LICENSE # License file -``` - -### Usage - -By default, this template includes a **fake data source** that generates mock data. You can use it as-is or replace it with your own implementation. -1. **Register the custom data source:** - - ```python - from pyspark.sql import SparkSession - from fake_source.source import FakeDataSource - - spark = SparkSession.builder.getOrCreate() - spark.dataSource.register(FakeDataSource) - ``` +Or set environment variables: +```bash +export AZURE_CLIENT_ID= +export AZURE_TENANT_ID= +export AZURE_CLIENT_SECRET= +``` -2. **Read data using the custom data source:** +### 2. Example Usage - ```python - df = spark.read.format("fake").load() - df.show() - ``` +```python +from pyspark.sql import SparkSession -3. **Run tests:** +spark = SparkSession.builder \ +.appName("MSGraphExample") \ +.getOrCreate() - ```sh - pytest - ``` +from pyspark_msgraph_source.core.source import MSGraphDataSource +spark.dataSource.register(MSGraphDataSource) -### Customization +df = spark.read.format("msgraph") \ +.option("resource", "list_items") \ +.option("site-id", "") \ +.option("list-id", "") \ +.option("top", 100) \ +.option("expand", "fields") \ +.load() -To replace the fake data source with your own: +df.show() +``` -1. **Rename the package folder:** +--- - ```sh - mv src/fake_source src/your_datasource_name - ``` +## Supported Resources -2. **Update imports in `source.py` and other files:** +| Resource | Description | +|--------------|-----------------------------| +| `list_items`| SharePoint List Items | +| *(more coming soon...)* | | - ```python - from your_datasource_name.source import CustomDataSource - ``` +--- -3. **Update `pyproject.toml` to reflect the new package name.** +## Development -4. **Modify the schema and options in `source.py` to fit your use case.** +Coming soon... -### References -1. [Microsoft Learn - PySpark custom data sources](https://learn.microsoft.com/en-us/azure/databricks/pyspark/datasources) +--- -### License +## Troubleshooting -This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. +| Issue | Solution | +|---------------------------------|----------------------------------------------| +| `ValueError: resource missing` | Add `.option("resource", "list_items")` | +| Empty dataframe | Verify IDs, permissions, and access | +| Authentication failures | Check Azure credentials and login status | -### Contact +--- -For issues and questions, please use the GitHub Issues section. +## 📄 License +[MIT License](LICENSE) -### Need Help Setting Up a Data Intelligence Platform with Databricks? -If you need expert guidance on setting up a modern data intelligence platform using Databricks, we can help. 
Our consultancy specializes in: +--- -- Custom data source development for Databricks and Apache Spark -- Optimizing ETL pipelines for performance and scalability -- Data governance and security using Unity Catalog -- Building ML & AI solutions on Databricks +## 📚 Resources -🚀 [Contact us](https://www.linkedin.com/in/geekwhocodes/) for a consultation and take your data platform to the next level. +- [Microsoft Graph API](https://learn.microsoft.com/en-us/graph/overview) +- [DefaultAzureCredential](https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python#defaultazurecredential) diff --git a/docs/api/core/async-iterator.md b/docs/api/core/async-iterator.md index d280bce..986e6d1 100644 --- a/docs/api/core/async-iterator.md +++ b/docs/api/core/async-iterator.md @@ -1,3 +1,3 @@ -# Core Engine +# Async To Sync Iterator ::: pyspark_msgraph_source.core.async_iterator diff --git a/docs/api/core/client.md b/docs/api/core/client.md index 1b406a9..f8a05a2 100644 --- a/docs/api/core/client.md +++ b/docs/api/core/client.md @@ -1,3 +1,3 @@ -# Core Engine +# Base Client ::: pyspark_msgraph_source.core.base_client diff --git a/docs/api/core/models.md b/docs/api/core/models.md index 396ba36..2d8f907 100644 --- a/docs/api/core/models.md +++ b/docs/api/core/models.md @@ -1,3 +1,3 @@ -# Core Engine +# Core Models ::: pyspark_msgraph_source.core.models diff --git a/docs/api/core/resource-provider.md b/docs/api/core/resource-provider.md index ba837e5..a063ce1 100644 --- a/docs/api/core/resource-provider.md +++ b/docs/api/core/resource-provider.md @@ -1,3 +1,3 @@ -# Core Engine +# Resource Provider ::: pyspark_msgraph_source.core.resource_provider diff --git a/docs/api/core/source.md b/docs/api/core/source.md new file mode 100644 index 0000000..3b05041 --- /dev/null +++ b/docs/api/core/source.md @@ -0,0 +1,3 @@ +# Source + +::: pyspark_msgraph_source.core.source diff --git a/docs/api/core/utils.md b/docs/api/core/utils.md index 231bd80..a51b054 100644 --- a/docs/api/core/utils.md +++ b/docs/api/core/utils.md @@ -1,3 +1,3 @@ -# Core Engine +# Utils ::: pyspark_msgraph_source.core.utils diff --git a/docs/api/resources/index.md b/docs/api/resources/index.md new file mode 100644 index 0000000..ef87d05 --- /dev/null +++ b/docs/api/resources/index.md @@ -0,0 +1,33 @@ + +# Available Resources + +This page lists the Microsoft Graph resources currently supported by the `pyspark-msgraph-source` connector. + +--- + +## Supported Resources + +| Resource Name | Description | Read more | +|---------------|-------------|------------------| +| `list_items` | Retrieves items from a SharePoint List | [Configuration](list-items.md) | + +--- + +## Adding New Resources + +Want to add support for more resources? +Check out the [Contributing Guide](contributing.md) to learn how to extend the connector! + +--- + +## Notes +- Resources may require specific Microsoft Graph API permissions. +- Pagination, authentication, and schema inference are handled automatically. + +--- + +## Request New Resources + +Is your desired resource not listed here? +Open an [issue](https://github.com/geekwhocodes/pyspark-msgraph-source/issues) to request it!
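Thanks to the registry pattern described above, a new resource is usually just one file in the `resources` package. A hypothetical sketch mirroring the `list_items` provider shown later in this series (the `users` request-builder module path is an assumption about the msgraph SDK layout, not a shipped resource):

```python
from functools import cached_property
from typing import Dict

from pyspark_msgraph_source.core.base_client import BaseResourceProvider
from pyspark_msgraph_source.core.models import BaseResource


class UsersResourceProvider(BaseResourceProvider):
    """Hypothetical provider for the Microsoft Graph /users collection."""

    def __init__(self, options: Dict[str, str]):
        self.options = options
        super().__init__(options)

    @cached_property
    def resource(self) -> BaseResource:
        # request_builder_module is assumed from the SDK's generated layout
        return BaseResource(
            name="users",
            resource_name="users",
            request_builder_module="users.users_request_builder",
        ).map_options_to_params(self.options)
```

Because `load_resource_providers()` scans the `resources` package for classes whose names end in `ResourceProvider`, dropping a file like this into the package is enough to register it.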
+ diff --git a/docs/api/resources/list-items.md b/docs/api/resources/list-items.md new file mode 100644 index 0000000..78b6c83 --- /dev/null +++ b/docs/api/resources/list-items.md @@ -0,0 +1,4 @@ +# Resource - List Items + + +::: pyspark_msgraph_source.resources.list_items diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/guides/list-items.md b/docs/guides/list-items.md new file mode 100644 index 0000000..7b8a23b --- /dev/null +++ b/docs/guides/list-items.md @@ -0,0 +1,94 @@ +# Reading SharePoint List Items with PySpark + +This guide explains how to read **List Items** from a **SharePoint List** using the `pyspark-msgraph-source` connector and Microsoft Graph API. + +--- + +## Prerequisites +- Microsoft Entra (Azure AD) authentication set up with permissions to access SharePoint lists. +- Required Microsoft Graph API permissions: + - `Sites.Read.All` + - `Lists.Read` +- Installed `pyspark-msgraph-source` package. +- Initialized Spark session. + +--- + +## 🔹 Supported Options for `list_items` + +| Option | Description | Required | +|--------------|-----------------------------------------------------------|----------| +| `resource` | Resource name (must be `"list_items"`) | ✅ Yes | +| `site-id` | The ID of the SharePoint site | ✅ Yes | +| `list-id` | The ID of the list within the SharePoint site | ✅ Yes | +| `top` | (Optional) Number of records to fetch | ❌ No | +| `expand` | (Optional) Related entities to expand (e.g., `"fields"`) | ❌ No | + +> **Note:** You can find `site-id` and `list-id` via Graph API explorer or SharePoint admin tools. + +--- + +## Example Usage + +```python +from pyspark_msgraph_source.core.source import MSGraphDataSource + +# Register the data source (typically required once) +spark.dataSource.register(MSGraphDataSource) + +# Read data from Microsoft Graph +df = spark.read.format("msgraph") \ + .option("resource", "list_items") \ + .option("site-id", "37d7dde8-0b6b-4b7c-a2fd-2e217f54a263") \ + .option("list-id", "5ecf26db-0161-4069-b763-856217415099") \ + .option("top", 111) \ + .option("expand", "fields") \ + .load() + +# Show the results +df.show() +``` + +--- + +## Explanation of Example +- **`spark.read.format("msgraph")`**: Use the Microsoft Graph connector. +- **`.option("resource", "list_items")`**: Specify the resource to fetch SharePoint list items. +- **`.option("site-id", "...")` and `.option("list-id", "...")`**: Provide the SharePoint site and list IDs. +- **`.option("top", 111)`**: Limit the number of records (optional). +- **`.option("expand", "fields")`**: Retrieve additional field details (optional). +- **`.load()`**: Execute the read operation. + +--- + +## Schema Inference +The connector automatically infers the schema by fetching a sample record from the API if you do not provide a schema. + +--- + +## Error Handling +- Missing or invalid `site-id` or `list-id` will raise a `ValueError`. +- API permission errors will raise authentication exceptions. +- Network or Microsoft Graph issues will raise clear, descriptive exceptions. + +--- + +## Notes +- Authentication is handled automatically via [**`DefaultAzureCredential`**](https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python#defaultazurecredential), supporting: + - Environment credentials + - Managed Identity + - Azure CLI login + - Visual Studio Code authentication + +- Use `.option("top", N)` to control the number of records retrieved for large datasets. 
+- To retrieve custom fields, include `.option("expand", "fields")`. + +--- + +## Troubleshooting + +| Issue | Solution | +|-----------------------------------------|-------------------------------------------------| +| `"resource is missing"` error | Ensure `.option("resource", "list_items")` | +| Empty dataframe | Check permissions and ensure valid IDs | +| `"Unsupported resource name"` error | Verify `"list_items"` is supported | \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 000ea34..5752edb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,17 +1,81 @@ -# Welcome to MkDocs -For full documentation visit [mkdocs.org](https://www.mkdocs.org). +# Welcome to **PySpark Microsoft Graph Connector** -## Commands +Unlock seamless data access from **Microsoft Graph API** directly into **Apache Spark** using this connector designed for modern data pipelines. -* `mkdocs new [dir-name]` - Create a new project. -* `mkdocs serve` - Start the live-reloading docs server. -* `mkdocs build` - Build the documentation site. -* `mkdocs -h` - Print help message and exit. +--- -## Project layout +## Why Use This Connector? - mkdocs.yml # The configuration file. - docs/ - index.md # The documentation homepage. - ... # Other markdown pages, images and other files. +Working with Microsoft 365 data—such as SharePoint, Teams, Users, and Planner—has traditionally required intermediate services like Azure Data Factory, Logic Apps, or manual exports. With **`pyspark-msgraph-source`**, you can: + +- Authenticate securely with **Entra ID** using `DefaultAzureCredential` +- Query any supported Microsoft Graph resource directly in Spark +- Automatically handle **pagination**, **dynamic schema inference**, and **large datasets** +- Streamline analytics on Microsoft 365 data without extra infrastructure + +--- + +## What is Microsoft Graph? + +[Microsoft Graph](https://learn.microsoft.com/en-us/graph/overview) is the gateway to data and intelligence in Microsoft 365. It provides unified access to: + +- **Users** +- **Groups** +- **Calendars** +- **SharePoint Lists** +- **Teams Channels** +- **Planner Tasks** +- And much more! + +--- + +## What Can You Build? + +- Reporting and analytics on SharePoint Lists +- Business intelligence dashboards with Microsoft Teams activity +- Enterprise insights from Entra ID (Azure AD) +- And much more! + +--- + +## How Does It Work? + +1. Configure your Microsoft Entra (Azure AD) application. +2. Authenticate with `DefaultAzureCredential`. +3. Load data into Spark using `.read.format("msgraph")`. +4. Query, process, and analyze at scale. + +--- + +## Example + +```python +df = spark.read.format("msgraph") \ + .option("resource", "list_items") \ + .option("site-id", "") \ + .option("list-id", "") \ + .load() + +df.show() +``` + +--- + +## Ready to Get Started? + +- Check out the [Getting Started Guide](getting-started.md) +- Explore available [Resources](api/resources) +- Learn how to [Contribute](contributing.md) + +--- + +## Need Help? + +- Open an [issue](https://github.com/geekwhocodes/pyspark-msgraph-source/issues) +- Start a discussion with the community +- Submit feature requests and improvements + +--- + +Welcome aboard and happy querying! 
🚀 diff --git a/mkdocs.yml b/mkdocs.yml index ae842f2..5b48030 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,6 +13,9 @@ plugins: nav: - Home: index.md + - Guides: + - List Items: guides/list-items.md + - Available Resources: api/resources/index.md - API Reference: - Overview: api/index.md - Core: @@ -22,3 +25,5 @@ nav: - Models: api/core/models.md - Async Iterator: api/core/async-iterator.md - Utils: api/core/utils.md + - Resources: + - List Items: api/resources/list-items.md diff --git a/src/pyspark_msgraph_source/resources/list_items.py b/src/pyspark_msgraph_source/resources/list_items.py index e3d1293..153b2c1 100644 --- a/src/pyspark_msgraph_source/resources/list_items.py +++ b/src/pyspark_msgraph_source/resources/list_items.py @@ -5,19 +5,55 @@ from pyspark_msgraph_source.core.base_client import BaseResourceProvider from pyspark_msgraph_source.core.models import BaseResource +logger = logging.getLogger(__name__) + class ListItemsResourceProvider(BaseResourceProvider): + """ + Resource provider for fetching list items from Microsoft Graph API. + + See Also: + Microsoft Graph listItem API: + https://learn.microsoft.com/en-us/graph/api/listitem-list?view=graph-rest-1.0 + + This provider handles the setup of the `list_items` resource, + configuring the request builder and mapping options to the required parameters. + + Args: + options (Dict[str, str]): Connector options, typically containing + site ID, list ID, and any query parameters. + + Example: + provider = ListItemsResourceProvider(options) + for record in provider.iter_records(): + print(record) + """ def __init__(self, options: Dict[str, str]): + """ + Initializes the ListItemsResourceProvider. + + Args: + options (Dict[str, str]): Connector options required to configure + the resource and authenticate requests. + """ self.options = options super().__init__(options) - - @cached_property + + @cached_property def resource(self) -> BaseResource: - return BaseResource( - name="list_items", - resource_name="items", - request_builder_module="sites.item.lists.item.items.items_request_builder" - ).map_options_to_params(self.options) + """ + Returns the BaseResource configuration for list items. + This sets up the request builder path and resource name + required to make API calls to retrieve list items. + Returns: + BaseResource: Configured resource with mapped options.
+ """ + return BaseResource( + name="list_items", + resource_name="items", + request_builder_module="sites.item.lists.item.items.items_request_builder" + ).map_options_to_params(self.options) From 81591c9cf2dc9a7127bbd188b77fe823e236afef Mon Sep 17 00:00:00 2001 From: geekwhocodes Date: Wed, 5 Mar 2025 15:14:47 +0000 Subject: [PATCH 09/10] getting started --- README.md | 14 +++++++++ docs/getting-started.md | 61 +++++++++++++++++++++++++++++++++++++++ docs/guides/list-items.md | 2 +- mkdocs.yml | 1 + 4 files changed, 77 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ebd5e3e..a1c65d3 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,20 @@ df = spark.read.format("msgraph") \ .load() df.show() + +# with schema + +df = spark.read.format("msgraph") \ +.option("resource", "list_items") \ +.option("site-id", "") \ +.option("list-id", "") \ +.option("top", 100) \ +.option("expand", "fields") \ +.schema("id string, Title string") +.load() + +df.show() + ``` --- diff --git a/docs/getting-started.md b/docs/getting-started.md index e69de29..8288dc3 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -0,0 +1,61 @@ +## Installation + +```bash +pip install pyspark-msgraph-source +``` + +--- + +### 1. Authentication + +This package uses [DefaultAzureCredential](https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python#defaultazurecredential). +Ensure you're authenticated: + +```bash +az login +``` + +Or set environment variables: +```bash +export AZURE_CLIENT_ID= +export AZURE_TENANT_ID= +export AZURE_CLIENT_SECRET= +``` + +### 2. Example Usage + +```python +from pyspark.sql import SparkSession + +spark = SparkSession.builder \ +.appName("MSGraphExample") \ +.getOrCreate() + +from pyspark_msgraph_source.core.source import MSGraphDataSource +spark.dataSource.register(MSGraphDataSource) + +df = spark.read.format("msgraph") \ +.option("resource", "list_items") \ +.option("site-id", "") \ +.option("list-id", "") \ +.option("top", 100) \ +.option("expand", "fields") \ +.load() + +df.show() + +# with schema + +df = spark.read.format("msgraph") \ +.option("resource", "list_items") \ +.option("site-id", "") \ +.option("list-id", "") \ +.option("top", 100) \ +.option("expand", "fields") \ +.schema("id string, Title string") +.load() + +df.show() + + +``` \ No newline at end of file diff --git a/docs/guides/list-items.md b/docs/guides/list-items.md index 7b8a23b..778bb11 100644 --- a/docs/guides/list-items.md +++ b/docs/guides/list-items.md @@ -14,7 +14,7 @@ This guide explains how to read **List Items** from a **SharePoint List** using --- -## 🔹 Supported Options for `list_items` +## Supported Options for `list_items` | Option | Description | Required | |--------------|-----------------------------------------------------------|----------| diff --git a/mkdocs.yml b/mkdocs.yml index 5b48030..c91a532 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,6 +13,7 @@ plugins: nav: - Home: index.md + - Getting Started: getting-started.md - Guides: - List Items: guides/list-items.md - Available Resource: api/resources/index.md From 8f65ad37b1f7da0d140a5e6ec63e542ec0d50e69 Mon Sep 17 00:00:00 2001 From: geekwhocodes Date: Wed, 5 Mar 2025 15:23:20 +0000 Subject: [PATCH 10/10] test workflow --- .github/workflows/test.yml | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..7a5ceba 
--- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,40 @@ +name: Publish to Test PyPI + +on: + push: + branches: + - 'feature*' + +jobs: + test-and-publish: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Poetry + run: | + curl -sSL https://install.python-poetry.org | python3 - + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Install dependencies + run: poetry install + + - name: Run tests + run: poetry run pytest + + - name: Build the package + run: poetry build + + - name: Publish to Test PyPI + env: + POETRY_PYPI_TOKEN_TESTPYPI: ${{ secrets.TEST_PYPI_TOKEN }} + run: | + poetry config repositories.testpypi https://test.pypi.org/legacy/ + poetry publish -r testpypi --build
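Once the workflow publishes a feature build, a quick smoke test of the Test PyPI artifact can confirm the wheel installs and exposes the expected data source; the index URL is the standard Test PyPI one, and the assertion relies on `MSGraphDataSource.name()` returning "msgraph" as defined earlier in this series:

```python
# First: pip install -i https://test.pypi.org/simple/ pyspark-msgraph-source
from pyspark_msgraph_source.core.source import MSGraphDataSource

# name() is the short format string used by spark.read.format(...)
assert MSGraphDataSource.name() == "msgraph"
print("Test PyPI build imports and exposes the expected data source name.")
```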