From ce620edd94813d64dacd60b1b1baeb30d158e66a Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Fri, 17 Apr 2026 16:26:45 +0530 Subject: [PATCH 1/9] sharepoint UI --- backend/connector_v2/views.py | 7 +++++-- .../filesystems/sharepoint/sharepoint.py | 18 +++++++++--------- .../sharepoint/static/json_schema.json | 1 + 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/backend/connector_v2/views.py b/backend/connector_v2/views.py index 09a33be983..a8268acb37 100644 --- a/backend/connector_v2/views.py +++ b/backend/connector_v2/views.py @@ -90,12 +90,15 @@ def _get_connector_metadata(self, connector_id: str) -> dict[str, str] | None: # Only use OAuth flow if connector supports it AND oauth_key is provided if ConnectorInstance.supportsOAuth(connector_id=connector_id) and oauth_key: - connector_metadata = ConnectorAuthHelper.get_oauth_creds_from_cache( + oauth_tokens = ConnectorAuthHelper.get_oauth_creds_from_cache( cache_key=oauth_key, delete_key=False, # Don't delete yet - wait for successful operation ) - if connector_metadata is None: + if oauth_tokens is None: raise MissingParamException(param=ConnectorAuthKey.OAUTH_KEY) + # Preserve non-secret form fields (e.g. 
site_url connector Sharepoint) + form_metadata = self.request.data.get(CIKey.CONNECTOR_METADATA) or {} + connector_metadata = {**form_metadata, **oauth_tokens} else: connector_metadata = self.request.data.get(CIKey.CONNECTOR_METADATA) return connector_metadata diff --git a/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/sharepoint.py b/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/sharepoint.py index 3eacdac30d..e8acd44f31 100644 --- a/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/sharepoint.py +++ b/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/sharepoint.py @@ -137,8 +137,8 @@ def _get_drive(self) -> Any: ctx = self._get_context() if self.drive_id: - # Specific drive by ID - self._drive = ctx.drives.get_by_id(self.drive_id) + # Specific drive by ID — EntityCollection uses bracket indexing. + self._drive = ctx.drives[self.drive_id] elif self.site_url and "sharepoint.com" in self.site_url.lower(): # SharePoint site - get default document library self._drive = self._get_sharepoint_site_drive(ctx) @@ -149,15 +149,15 @@ def _get_drive(self) -> Any: return self._drive def _get_sharepoint_site_drive(self, ctx: Any) -> Any: - """Get drive from SharePoint site URL.""" + """Get drive from SharePoint site URL. + + Uses the library's get_by_url, which maps an absolute site URL to the + Graph API's ``/sites/{hostname}:/{server-relative-path}`` addressing. 
+ """ from urllib.parse import urlparse - parsed = urlparse(self.site_url) - # Extract site path from URL like - # https://tenant.sharepoint.com/sites/sitename - site_path = parsed.path.rstrip("/") - if site_path: - return ctx.sites.get_by_path(site_path).drive + if urlparse(self.site_url).path.strip("/"): + return ctx.sites.get_by_url(self.site_url).drive return ctx.sites.root.drive def _get_onedrive_drive(self, ctx: Any) -> Any: diff --git a/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json b/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json index d83f84c8ae..5ede08913e 100644 --- a/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json +++ b/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json @@ -22,6 +22,7 @@ "drive_id": { "type": "string", "title": "Drive ID", + "format": "password", "description": "Specific Drive/Document Library ID. Leave empty to use the default drive." 
}, "auth_type": { From 0896b9179cf98e209ded4bf997664ce7826184ff Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Fri, 17 Apr 2026 19:46:59 +0530 Subject: [PATCH 2/9] sharepoint UI --- backend/connector_v2/serializers.py | 8 ++++---- .../filesystems/sharepoint/static/json_schema.json | 4 +--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/backend/connector_v2/serializers.py b/backend/connector_v2/serializers.py index 5517bc5257..86e8734807 100644 --- a/backend/connector_v2/serializers.py +++ b/backend/connector_v2/serializers.py @@ -53,10 +53,10 @@ def save(self, **kwargs): # type: ignore oauth_credentials=kwargs[CIKey.CONNECTOR_METADATA], ) kwargs[CIKey.CONNECTOR_AUTH] = connector_oauth - ( - kwargs[CIKey.CONNECTOR_METADATA], - refresh_status, - ) = connector_oauth.get_and_refresh_tokens() + # Discard return value: ConnectorAuth.extra_data is shared across + # every connector with the same (provider, uid) and would overwrite + # this connector's form fields (site_url, drive_id). + connector_oauth.get_and_refresh_tokens() except Exception as exc: logger.error( "Error while obtaining ConnectorAuth for connector id " diff --git a/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json b/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json index 5ede08913e..6070cbd840 100644 --- a/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json +++ b/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json @@ -22,7 +22,6 @@ "drive_id": { "type": "string", "title": "Drive ID", - "format": "password", "description": "Specific Drive/Document Library ID. Leave empty to use the default drive." }, "auth_type": { @@ -64,8 +63,7 @@ "user_email": { "type": "string", "title": "User Email", - "format": "password", - "description": "User's email address. 
Required ONLY for OneDrive with Client Credentials (not needed for SharePoint).", + "description": "Required only to access OneDrive with Client Credentials (e.g., user@company.com). Leave empty when accessing a SharePoint site via Site URL.", "examples": [ "user@company.onmicrosoft.com", "user@company.com" From 529ed5925353c623588e7214ddffdcae3c436d4f Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Mon, 20 Apr 2026 15:44:26 +0530 Subject: [PATCH 3/9] sharepoint connector_name fill --- backend/connector_v2/views.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/backend/connector_v2/views.py b/backend/connector_v2/views.py index a8268acb37..f140ae4aa5 100644 --- a/backend/connector_v2/views.py +++ b/backend/connector_v2/views.py @@ -5,7 +5,9 @@ from connector_auth_v2.constants import ConnectorAuthKey from connector_auth_v2.exceptions import CacheMissException, MissingParamException from connector_auth_v2.pipeline.common import ConnectorAuthHelper -from connector_processor.exceptions import OAuthTimeOut +from connector_processor.connector_processor import ConnectorProcessor +from connector_processor.constants import ConnectorKeys +from connector_processor.exceptions import InvalidConnectorID, OAuthTimeOut from django.db import IntegrityError from django.db.models import ProtectedError, QuerySet from permissions.permission import IsOwner, IsOwnerOrSharedUserOrSharedToOrg @@ -103,6 +105,35 @@ def _get_connector_metadata(self, connector_id: str) -> dict[str, str] | None: connector_metadata = self.request.data.get(CIKey.CONNECTOR_METADATA) return connector_metadata + def _fill_default_connector_name(self, request_data: Any) -> None: + """Fill ``connector_name`` from the schema default when missing. + + Defense-in-depth for an intermittent frontend race where the RJSF + default does not reach the POST body for OAuth connector creation, + causing the serializer to 400 on ``connector_name: required``. 
+ """ + if request_data.get(CIKey.CONNECTOR_NAME): + return + connector_id = request_data.get(CIKey.CONNECTOR_ID) + if not connector_id: + return + try: + schema_details = ConnectorProcessor.get_json_schema(connector_id=connector_id) + except InvalidConnectorID: + return + default_name = ( + schema_details.get(ConnectorKeys.JSON_SCHEMA, {}) + .get("properties", {}) + .get("connectorName", {}) + .get("default") + ) + if not default_name: + return + request_data[CIKey.CONNECTOR_NAME] = default_name + logger.info( + "Filled missing connector_name with schema default for %s", connector_id + ) + def _cleanup_oauth_cache(self, connector_id: str) -> None: """Clean up OAuth cache after successful operation.""" if not ConnectorInstance.supportsOAuth(connector_id=connector_id): @@ -169,6 +200,7 @@ def perform_create(self, serializer: ConnectorInstanceSerializer) -> None: def create(self, request: Any) -> Response: # Overriding default exception behavior + self._fill_default_connector_name(request.data) serializer = self.get_serializer(data=request.data) serializer.is_valid(raise_exception=True) try: From 17fb6075ccaf2e93505f8bf58ff6d651e0076d3a Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Mon, 20 Apr 2026 15:49:44 +0530 Subject: [PATCH 4/9] sharepoint connector_name json schema --- .../connectors/filesystems/sharepoint/static/json_schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json b/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json index 6070cbd840..0276413a46 100644 --- a/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json +++ b/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json @@ -63,6 +63,7 @@ "user_email": { "type": "string", "title": "User Email", + "format": "password", "description": "Required only to access OneDrive with Client 
Credentials (e.g., user@company.com). Leave empty when accessing a SharePoint site via Site URL.", "examples": [ "user@company.onmicrosoft.com", From 2214d4b93d9cb437511e423ac48694e28a7ed6a1 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Mon, 20 Apr 2026 16:14:45 +0530 Subject: [PATCH 5/9] UN-3414 [FIX] Use request.data.copy() to avoid QueryDict immutability DRF's request.data is an immutable QueryDict for multipart / form-encoded POSTs; directly mutating it inside _fill_default_connector_name would raise AttributeError for any non-JSON caller. Copy to a mutable dict once in create() and feed that copy to both the helper and the serializer. Co-Authored-By: Claude Opus 4.7 (1M context) --- backend/connector_v2/views.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/connector_v2/views.py b/backend/connector_v2/views.py index f140ae4aa5..c125cdd859 100644 --- a/backend/connector_v2/views.py +++ b/backend/connector_v2/views.py @@ -200,8 +200,9 @@ def perform_create(self, serializer: ConnectorInstanceSerializer) -> None: def create(self, request: Any) -> Response: # Overriding default exception behavior - self._fill_default_connector_name(request.data) - serializer = self.get_serializer(data=request.data) + data = request.data.copy() + self._fill_default_connector_name(data) + serializer = self.get_serializer(data=data) serializer.is_valid(raise_exception=True) try: self.perform_create(serializer) From 54415fec49c0fe31a92b5a31b321c0c2dc5d9fe4 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Mon, 20 Apr 2026 20:30:31 +0530 Subject: [PATCH 6/9] sharepoint connector_name json schema --- backend/connector_auth_v2/models.py | 14 ++++-- backend/connector_v2/fields.py | 22 +++++---- backend/connector_v2/serializers.py | 75 +++++++++++++++++++++++++++-- backend/connector_v2/views.py | 39 ++------------- 4 files changed, 98 insertions(+), 52 deletions(-) diff --git a/backend/connector_auth_v2/models.py 
b/backend/connector_auth_v2/models.py index cd36e75cce..e8e20587eb 100644 --- a/backend/connector_auth_v2/models.py +++ b/backend/connector_auth_v2/models.py @@ -96,11 +96,19 @@ def get_and_refresh_tokens(self, request: Request = None) -> tuple[JSONField, bo refreshed_token = True related_connector_instances = self.connectorinstance_set.all() for connector_instance in related_connector_instances: - connector_instance.connector_metadata = self.extra_data + # Merge so per-instance form fields (e.g. site_url, drive_id) + # survive a token refresh; refreshed token keys win on conflict. + existing_metadata = connector_instance.connector_metadata or {} + connector_instance.connector_metadata = { + **existing_metadata, + **self.extra_data, + } connector_instance.save() logger.info( - f"Refreshed access token for connector {connector_instance.id}, " - f"provider: {self.provider}, uid: {self.uid}" + "Refreshed access token for connector %s, provider: %s, uid: %s", + connector_instance.id, + self.provider, + self.uid, ) return self.extra_data, refreshed_token diff --git a/backend/connector_v2/fields.py b/backend/connector_v2/fields.py index 2a0f18c549..856bd39409 100644 --- a/backend/connector_v2/fields.py +++ b/backend/connector_v2/fields.py @@ -25,17 +25,21 @@ def from_db_value(self, value, expression, connection): # type: ignore refresh_after_str, SocialAuthConstants.REFRESH_AFTER_FORMAT ) if datetime.now() > refresh_after: - metadata = self._refresh_tokens(provider, uid) + metadata = self._refresh_tokens(provider, uid, metadata) return metadata - def _refresh_tokens(self, provider: str, uid: str) -> dict[str, str]: - """Retrieves PSA object and refreshes the token if necessary.""" + def _refresh_tokens( + self, provider: str, uid: str, existing_metadata: dict[str, str] + ) -> dict[str, str]: + """Retrieves PSA object and refreshes the token if necessary. + + Merges refreshed token fields over existing metadata so per-instance + form fields (e.g. 
site_url, drive_id) are not dropped on read. + """ connector_auth: ConnectorAuth = ConnectorAuth.get_social_auth( provider=provider, uid=uid ) - if connector_auth: - ( - connector_metadata, - _, - ) = connector_auth.get_and_refresh_tokens() - return connector_metadata # type: ignore + if not connector_auth: + return existing_metadata + refreshed_metadata, _ = connector_auth.get_and_refresh_tokens() + return {**existing_metadata, **refreshed_metadata} diff --git a/backend/connector_v2/serializers.py b/backend/connector_v2/serializers.py index 86e8734807..fef43ab14d 100644 --- a/backend/connector_v2/serializers.py +++ b/backend/connector_v2/serializers.py @@ -2,11 +2,12 @@ from collections import OrderedDict from typing import Any +from connector_auth_v2.constants import SocialAuthConstants from connector_auth_v2.models import ConnectorAuth from connector_auth_v2.pipeline.common import ConnectorAuthHelper from connector_processor.connector_processor import ConnectorProcessor from connector_processor.constants import ConnectorKeys -from connector_processor.exceptions import OAuthTimeOut +from connector_processor.exceptions import InvalidConnectorID, OAuthTimeOut from rest_framework.serializers import CharField, SerializerMethodField from utils.fields import EncryptedBinaryFieldSerializer from utils.input_sanitizer import validate_name_field @@ -19,6 +20,23 @@ logger = logging.getLogger(__name__) +# OAuth token-specific keys that are safe to merge across connectors sharing the +# same (provider, uid). Anything outside this set (e.g. provider-specific +# enrichment fields stored in ConnectorAuth.extra_data) must NOT leak into a +# connector's per-instance metadata, which owns form fields like site_url. 
+_OAUTH_TOKEN_KEYS: frozenset[str] = frozenset( + { + SocialAuthConstants.ACCESS_TOKEN, + SocialAuthConstants.REFRESH_TOKEN, + SocialAuthConstants.TOKEN_TYPE, + SocialAuthConstants.EXPIRES, + SocialAuthConstants.AUTH_TIME, + SocialAuthConstants.REFRESH_AFTER, + "expires_in", + "scope", + } +) + class ConnectorInstanceSerializer(AuditSerializer): connector_metadata = EncryptedBinaryFieldSerializer(required=False, allow_null=True) @@ -28,10 +46,45 @@ class ConnectorInstanceSerializer(AuditSerializer): class Meta: model = ConnectorInstance fields = "__all__" + extra_kwargs = {"connector_name": {"required": False}} def validate_connector_name(self, value: str) -> str: return validate_name_field(value, field_name="Connector name") + def validate(self, attrs: dict[str, Any]) -> dict[str, Any]: + """Backfill ``connector_name`` from the JSON schema default when absent. + + Defense-in-depth: the frontend RJSF form seeds ``connector_name`` from + the schema default, but callers (including staging OAuth flows) have + been observed to POST without it. If the connector schema declares a + default name, use it rather than raising a 400. 
+ """ + attrs = super().validate(attrs) + if not attrs.get(CIKey.CONNECTOR_NAME): + connector_id = attrs.get(CIKey.CONNECTOR_ID) + if connector_id: + default_name = self._get_schema_default_connector_name(connector_id) + if default_name: + attrs[CIKey.CONNECTOR_NAME] = default_name + logger.info( + "Filled missing connector_name with schema default for %s", + connector_id, + ) + return attrs + + @staticmethod + def _get_schema_default_connector_name(connector_id: str) -> str | None: + try: + schema_details = ConnectorProcessor.get_json_schema(connector_id=connector_id) + except InvalidConnectorID: + return None + return ( + schema_details.get(ConnectorKeys.JSON_SCHEMA, {}) + .get("properties", {}) + .get("connectorName", {}) + .get("default") + ) + def save(self, **kwargs): # type: ignore user = self.context.get("request").user or None connector_id: str = kwargs[CIKey.CONNECTOR_ID] @@ -53,10 +106,22 @@ def save(self, **kwargs): # type: ignore oauth_credentials=kwargs[CIKey.CONNECTOR_METADATA], ) kwargs[CIKey.CONNECTOR_AUTH] = connector_oauth - # Discard return value: ConnectorAuth.extra_data is shared across - # every connector with the same (provider, uid) and would overwrite - # this connector's form fields (site_url, drive_id). - connector_oauth.get_and_refresh_tokens() + # Merge refreshed token fields (whitelist) back into this + # connector's metadata so ``super().save(**kwargs)`` does not + # overwrite the fresh token the sibling-loop just persisted. + # Whitelisting preserves per-connector form fields (site_url, + # drive_id) that must not be leaked across connectors sharing + # the same (provider, uid). 
+ refreshed_metadata, _ = connector_oauth.get_and_refresh_tokens() + token_updates = { + key: refreshed_metadata[key] + for key in _OAUTH_TOKEN_KEYS + if refreshed_metadata.get(key) is not None + } + kwargs[CIKey.CONNECTOR_METADATA] = { + **(kwargs.get(CIKey.CONNECTOR_METADATA) or {}), + **token_updates, + } except Exception as exc: logger.error( "Error while obtaining ConnectorAuth for connector id " diff --git a/backend/connector_v2/views.py b/backend/connector_v2/views.py index c125cdd859..37525e4d5a 100644 --- a/backend/connector_v2/views.py +++ b/backend/connector_v2/views.py @@ -5,9 +5,7 @@ from connector_auth_v2.constants import ConnectorAuthKey from connector_auth_v2.exceptions import CacheMissException, MissingParamException from connector_auth_v2.pipeline.common import ConnectorAuthHelper -from connector_processor.connector_processor import ConnectorProcessor -from connector_processor.constants import ConnectorKeys -from connector_processor.exceptions import InvalidConnectorID, OAuthTimeOut +from connector_processor.exceptions import OAuthTimeOut from django.db import IntegrityError from django.db.models import ProtectedError, QuerySet from permissions.permission import IsOwner, IsOwnerOrSharedUserOrSharedToOrg @@ -100,40 +98,13 @@ def _get_connector_metadata(self, connector_id: str) -> dict[str, str] | None: raise MissingParamException(param=ConnectorAuthKey.OAUTH_KEY) # Preserve non-secret form fields (e.g. site_url connector Sharepoint) form_metadata = self.request.data.get(CIKey.CONNECTOR_METADATA) or {} + if not isinstance(form_metadata, dict): + form_metadata = {} connector_metadata = {**form_metadata, **oauth_tokens} else: connector_metadata = self.request.data.get(CIKey.CONNECTOR_METADATA) return connector_metadata - def _fill_default_connector_name(self, request_data: Any) -> None: - """Fill ``connector_name`` from the schema default when missing. 
- - Defense-in-depth for an intermittent frontend race where the RJSF - default does not reach the POST body for OAuth connector creation, - causing the serializer to 400 on ``connector_name: required``. - """ - if request_data.get(CIKey.CONNECTOR_NAME): - return - connector_id = request_data.get(CIKey.CONNECTOR_ID) - if not connector_id: - return - try: - schema_details = ConnectorProcessor.get_json_schema(connector_id=connector_id) - except InvalidConnectorID: - return - default_name = ( - schema_details.get(ConnectorKeys.JSON_SCHEMA, {}) - .get("properties", {}) - .get("connectorName", {}) - .get("default") - ) - if not default_name: - return - request_data[CIKey.CONNECTOR_NAME] = default_name - logger.info( - "Filled missing connector_name with schema default for %s", connector_id - ) - def _cleanup_oauth_cache(self, connector_id: str) -> None: """Clean up OAuth cache after successful operation.""" if not ConnectorInstance.supportsOAuth(connector_id=connector_id): @@ -200,9 +171,7 @@ def perform_create(self, serializer: ConnectorInstanceSerializer) -> None: def create(self, request: Any) -> Response: # Overriding default exception behavior - data = request.data.copy() - self._fill_default_connector_name(data) - serializer = self.get_serializer(data=data) + serializer = self.get_serializer(data=request.data) serializer.is_valid(raise_exception=True) try: self.perform_create(serializer) From e36b3d3f6ddda6402b5e9de43d6c5e4b6e24eaa1 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Mon, 20 Apr 2026 21:09:58 +0530 Subject: [PATCH 7/9] sharepoint connector_name json schema --- backend/connector_auth_v2/constants.py | 18 +++++++++ backend/connector_auth_v2/models.py | 16 ++++++-- backend/connector_v2/fields.py | 15 ++++++-- backend/connector_v2/serializers.py | 53 +++++++++++--------------- 4 files changed, 63 insertions(+), 39 deletions(-) diff --git a/backend/connector_auth_v2/constants.py b/backend/connector_auth_v2/constants.py index 
886968d87b..2508df7edd 100644 --- a/backend/connector_auth_v2/constants.py +++ b/backend/connector_auth_v2/constants.py @@ -16,3 +16,21 @@ class SocialAuthConstants: GOOGLE_OAUTH = "google-oauth2" GOOGLE_TOKEN_EXPIRY_FORMAT = "%d/%m/%Y %H:%M:%S" + + +# OAuth token-specific keys safe to merge across connectors sharing the same +# (provider, uid). Anything outside this set (form fields like site_url, +# drive_id, or provider-specific enrichment stored in ConnectorAuth.extra_data) +# must NOT leak between connectors. +OAUTH_TOKEN_KEYS: frozenset[str] = frozenset( + { + SocialAuthConstants.ACCESS_TOKEN, + SocialAuthConstants.REFRESH_TOKEN, + SocialAuthConstants.TOKEN_TYPE, + SocialAuthConstants.EXPIRES, + SocialAuthConstants.AUTH_TIME, + SocialAuthConstants.REFRESH_AFTER, + "expires_in", + "scope", + } +) diff --git a/backend/connector_auth_v2/models.py b/backend/connector_auth_v2/models.py index e8e20587eb..a92630c41a 100644 --- a/backend/connector_auth_v2/models.py +++ b/backend/connector_auth_v2/models.py @@ -10,7 +10,7 @@ from social_django.models import AbstractUserSocialAuth, DjangoStorage from social_django.strategy import DjangoStrategy -from connector_auth_v2.constants import SocialAuthConstants +from connector_auth_v2.constants import OAUTH_TOKEN_KEYS, SocialAuthConstants from connector_auth_v2.pipeline.google import GoogleAuthHelper logger = logging.getLogger(__name__) @@ -96,12 +96,20 @@ def get_and_refresh_tokens(self, request: Request = None) -> tuple[JSONField, bo refreshed_token = True related_connector_instances = self.connectorinstance_set.all() for connector_instance in related_connector_instances: - # Merge so per-instance form fields (e.g. site_url, drive_id) - # survive a token refresh; refreshed token keys win on conflict. + # Whitelist-merge: only OAuth token keys flow from the shared + # extra_data into each sibling's metadata. 
Non-token keys (form + # fields like site_url, drive_id, or provider-enrichment stored + # in extra_data) must NOT leak between connectors sharing the + # same (provider, uid). existing_metadata = connector_instance.connector_metadata or {} + token_updates = { + key: self.extra_data[key] + for key in OAUTH_TOKEN_KEYS + if self.extra_data.get(key) is not None + } connector_instance.connector_metadata = { **existing_metadata, - **self.extra_data, + **token_updates, } connector_instance.save() logger.info( diff --git a/backend/connector_v2/fields.py b/backend/connector_v2/fields.py index 856bd39409..d3a65d91b7 100644 --- a/backend/connector_v2/fields.py +++ b/backend/connector_v2/fields.py @@ -1,7 +1,7 @@ import logging from datetime import datetime -from connector_auth_v2.constants import SocialAuthConstants +from connector_auth_v2.constants import OAUTH_TOKEN_KEYS, SocialAuthConstants from connector_auth_v2.models import ConnectorAuth from django.db import models @@ -33,8 +33,10 @@ def _refresh_tokens( ) -> dict[str, str]: """Retrieves PSA object and refreshes the token if necessary. - Merges refreshed token fields over existing metadata so per-instance - form fields (e.g. site_url, drive_id) are not dropped on read. + Whitelist-merges refreshed token fields over existing metadata so + per-instance form fields (e.g. site_url, drive_id) are preserved on + read and non-token keys from the shared ConnectorAuth.extra_data + cannot leak into a connector's metadata. 
""" connector_auth: ConnectorAuth = ConnectorAuth.get_social_auth( provider=provider, uid=uid @@ -42,4 +44,9 @@ def _refresh_tokens( if not connector_auth: return existing_metadata refreshed_metadata, _ = connector_auth.get_and_refresh_tokens() - return {**existing_metadata, **refreshed_metadata} + token_updates = { + key: refreshed_metadata[key] + for key in OAUTH_TOKEN_KEYS + if refreshed_metadata.get(key) is not None + } + return {**existing_metadata, **token_updates} diff --git a/backend/connector_v2/serializers.py b/backend/connector_v2/serializers.py index fef43ab14d..c94e04f22b 100644 --- a/backend/connector_v2/serializers.py +++ b/backend/connector_v2/serializers.py @@ -2,13 +2,13 @@ from collections import OrderedDict from typing import Any -from connector_auth_v2.constants import SocialAuthConstants +from connector_auth_v2.constants import OAUTH_TOKEN_KEYS from connector_auth_v2.models import ConnectorAuth from connector_auth_v2.pipeline.common import ConnectorAuthHelper from connector_processor.connector_processor import ConnectorProcessor from connector_processor.constants import ConnectorKeys from connector_processor.exceptions import InvalidConnectorID, OAuthTimeOut -from rest_framework.serializers import CharField, SerializerMethodField +from rest_framework.serializers import CharField, SerializerMethodField, ValidationError from utils.fields import EncryptedBinaryFieldSerializer from utils.input_sanitizer import validate_name_field @@ -20,23 +20,6 @@ logger = logging.getLogger(__name__) -# OAuth token-specific keys that are safe to merge across connectors sharing the -# same (provider, uid). Anything outside this set (e.g. provider-specific -# enrichment fields stored in ConnectorAuth.extra_data) must NOT leak into a -# connector's per-instance metadata, which owns form fields like site_url. 
-_OAUTH_TOKEN_KEYS: frozenset[str] = frozenset( - { - SocialAuthConstants.ACCESS_TOKEN, - SocialAuthConstants.REFRESH_TOKEN, - SocialAuthConstants.TOKEN_TYPE, - SocialAuthConstants.EXPIRES, - SocialAuthConstants.AUTH_TIME, - SocialAuthConstants.REFRESH_AFTER, - "expires_in", - "scope", - } -) - class ConnectorInstanceSerializer(AuditSerializer): connector_metadata = EncryptedBinaryFieldSerializer(required=False, allow_null=True) @@ -57,19 +40,27 @@ def validate(self, attrs: dict[str, Any]) -> dict[str, Any]: Defense-in-depth: the frontend RJSF form seeds ``connector_name`` from the schema default, but callers (including staging OAuth flows) have been observed to POST without it. If the connector schema declares a - default name, use it rather than raising a 400. + default name, use it. Otherwise raise a 400 explicitly rather than + letting the missing value reach the DB and surface as an + ``IntegrityError`` (the model enforces ``null=False``). """ attrs = super().validate(attrs) - if not attrs.get(CIKey.CONNECTOR_NAME): - connector_id = attrs.get(CIKey.CONNECTOR_ID) - if connector_id: - default_name = self._get_schema_default_connector_name(connector_id) - if default_name: - attrs[CIKey.CONNECTOR_NAME] = default_name - logger.info( - "Filled missing connector_name with schema default for %s", - connector_id, - ) + if attrs.get(CIKey.CONNECTOR_NAME): + return attrs + + connector_id = attrs.get(CIKey.CONNECTOR_ID) + default_name = ( + self._get_schema_default_connector_name(connector_id) + if connector_id + else None + ) + if not default_name: + raise ValidationError({CIKey.CONNECTOR_NAME: "This field is required."}) + attrs[CIKey.CONNECTOR_NAME] = default_name + logger.info( + "Filled missing connector_name with schema default for %s", + connector_id, + ) return attrs @staticmethod @@ -115,7 +106,7 @@ def save(self, **kwargs): # type: ignore refreshed_metadata, _ = connector_oauth.get_and_refresh_tokens() token_updates = { key: refreshed_metadata[key] - for key in 
_OAUTH_TOKEN_KEYS + for key in OAUTH_TOKEN_KEYS if refreshed_metadata.get(key) is not None } kwargs[CIKey.CONNECTOR_METADATA] = { From e50746918942e742c6bcba8038b5e9aaf8d2bfc2 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Mon, 20 Apr 2026 21:20:30 +0530 Subject: [PATCH 8/9] greptile partial update --- backend/connector_v2/serializers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/connector_v2/serializers.py b/backend/connector_v2/serializers.py index c94e04f22b..d639c923a1 100644 --- a/backend/connector_v2/serializers.py +++ b/backend/connector_v2/serializers.py @@ -43,9 +43,13 @@ def validate(self, attrs: dict[str, Any]) -> dict[str, Any]: default name, use it. Otherwise raise a 400 explicitly rather than letting the missing value reach the DB and surface as an ``IntegrityError`` (the model enforces ``null=False``). + + Skipped entirely on partial updates (PATCH): the existing DB row + already has a valid name, and backfilling would overwrite a + user-renamed connector with the schema default. 
""" attrs = super().validate(attrs) - if attrs.get(CIKey.CONNECTOR_NAME): + if attrs.get(CIKey.CONNECTOR_NAME) or self.partial: return attrs connector_id = attrs.get(CIKey.CONNECTOR_ID) From 261f7249b36a2eda2b93b0c667df83b8cec2882b Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Tue, 21 Apr 2026 12:46:59 +0530 Subject: [PATCH 9/9] json schema small change --- .../connectors/filesystems/sharepoint/static/json_schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json b/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json index 0276413a46..e1f0757e36 100644 --- a/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json +++ b/unstract/connectors/src/unstract/connectors/filesystems/sharepoint/static/json_schema.json @@ -22,6 +22,7 @@ "drive_id": { "type": "string", "title": "Drive ID", + "format": "password", "description": "Specific Drive/Document Library ID. Leave empty to use the default drive." }, "auth_type": {