@@ -48,6 +48,7 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
4848 commit : str | None = config .commit
4949 branch : str | None = config .branch
5050 partial_clone : bool = config .subpath != "/"
51+ include_submodules : bool = config .include_submodules
5152
5253 # Create parent directory if it doesn't exist
5354 await ensure_directory (Path (local_path ).parent )
@@ -62,7 +63,8 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
6263 clone_cmd += ["-c" , create_git_auth_header (token , url = url )]
6364
6465 clone_cmd += ["clone" , "--single-branch" ]
65- # TODO: Re-enable --recurse-submodules when submodule support is needed
66+ if include_submodules :
67+ clone_cmd += ["--recurse-submodules" ]
6668
6769 if partial_clone :
6870 clone_cmd += ["--filter=blob:none" , "--sparse" ]
@@ -80,15 +82,40 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
8082
8183 # Checkout the subpath if it is a partial clone
8284 if partial_clone :
83- subpath = config .subpath .lstrip ("/" )
84- if config .blob :
85- # When ingesting from a file url (blob/branch/path/file.txt), we need to remove the file name.
86- subpath = str (Path (subpath ).parent .as_posix ())
87-
88- checkout_cmd = create_git_command (["git" ], local_path , url , token )
89- await run_command (* checkout_cmd , "sparse-checkout" , "set" , subpath )
85+ await _checkout_partial_clone (config , local_path , url , token )
9086
9187 # Checkout the commit if it is provided
9288 if commit :
9389 checkout_cmd = create_git_command (["git" ], local_path , url , token )
9490 await run_command (* checkout_cmd , "checkout" , commit )
91+
92+
async def _checkout_partial_clone(config: CloneConfig, local_path: str, url: str, token: str | None) -> None:
    """Configure sparse-checkout for a partial clone.

    Derives the directory to check out from ``config.subpath`` and runs
    ``git sparse-checkout set <subpath>`` through ``run_command``.

    Parameters
    ----------
    config : CloneConfig
        The clone configuration; ``subpath`` and ``blob`` are read here.
    local_path : str
        The local path of the cloned repository, forwarded to ``create_git_command``.
    url : str
        The URL of the repository, forwarded to ``create_git_command``.
    token : str | None
        Optional access token, forwarded to ``create_git_command``.

    Returns
    -------
    None
        The coroutine completes once the sparse-checkout command has been awaited.

    """
    # Drop leading slashes from the configured subpath before handing it to git.
    subpath = config.subpath.lstrip("/")
    if config.blob:
        # When ingesting from a file url (blob/branch/path/file.txt), we need to remove the file name.
        subpath = Path(subpath).parent.as_posix()

    checkout_cmd = create_git_command(["git"], local_path, url, token)
    # Await here (rather than returning the coroutine) so this helper is a real
    # coroutine function and its ``-> None`` annotation is accurate.
    await run_command(*checkout_cmd, "sparse-checkout", "set", subpath)
0 commit comments