Skip to content

Commit

Permalink
Support for code files parse (infiniflow#789)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

_Briefly describe what this PR aims to solve. Include background context
that will help reviewers understand the purpose of the PR._

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
  • Loading branch information
dashi6174 committed May 15, 2024
1 parent 12b4c56 commit 6ff63ee
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion api/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def filename_type(filename):
return FileType.PDF.value

if re.match(
r".*\.(doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md)$", filename):
r".*\.(doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt)$", filename):
return FileType.DOC.value

if re.match(
Expand Down
2 changes: 1 addition & 1 deletion rag/app/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
excel_parser = ExcelParser()
sections = [(excel_parser.html(binary), "")]

elif re.search(r"\.(txt|md)$", filename, re.IGNORECASE):
elif re.search(r"\.(txt|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
txt = ""
if binary:
Expand Down

0 comments on commit 6ff63ee

Please sign in to comment.