Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve reader speed by adjusting sharedStringTable usage #83

Merged
merged 3 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ MigrationBackup/
pkg/

tmp/
output/
.fsdocs/
/tests/FsSpreadsheet.JsNativeTests/fable/**/*.js
/tests/FsSpreadsheet.ExcelIO.Tests/TestFiles/WRITE_*.xlsx
Expand Down
15 changes: 15 additions & 0 deletions FsSpreadsheet.sln
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "JS", "JS", "{ADCF7D08-F2EE-
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "TestUtils", "tests\TestUtils\TestUtils.fsproj", "{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}"
EndProject
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Speedtest", "tests\Speedtest\Speedtest.fsproj", "{6DAD5C65-64CA-4ED4-B609-2D068F021024}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -191,6 +193,18 @@ Global
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x64.Build.0 = Release|Any CPU
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x86.ActiveCfg = Release|Any CPU
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x86.Build.0 = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x64.ActiveCfg = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x64.Build.0 = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x86.ActiveCfg = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x86.Build.0 = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|Any CPU.Build.0 = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x64.ActiveCfg = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x64.Build.0 = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x86.ActiveCfg = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -208,6 +222,7 @@ Global
{96E12F19-B25A-415E-B965-F9DE8D713C67} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
{ADCF7D08-F2EE-4DFD-A96A-7E0134A1546F} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
{6DAD5C65-64CA-4ED4-B609-2D068F021024} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {0EDE6697-0F13-4DB1-AC56-12C15A72D395}
Expand Down
6 changes: 6 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
### 5.1.2+21fd6e8 (Released 2024-1-30)
* Additions:
* [[#21fd6e8](https://github.com/CSBiology/FsSpreadsheet/commit/21fd6e82f6d9eab48b4197a56a5c9b9f06b51b0e)] improve reader speed by adjusting sharedStringTable usage
* Bugfixes:
* [[#66b6713](https://github.com/CSBiology/FsSpreadsheet/commit/66b6713381be3f133c58db7ca78fd36bd63c0ad1)] fix npm publish target

### 5.1.1+e7cc638 (Released 2024-1-26)
* Additions:
* [[#e7cc638](https://github.com/CSBiology/FsSpreadsheet/commit/e7cc638b170c570005b6cd8378d1e0ac31075be7)] improve rowWithRange SkipSearch performance
Expand Down
8 changes: 4 additions & 4 deletions build/ReleaseTasks.fs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ open Fake.Tools
open Fake.IO
open Fake.IO.Globbing.Operators

let createTag = BuildTask.create "CreateTag" [clean; build; runTests; pack] {
let createTag = BuildTask.create "CreateTag" [clean; build; runTests] {
if promptYesNo (sprintf "tagging branch with %s OK?" stableVersionTag ) then
Git.Branches.tag "" stableVersionTag
Git.Branches.pushTag "" projectRepo stableVersionTag
else
failwith "aborted"
}

let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseTag; clean; build; runTests; packPrerelease] {
let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseTag; clean; build; runTests] {
if promptYesNo (sprintf "tagging branch with %s OK?" prereleaseTag ) then
Git.Branches.tag "" prereleaseTag
Git.Branches.pushTag "" projectRepo prereleaseTag
Expand All @@ -32,7 +32,7 @@ let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseT
}


let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; pack] {
let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; packDotNet] {
let targets = (!! (sprintf "%s/*.*pkg" pkgDir ))
for target in targets do printfn "%A" target
let msg = sprintf "[.NET] release package with version %s?" stableVersionTag
Expand All @@ -45,7 +45,7 @@ let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; pack
else failwith "aborted"
}

let publishNugetPrerelease = BuildTask.create "PublishNugetPrerelease" [clean; build; runTests; packPrerelease] {
let publishNugetPrerelease = BuildTask.create "PublishNugetPrerelease" [clean; build; runTests; packDotNetPrerelease] {
let targets = (!! (sprintf "%s/*.*pkg" pkgDir ))
for target in targets do printfn "%A" target
let msg = sprintf "[.NET] release package with version %s?" prereleaseTag
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@fslab/fsspreadsheet",
"version": "5.1.1",
"version": "5.1.2",
"description": "Minimal spreadsheet creation and manipulation using exceljs io.",
"type": "module",
"main": "Xlsx.js",
Expand Down
19 changes: 8 additions & 11 deletions src/FsSpreadsheet.ExcelIO/Cell.fs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ module Cell =
/// <summary>
/// Maps a Cell to the value string using a shared string table.
/// </summary>
let tryGetValue (sharedStringTable:SharedStringTable Option) (cell:Cell) =
let tryGetValue (sharedStringTable:SST Option) (cell:Cell) =
match cell |> tryGetType with
| Some (CellValues.SharedString) when sharedStringTable.IsSome->
let sharedStringTable = sharedStringTable.Value
Expand All @@ -251,8 +251,7 @@ module Cell =
|> Option.map (
CellValue.getValue
>> int
>> fun i -> SharedStringTable.getText i sharedStringTable
>> SharedStringTable.SharedStringItem.getText
>> fun i -> sharedStringTable.[i]
)

| _ ->
Expand All @@ -263,19 +262,17 @@ module Cell =
/// <summary>
/// Maps a Cell to the value string using a sharedStringTable.
/// </summary>
let getValue (sharedStringTable : SharedStringTable Option) (cell : Cell) =
let getValue (sharedStringTable : SST Option) (cell : Cell) =
match cell |> tryGetType with
| Some (CellValues.SharedString) when sharedStringTable.IsSome->
| Some (CellValues.SharedString) when sharedStringTable.IsSome ->
let sharedStringTable = sharedStringTable.Value
let sharedStringTableIndex =
cell
|> getCellValue
|> CellValue.getValue
|> int

sharedStringTable
|> SharedStringTable.getText sharedStringTableIndex
|> SharedStringTable.SharedStringItem.getText
sharedStringTable.[sharedStringTableIndex]
| _ ->
cell
|> getCellValue
Expand All @@ -291,15 +288,15 @@ module Cell =
/// <summary>
/// Includes a value from the sharedStringTable in Cell.CellValue.Text.
/// </summary>
let includeSharedStringValue (sharedStringTable:SharedStringTable) (cell:Cell) =
let includeSharedStringValue (sharedStringTable:SST) (cell:Cell) =
if not (isNull cell.DataType) then
match cell |> tryGetType with
| Some (CellValues.SharedString) ->
let index = int cell.InnerText
match sharedStringTable |> Seq.tryItem index with
match sharedStringTable |> Array.tryItem index with
| Some value ->
cell.DataType <- EnumValue(CellValues.String)
cell.CellValue.Text <- value.InnerText
cell.CellValue.Text <- value
| None ->
cell.CellValue.Text <- cell.InnerText
cell
Expand Down
46 changes: 41 additions & 5 deletions src/FsSpreadsheet.ExcelIO/FsExtensions.fs
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ module FsExtensions =
/// <summary>
/// Creates an FsCell on the basis of an XlsxCell. Uses a SharedStringTable if present to get the XlsxCell's value.
/// </summary>
static member ofXlsxCell (doc : Packaging.SpreadsheetDocument) (xlsxCell : Cell) =
let sst = Spreadsheet.tryGetSharedStringTable doc
static member ofXlsxCell (doc : Packaging.SpreadsheetDocument) (sst : SST option) (xlsxCell : Cell) =
let cellValueString = Cell.getValue sst xlsxCell
let col, row = xlsxCell.CellReference.Value |> CellReference.toIndices
let dt =
Expand Down Expand Up @@ -90,6 +89,42 @@ module FsExtensions =
//let dt, v = DataType.InferCellValue v
FsCell.createWithDataType dt (int row) (int col) (cellValue)

/// <summary>
/// Creates an FsCell on the basis of an XlsxCell. Uses a SharedStringTable if present to get the XlsxCell's value.
/// </summary>
static member tryOfXlsxCell (doc : Packaging.SpreadsheetDocument) (sst : SST option) (xlsxCell : Cell) =
Cell.tryGetValue sst xlsxCell
|> Option.map (fun cellValueString ->
let col, row = xlsxCell.CellReference.Value |> CellReference.toIndices
let dt =
try DataType.ofXlsXCell doc xlsxCell
with _ -> DataType.Number // default is number
let mutable cellValue : obj = cellValueString
match dt with
| Date ->
try
// datetime is written as float counting days since 1900.
// We use the .NET helper because we really do not want to deal with datetime issues.
cellValue <- System.DateTime.FromOADate(float cellValueString)
with
| _ -> ()
| Boolean ->
// boolean is written as int/float either 0 or null
match cellValueString.ToLower() with
| "1" | "true" -> cellValue <- true
| "0" | "false" -> cellValue <- false
| _ -> ()
| Number ->
try
cellValue <- float cellValueString
with
| _ ->
()
| Empty | String -> ()
//let dt, v = DataType.InferCellValue v
FsCell.createWithDataType dt (int row) (int col) (cellValue)
)

static member toXlsxCell (doc : Packaging.SpreadsheetDocument) (cell : FsCell) =
Cell.fromValueWithDataType doc (uint32 cell.ColumnNumber) (uint32 cell.RowNumber) (cell.ValueAsString()) cell.DataType

Expand Down Expand Up @@ -201,7 +236,7 @@ module FsExtensions =
/// Creates an FsWorkbook from a given SpreadsheetDocument.
/// </summary>
static member fromSpreadsheetDocument (doc : Packaging.SpreadsheetDocument) =
let sst = Spreadsheet.tryGetSharedStringTable doc
let sst = Spreadsheet.tryGetSharedStringTable doc |> Option.map SharedStringTable.toSST
let xlsxWorkbookPart = Spreadsheet.getWorkbookPart doc
let xlsxSheets =
try
Expand All @@ -227,8 +262,9 @@ module FsExtensions =
fun xlsxSheet ->
let sheetId = Sheet.getID xlsxSheet
let xlsxCells =
Spreadsheet.getCellsBySheetID sheetId doc
|> Seq.map (FsCell.ofXlsxCell doc)
Spreadsheet.getCellsBySheetID sheetId doc true
|> Seq.toArray
|> Array.choose (FsCell.tryOfXlsxCell doc sst)
let assocXlsxTables =
xlsxTables
|> Seq.tryPick (fun (sid,ts) -> if sid = sheetId then Some ts else None)
Expand Down
2 changes: 1 addition & 1 deletion src/FsSpreadsheet.ExcelIO/FsSpreadsheet.ExcelIO.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="DocumentFormat.OpenXml" Version="[2.12.3]" />
<PackageReference Include="DocumentFormat.OpenXml" Version="2.16.0" />
<None Include="..\..\docs\img\logo.png" Pack="true" PackagePath="\" />
</ItemGroup>

Expand Down
12 changes: 6 additions & 6 deletions src/FsSpreadsheet.ExcelIO/Row.fs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ module Row =
row)

/// If the row contains a value at the given index, returns it. Returns none if not.
let tryGetValueAt (sst : SharedStringTable Option) index (row : Row) =
let tryGetValueAt (sst : SST Option) index (row : Row) =
row
|> tryGetCellAt index
|> Option.bind (Cell.tryGetValue sst)
Expand Down Expand Up @@ -257,7 +257,7 @@ module Row =
|> extendSpanRight offset

/// Maps the cells of the given row to tuples of 1-based column indices and the value strings using a sharedStringTable.
let getIndexedValues (sst : SharedStringTable Option) (row : Row) =
let getIndexedValues (sst : SST Option) (row : Row) =
row
|> toCellSeq
|> Seq.choose (fun cell ->
Expand All @@ -273,18 +273,18 @@ module Row =


/// Maps the cells of the given row to the value strings.
let getRowValues (sst : SharedStringTable Option) (row : Row) =
let getRowValues (sst : SST Option) (row : Row) =
row
|> toCellSeq
|> Seq.choose (Cell.tryGetValue sst)

/// Maps each cell of the given row to each respective value strings if it exists, else returns None.
let tryGetRowValues (sst : SharedStringTable option) (row : Row) =
let tryGetRowValues (sst : SST option) (row : Row) =
toCellSeq row
|> Seq.map (Cell.tryGetValue sst)

/// Maps the cells of the given row to the value strings for all existing cells.
let getPresentRowValues (sst : SharedStringTable option) (row : Row) =
let getPresentRowValues (sst : SST option) (row : Row) =
toCellSeq row
|> Seq.choose (Cell.tryGetValue sst)

Expand Down Expand Up @@ -369,6 +369,6 @@ module Row =
|> appendCell cell

/// Includes a value from a sharedStringTable in the cells of the row.
let includeSharedStringValue (sst : SharedStringTable) (row : Row) =
let includeSharedStringValue (sst : SST) (row : Row) =
row
|> mapCells (Cell.includeSharedStringValue sst)
7 changes: 6 additions & 1 deletion src/FsSpreadsheet.ExcelIO/SharedStringTable.fs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ open DocumentFormat.OpenXml.Packaging
open DocumentFormat.OpenXml


type SST = string []

/// Functions for working with SharedStringTables.
module SharedStringTable =

Expand Down Expand Up @@ -99,6 +101,9 @@ module SharedStringTable =
try spreadsheetDocument.WorkbookPart.SharedStringTablePart.SharedStringTable |> Some
with | _ -> None


let toSST (sst : SharedStringTable) =
sst.Elements<SharedStringItem>()
|> Seq.toArray
|> Array.map SharedStringItem.getText


14 changes: 7 additions & 7 deletions src/FsSpreadsheet.ExcelIO/SheetData.fs
Original file line number Diff line number Diff line change
Expand Up @@ -120,19 +120,19 @@ module SheetData =


/// Gets the string value of the cell at the given 1-based column and rowIndex, if it exists, else returns None.
let tryGetRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) =
let tryGetRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) =
sheet
|> tryGetRowAt rowIndex
|> Option.map (Row.getRowValues sst)

/// Gets the string values of the row at the given 1-based rowIndex.
let getRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) =
let getRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) =
sheet
|> getRowAt rowIndex
|> Row.getRowValues sst

/// Maps the cells of the given row to tuples of 1-based column indices and the value strings using a sharedStringTable, if it exists, else returns None.
let tryGetIndexedRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) =
let tryGetIndexedRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) =
sheet
|> tryGetRowAt rowIndex
|> Option.map (Row.getIndexedValues sst)
Expand Down Expand Up @@ -214,13 +214,13 @@ module SheetData =
|> Row.getCellAt columnIndex

/// Gets the string value of the cell at the given 1-based column- and rowIndex using a sharedStringTable.
let getCellValueAt (sst : SharedStringTable Option) (rowIndex : uint32) (columnIndex : uint32) (sheetData : SheetData) =
let getCellValueAt (sst : SST Option) (rowIndex : uint32) (columnIndex : uint32) (sheetData : SheetData) =
sheetData
|> getCellAt rowIndex columnIndex
|> Cell.getValue sst

/// Gets the string value of the cell at the given 1-based column- and rowIndex using a sharedStringTable if it exists. Else returns None.
let tryGetCellValueAt (sst : SharedStringTable Option) (rowIndex: uint32) (columnIndex : uint32) (sheetData:SheetData) =
let tryGetCellValueAt (sst : SST Option) (rowIndex: uint32) (columnIndex : uint32) (sheetData:SheetData) =
sheetData
|> tryGetCellAt rowIndex columnIndex
|> Option.bind (Cell.tryGetValue sst)
Expand Down Expand Up @@ -279,7 +279,7 @@ module SheetData =
sheet

/// Includes a value from sharedStringTable in the cells of the rows of the sheetData
let includeSharedStringValue (sharedStringTable : SharedStringTable) (sheetData : SheetData) =
let includeSharedStringValue (sharedStringTable : SST) (sheetData : SheetData) =
sheetData
|> mapRows (Row.includeSharedStringValue sharedStringTable)

Expand All @@ -288,7 +288,7 @@ module SheetData =
//----------------------------------------------------------------------------------------------------------------------

/// Reads the values of all cells from a sheetData and a sharedStringTable and converts them into a sparse matrix. Values are stored sparsely in a dictionary, with the key being a row index and column index tuple.
let toSparseValueMatrix (sst : SharedStringTable) sheetData =
let toSparseValueMatrix (sst : SST) sheetData =
let rows = getRows sheetData
let noOfRows = countRows sheetData
let noOfCols =
Expand Down