Skip to content

Commit

Permalink
Merge pull request #83 from fslaborg/speed
Browse files Browse the repository at this point in the history
Improve reader speed by adjusting sharedStringTable usage
  • Loading branch information
HLWeil committed Feb 2, 2024
2 parents fc5f16f + 2b88258 commit 5c18c78
Show file tree
Hide file tree
Showing 20 changed files with 198 additions and 64 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ MigrationBackup/
pkg/

tmp/
output/
.fsdocs/
/tests/FsSpreadsheet.JsNativeTests/fable/**/*.js
/tests/FsSpreadsheet.ExcelIO.Tests/TestFiles/WRITE_*.xlsx
Expand Down
15 changes: 15 additions & 0 deletions FsSpreadsheet.sln
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "JS", "JS", "{ADCF7D08-F2EE-
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "TestUtils", "tests\TestUtils\TestUtils.fsproj", "{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}"
EndProject
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Speedtest", "tests\Speedtest\Speedtest.fsproj", "{6DAD5C65-64CA-4ED4-B609-2D068F021024}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -191,6 +193,18 @@ Global
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x64.Build.0 = Release|Any CPU
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x86.ActiveCfg = Release|Any CPU
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x86.Build.0 = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x64.ActiveCfg = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x64.Build.0 = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x86.ActiveCfg = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x86.Build.0 = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|Any CPU.Build.0 = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x64.ActiveCfg = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x64.Build.0 = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x86.ActiveCfg = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -208,6 +222,7 @@ Global
{96E12F19-B25A-415E-B965-F9DE8D713C67} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
{ADCF7D08-F2EE-4DFD-A96A-7E0134A1546F} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
{6DAD5C65-64CA-4ED4-B609-2D068F021024} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {0EDE6697-0F13-4DB1-AC56-12C15A72D395}
Expand Down
6 changes: 6 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
### 5.1.2+21fd6e8 (Released 2024-1-30)
* Additions:
* [[#21fd6e8](https://github.com/CSBiology/FsSpreadsheet/commit/21fd6e82f6d9eab48b4197a56a5c9b9f06b51b0e)] improve reader speed by adjusting sharedStringTable usage
* Bugfixes:
* [[#66b6713](https://github.com/CSBiology/FsSpreadsheet/commit/66b6713381be3f133c58db7ca78fd36bd63c0ad1)] fix npm publish target

### 5.1.1+e7cc638 (Released 2024-1-26)
* Additions:
* [[#e7cc638](https://github.com/CSBiology/FsSpreadsheet/commit/e7cc638b170c570005b6cd8378d1e0ac31075be7)] improve rowWithRange SkipSearch performance
Expand Down
8 changes: 4 additions & 4 deletions build/ReleaseTasks.fs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ open Fake.Tools
open Fake.IO
open Fake.IO.Globbing.Operators

let createTag = BuildTask.create "CreateTag" [clean; build; runTests; pack] {
let createTag = BuildTask.create "CreateTag" [clean; build; runTests] {
if promptYesNo (sprintf "tagging branch with %s OK?" stableVersionTag ) then
Git.Branches.tag "" stableVersionTag
Git.Branches.pushTag "" projectRepo stableVersionTag
else
failwith "aborted"
}

let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseTag; clean; build; runTests; packPrerelease] {
let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseTag; clean; build; runTests] {
if promptYesNo (sprintf "tagging branch with %s OK?" prereleaseTag ) then
Git.Branches.tag "" prereleaseTag
Git.Branches.pushTag "" projectRepo prereleaseTag
Expand All @@ -32,7 +32,7 @@ let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseT
}


let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; pack] {
let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; packDotNet] {
let targets = (!! (sprintf "%s/*.*pkg" pkgDir ))
for target in targets do printfn "%A" target
let msg = sprintf "[.NET] release package with version %s?" stableVersionTag
Expand All @@ -45,7 +45,7 @@ let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; pack
else failwith "aborted"
}

let publishNugetPrerelease = BuildTask.create "PublishNugetPrerelease" [clean; build; runTests; packPrerelease] {
let publishNugetPrerelease = BuildTask.create "PublishNugetPrerelease" [clean; build; runTests; packDotNetPrerelease] {
let targets = (!! (sprintf "%s/*.*pkg" pkgDir ))
for target in targets do printfn "%A" target
let msg = sprintf "[.NET] release package with version %s?" prereleaseTag
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@fslab/fsspreadsheet",
"version": "5.1.1",
"version": "5.1.2",
"description": "Minimal spreadsheet creation and manipulation using exceljs io.",
"type": "module",
"main": "Xlsx.js",
Expand Down
19 changes: 8 additions & 11 deletions src/FsSpreadsheet.ExcelIO/Cell.fs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ module Cell =
/// <summary>
/// Maps a Cell to the value string using a shared string table.
/// </summary>
let tryGetValue (sharedStringTable:SharedStringTable Option) (cell:Cell) =
let tryGetValue (sharedStringTable:SST Option) (cell:Cell) =
match cell |> tryGetType with
| Some (CellValues.SharedString) when sharedStringTable.IsSome->
let sharedStringTable = sharedStringTable.Value
Expand All @@ -251,8 +251,7 @@ module Cell =
|> Option.map (
CellValue.getValue
>> int
>> fun i -> SharedStringTable.getText i sharedStringTable
>> SharedStringTable.SharedStringItem.getText
>> fun i -> sharedStringTable.[i]
)

| _ ->
Expand All @@ -263,19 +262,17 @@ module Cell =
/// <summary>
/// Maps a Cell to the value string using a sharedStringTable.
/// </summary>
let getValue (sharedStringTable : SharedStringTable Option) (cell : Cell) =
let getValue (sharedStringTable : SST Option) (cell : Cell) =
match cell |> tryGetType with
| Some (CellValues.SharedString) when sharedStringTable.IsSome->
| Some (CellValues.SharedString) when sharedStringTable.IsSome ->
let sharedStringTable = sharedStringTable.Value
let sharedStringTableIndex =
cell
|> getCellValue
|> CellValue.getValue
|> int

sharedStringTable
|> SharedStringTable.getText sharedStringTableIndex
|> SharedStringTable.SharedStringItem.getText
sharedStringTable.[sharedStringTableIndex]
| _ ->
cell
|> getCellValue
Expand All @@ -291,15 +288,15 @@ module Cell =
/// <summary>
/// Includes a value from the sharedStringTable in Cell.CellValue.Text.
/// </summary>
let includeSharedStringValue (sharedStringTable:SharedStringTable) (cell:Cell) =
let includeSharedStringValue (sharedStringTable:SST) (cell:Cell) =
if not (isNull cell.DataType) then
match cell |> tryGetType with
| Some (CellValues.SharedString) ->
let index = int cell.InnerText
match sharedStringTable |> Seq.tryItem index with
match sharedStringTable |> Array.tryItem index with
| Some value ->
cell.DataType <- EnumValue(CellValues.String)
cell.CellValue.Text <- value.InnerText
cell.CellValue.Text <- value
| None ->
cell.CellValue.Text <- cell.InnerText
cell
Expand Down
46 changes: 41 additions & 5 deletions src/FsSpreadsheet.ExcelIO/FsExtensions.fs
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ module FsExtensions =
/// <summary>
/// Creates an FsCell on the basis of an XlsxCell. Uses a SharedStringTable if present to get the XlsxCell's value.
/// </summary>
static member ofXlsxCell (doc : Packaging.SpreadsheetDocument) (xlsxCell : Cell) =
let sst = Spreadsheet.tryGetSharedStringTable doc
static member ofXlsxCell (doc : Packaging.SpreadsheetDocument) (sst : SST option) (xlsxCell : Cell) =
let cellValueString = Cell.getValue sst xlsxCell
let col, row = xlsxCell.CellReference.Value |> CellReference.toIndices
let dt =
Expand Down Expand Up @@ -90,6 +89,42 @@ module FsExtensions =
//let dt, v = DataType.InferCellValue v
FsCell.createWithDataType dt (int row) (int col) (cellValue)

/// <summary>
/// Creates an FsCell on the basis of an XlsxCell. Uses a SharedStringTable if present to get the XlsxCell's value.
/// </summary>
static member tryOfXlsxCell (doc : Packaging.SpreadsheetDocument) (sst : SST option) (xlsxCell : Cell) =
Cell.tryGetValue sst xlsxCell
|> Option.map (fun cellValueString ->
let col, row = xlsxCell.CellReference.Value |> CellReference.toIndices
let dt =
try DataType.ofXlsXCell doc xlsxCell
with _ -> DataType.Number // default is number
let mutable cellValue : obj = cellValueString
match dt with
| Date ->
try
// datetime is written as float counting days since 1900.
// We use the .NET helper because we really do not want to deal with datetime issues.
cellValue <- System.DateTime.FromOADate(float cellValueString)
with
| _ -> ()
| Boolean ->
// boolean is written as int/float either 0 or null
match cellValueString.ToLower() with
| "1" | "true" -> cellValue <- true
| "0" | "false" -> cellValue <- false
| _ -> ()
| Number ->
try
cellValue <- float cellValueString
with
| _ ->
()
| Empty | String -> ()
//let dt, v = DataType.InferCellValue v
FsCell.createWithDataType dt (int row) (int col) (cellValue)
)

static member toXlsxCell (doc : Packaging.SpreadsheetDocument) (cell : FsCell) =
Cell.fromValueWithDataType doc (uint32 cell.ColumnNumber) (uint32 cell.RowNumber) (cell.ValueAsString()) cell.DataType

Expand Down Expand Up @@ -201,7 +236,7 @@ module FsExtensions =
/// Creates an FsWorkbook from a given SpreadsheetDocument.
/// </summary>
static member fromSpreadsheetDocument (doc : Packaging.SpreadsheetDocument) =
let sst = Spreadsheet.tryGetSharedStringTable doc
let sst = Spreadsheet.tryGetSharedStringTable doc |> Option.map SharedStringTable.toSST
let xlsxWorkbookPart = Spreadsheet.getWorkbookPart doc
let xlsxSheets =
try
Expand All @@ -227,8 +262,9 @@ module FsExtensions =
fun xlsxSheet ->
let sheetId = Sheet.getID xlsxSheet
let xlsxCells =
Spreadsheet.getCellsBySheetID sheetId doc
|> Seq.map (FsCell.ofXlsxCell doc)
Spreadsheet.getCellsBySheetID sheetId doc true
|> Seq.toArray
|> Array.choose (FsCell.tryOfXlsxCell doc sst)
let assocXlsxTables =
xlsxTables
|> Seq.tryPick (fun (sid,ts) -> if sid = sheetId then Some ts else None)
Expand Down
2 changes: 1 addition & 1 deletion src/FsSpreadsheet.ExcelIO/FsSpreadsheet.ExcelIO.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="DocumentFormat.OpenXml" Version="[2.12.3]" />
<PackageReference Include="DocumentFormat.OpenXml" Version="2.16.0" />
<None Include="..\..\docs\img\logo.png" Pack="true" PackagePath="\" />
</ItemGroup>

Expand Down
12 changes: 6 additions & 6 deletions src/FsSpreadsheet.ExcelIO/Row.fs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ module Row =
row)

/// If the row contains a value at the given index, returns it. Returns none if not.
let tryGetValueAt (sst : SharedStringTable Option) index (row : Row) =
let tryGetValueAt (sst : SST Option) index (row : Row) =
row
|> tryGetCellAt index
|> Option.bind (Cell.tryGetValue sst)
Expand Down Expand Up @@ -257,7 +257,7 @@ module Row =
|> extendSpanRight offset

/// Maps the cells of the given row to tuples of 1-based column indices and the value strings using a sharedStringTable.
let getIndexedValues (sst : SharedStringTable Option) (row : Row) =
let getIndexedValues (sst : SST Option) (row : Row) =
row
|> toCellSeq
|> Seq.choose (fun cell ->
Expand All @@ -273,18 +273,18 @@ module Row =


/// Maps the cells of the given row to the value strings.
let getRowValues (sst : SharedStringTable Option) (row : Row) =
let getRowValues (sst : SST Option) (row : Row) =
row
|> toCellSeq
|> Seq.choose (Cell.tryGetValue sst)

/// Maps each cell of the given row to each respective value strings if it exists, else returns None.
let tryGetRowValues (sst : SharedStringTable option) (row : Row) =
let tryGetRowValues (sst : SST option) (row : Row) =
toCellSeq row
|> Seq.map (Cell.tryGetValue sst)

/// Maps the cells of the given row to the value strings for all existing cells.
let getPresentRowValues (sst : SharedStringTable option) (row : Row) =
let getPresentRowValues (sst : SST option) (row : Row) =
toCellSeq row
|> Seq.choose (Cell.tryGetValue sst)

Expand Down Expand Up @@ -369,6 +369,6 @@ module Row =
|> appendCell cell

/// Includes a value from a sharedStringTable in the cells of the row.
let includeSharedStringValue (sst : SharedStringTable) (row : Row) =
let includeSharedStringValue (sst : SST) (row : Row) =
row
|> mapCells (Cell.includeSharedStringValue sst)
7 changes: 6 additions & 1 deletion src/FsSpreadsheet.ExcelIO/SharedStringTable.fs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ open DocumentFormat.OpenXml.Packaging
open DocumentFormat.OpenXml


type SST = string []

/// Functions for working with SharedStringTables.
module SharedStringTable =

Expand Down Expand Up @@ -99,6 +101,9 @@ module SharedStringTable =
try spreadsheetDocument.WorkbookPart.SharedStringTablePart.SharedStringTable |> Some
with | _ -> None


let toSST (sst : SharedStringTable) =
sst.Elements<SharedStringItem>()
|> Seq.toArray
|> Array.map SharedStringItem.getText


14 changes: 7 additions & 7 deletions src/FsSpreadsheet.ExcelIO/SheetData.fs
Original file line number Diff line number Diff line change
Expand Up @@ -120,19 +120,19 @@ module SheetData =


/// Gets the string value of the cell at the given 1-based column and rowIndex, if it exists, else returns None.
let tryGetRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) =
let tryGetRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) =
sheet
|> tryGetRowAt rowIndex
|> Option.map (Row.getRowValues sst)

/// Gets the string values of the row at the given 1-based rowIndex.
let getRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) =
let getRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) =
sheet
|> getRowAt rowIndex
|> Row.getRowValues sst

/// Maps the cells of the given row to tuples of 1-based column indices and the value strings using a sharedStringTable, if it exists, else returns None.
let tryGetIndexedRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) =
let tryGetIndexedRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) =
sheet
|> tryGetRowAt rowIndex
|> Option.map (Row.getIndexedValues sst)
Expand Down Expand Up @@ -214,13 +214,13 @@ module SheetData =
|> Row.getCellAt columnIndex

/// Gets the string value of the cell at the given 1-based column- and rowIndex using a sharedStringTable.
let getCellValueAt (sst : SharedStringTable Option) (rowIndex : uint32) (columnIndex : uint32) (sheetData : SheetData) =
let getCellValueAt (sst : SST Option) (rowIndex : uint32) (columnIndex : uint32) (sheetData : SheetData) =
sheetData
|> getCellAt rowIndex columnIndex
|> Cell.getValue sst

/// Gets the string value of the cell at the given 1-based column- and rowIndex using a sharedStringTable if it exists. Else returns None.
let tryGetCellValueAt (sst : SharedStringTable Option) (rowIndex: uint32) (columnIndex : uint32) (sheetData:SheetData) =
let tryGetCellValueAt (sst : SST Option) (rowIndex: uint32) (columnIndex : uint32) (sheetData:SheetData) =
sheetData
|> tryGetCellAt rowIndex columnIndex
|> Option.bind (Cell.tryGetValue sst)
Expand Down Expand Up @@ -279,7 +279,7 @@ module SheetData =
sheet

/// Includes a value from sharedStringTable in the cells of the rows of the sheetData
let includeSharedStringValue (sharedStringTable : SharedStringTable) (sheetData : SheetData) =
let includeSharedStringValue (sharedStringTable : SST) (sheetData : SheetData) =
sheetData
|> mapRows (Row.includeSharedStringValue sharedStringTable)

Expand All @@ -288,7 +288,7 @@ module SheetData =
//----------------------------------------------------------------------------------------------------------------------

/// Reads the values of all cells from a sheetData and a sharedStringTable and converts them into a sparse matrix. Values are stored sparsely in a dictionary, with the key being a row index and column index tuple.
let toSparseValueMatrix (sst : SharedStringTable) sheetData =
let toSparseValueMatrix (sst : SST) sheetData =
let rows = getRows sheetData
let noOfRows = countRows sheetData
let noOfCols =
Expand Down
Loading

0 comments on commit 5c18c78

Please sign in to comment.