Skip to content

Commit

Permalink
improve reader speed by adjusting sharedStringTable usage
Browse files Browse the repository at this point in the history
  • Loading branch information
HLWeil committed Jan 30, 2024
1 parent 66b6713 commit 21fd6e8
Show file tree
Hide file tree
Showing 15 changed files with 130 additions and 44 deletions.
15 changes: 15 additions & 0 deletions FsSpreadsheet.sln
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "JS", "JS", "{ADCF7D08-F2EE-
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "TestUtils", "tests\TestUtils\TestUtils.fsproj", "{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}"
EndProject
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Speedtest", "tests\Speedtest\Speedtest.fsproj", "{6DAD5C65-64CA-4ED4-B609-2D068F021024}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -191,6 +193,18 @@ Global
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x64.Build.0 = Release|Any CPU
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x86.ActiveCfg = Release|Any CPU
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x86.Build.0 = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x64.ActiveCfg = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x64.Build.0 = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x86.ActiveCfg = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x86.Build.0 = Debug|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|Any CPU.Build.0 = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x64.ActiveCfg = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x64.Build.0 = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x86.ActiveCfg = Release|Any CPU
{6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -208,6 +222,7 @@ Global
{96E12F19-B25A-415E-B965-F9DE8D713C67} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
{ADCF7D08-F2EE-4DFD-A96A-7E0134A1546F} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
{60678E53-EDC4-4ADE-A9EE-B194BDC76B37} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
{6DAD5C65-64CA-4ED4-B609-2D068F021024} = {F77AD108-C6B4-46BB-B7BC-13573F45F876}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {0EDE6697-0F13-4DB1-AC56-12C15A72D395}
Expand Down
8 changes: 4 additions & 4 deletions build/ReleaseTasks.fs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ open Fake.Tools
open Fake.IO
open Fake.IO.Globbing.Operators

let createTag = BuildTask.create "CreateTag" [clean; build; runTests; pack] {
let createTag = BuildTask.create "CreateTag" [clean; build; runTests] {
if promptYesNo (sprintf "tagging branch with %s OK?" stableVersionTag ) then
Git.Branches.tag "" stableVersionTag
Git.Branches.pushTag "" projectRepo stableVersionTag
else
failwith "aborted"
}

let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseTag; clean; build; runTests; packPrerelease] {
let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseTag; clean; build; runTests] {
if promptYesNo (sprintf "tagging branch with %s OK?" prereleaseTag ) then
Git.Branches.tag "" prereleaseTag
Git.Branches.pushTag "" projectRepo prereleaseTag
Expand All @@ -32,7 +32,7 @@ let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseT
}


let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; pack] {
let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; packDotNet] {
let targets = (!! (sprintf "%s/*.*pkg" pkgDir ))
for target in targets do printfn "%A" target
let msg = sprintf "[.NET] release package with version %s?" stableVersionTag
Expand All @@ -45,7 +45,7 @@ let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; pack
else failwith "aborted"
}

let publishNugetPrerelease = BuildTask.create "PublishNugetPrerelease" [clean; build; runTests; packPrerelease] {
let publishNugetPrerelease = BuildTask.create "PublishNugetPrerelease" [clean; build; runTests; packDotNetPrerelease] {
let targets = (!! (sprintf "%s/*.*pkg" pkgDir ))
for target in targets do printfn "%A" target
let msg = sprintf "[.NET] release package with version %s?" prereleaseTag
Expand Down
17 changes: 7 additions & 10 deletions src/FsSpreadsheet.ExcelIO/Cell.fs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ module Cell =
/// <summary>
/// Maps a Cell to the value string using a shared string table.
/// </summary>
let tryGetValue (sharedStringTable:SharedStringTable Option) (cell:Cell) =
let tryGetValue (sharedStringTable:SST Option) (cell:Cell) =
match cell |> tryGetType with
| Some (CellValues.SharedString) when sharedStringTable.IsSome->
let sharedStringTable = sharedStringTable.Value
Expand All @@ -251,8 +251,7 @@ module Cell =
|> Option.map (
CellValue.getValue
>> int
>> fun i -> SharedStringTable.getText i sharedStringTable
>> SharedStringTable.SharedStringItem.getText
>> fun i -> sharedStringTable.[i]
)

| _ ->
Expand All @@ -263,7 +262,7 @@ module Cell =
/// <summary>
/// Maps a Cell to the value string using a sharedStringTable.
/// </summary>
let getValue (sharedStringTable : SharedStringTable Option) (cell : Cell) =
let getValue (sharedStringTable : SST Option) (cell : Cell) =
match cell |> tryGetType with
| Some (CellValues.SharedString) when sharedStringTable.IsSome->
let sharedStringTable = sharedStringTable.Value
Expand All @@ -273,9 +272,7 @@ module Cell =
|> CellValue.getValue
|> int

sharedStringTable
|> SharedStringTable.getText sharedStringTableIndex
|> SharedStringTable.SharedStringItem.getText
sharedStringTable.[sharedStringTableIndex]
| _ ->
cell
|> getCellValue
Expand All @@ -291,15 +288,15 @@ module Cell =
/// <summary>
/// Includes a value from the sharedStringTable in Cell.CellValue.Text.
/// </summary>
let includeSharedStringValue (sharedStringTable:SharedStringTable) (cell:Cell) =
let includeSharedStringValue (sharedStringTable:SST) (cell:Cell) =
if not (isNull cell.DataType) then
match cell |> tryGetType with
| Some (CellValues.SharedString) ->
let index = int cell.InnerText
match sharedStringTable |> Seq.tryItem index with
match sharedStringTable |> Array.tryItem index with
| Some value ->
cell.DataType <- EnumValue(CellValues.String)
cell.CellValue.Text <- value.InnerText
cell.CellValue.Text <- value
| None ->
cell.CellValue.Text <- cell.InnerText
cell
Expand Down
8 changes: 4 additions & 4 deletions src/FsSpreadsheet.ExcelIO/FsExtensions.fs
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ module FsExtensions =
/// <summary>
/// Creates an FsCell on the basis of an XlsxCell. Uses a SharedStringTable if present to get the XlsxCell's value.
/// </summary>
static member ofXlsxCell (doc : Packaging.SpreadsheetDocument) (xlsxCell : Cell) =
let sst = Spreadsheet.tryGetSharedStringTable doc
static member ofXlsxCell (doc : Packaging.SpreadsheetDocument) (sst : SST option) (xlsxCell : Cell) =
let cellValueString = Cell.getValue sst xlsxCell
let col, row = xlsxCell.CellReference.Value |> CellReference.toIndices
let dt =
Expand Down Expand Up @@ -201,7 +200,7 @@ module FsExtensions =
/// Creates an FsWorkbook from a given SpreadsheetDocument.
/// </summary>
static member fromSpreadsheetDocument (doc : Packaging.SpreadsheetDocument) =
let sst = Spreadsheet.tryGetSharedStringTable doc
let sst = Spreadsheet.tryGetSharedStringTable doc |> Option.map SharedStringTable.toSST
let xlsxWorkbookPart = Spreadsheet.getWorkbookPart doc
let xlsxSheets =
try
Expand All @@ -228,7 +227,8 @@ module FsExtensions =
let sheetId = Sheet.getID xlsxSheet
let xlsxCells =
Spreadsheet.getCellsBySheetID sheetId doc
|> Seq.map (FsCell.ofXlsxCell doc)
|> Seq.toArray
|> Array.map (FsCell.ofXlsxCell doc sst)
let assocXlsxTables =
xlsxTables
|> Seq.tryPick (fun (sid,ts) -> if sid = sheetId then Some ts else None)
Expand Down
2 changes: 1 addition & 1 deletion src/FsSpreadsheet.ExcelIO/FsSpreadsheet.ExcelIO.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="DocumentFormat.OpenXml" Version="[2.12.3]" />
<PackageReference Include="DocumentFormat.OpenXml" Version="2.16.0" />
<None Include="..\..\docs\img\logo.png" Pack="true" PackagePath="\" />
</ItemGroup>

Expand Down
12 changes: 6 additions & 6 deletions src/FsSpreadsheet.ExcelIO/Row.fs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ module Row =
row)

/// If the row contains a value at the given index, returns it. Returns none if not.
let tryGetValueAt (sst : SharedStringTable Option) index (row : Row) =
let tryGetValueAt (sst : SST Option) index (row : Row) =
row
|> tryGetCellAt index
|> Option.bind (Cell.tryGetValue sst)
Expand Down Expand Up @@ -257,7 +257,7 @@ module Row =
|> extendSpanRight offset

/// Maps the cells of the given row to tuples of 1-based column indices and the value strings using a sharedStringTable.
let getIndexedValues (sst : SharedStringTable Option) (row : Row) =
let getIndexedValues (sst : SST Option) (row : Row) =
row
|> toCellSeq
|> Seq.choose (fun cell ->
Expand All @@ -273,18 +273,18 @@ module Row =


/// Maps the cells of the given row to the value strings.
let getRowValues (sst : SharedStringTable Option) (row : Row) =
let getRowValues (sst : SST Option) (row : Row) =
row
|> toCellSeq
|> Seq.choose (Cell.tryGetValue sst)

/// Maps each cell of the given row to each respective value strings if it exists, else returns None.
let tryGetRowValues (sst : SharedStringTable option) (row : Row) =
let tryGetRowValues (sst : SST option) (row : Row) =
toCellSeq row
|> Seq.map (Cell.tryGetValue sst)

/// Maps the cells of the given row to the value strings for all existing cells.
let getPresentRowValues (sst : SharedStringTable option) (row : Row) =
let getPresentRowValues (sst : SST option) (row : Row) =
toCellSeq row
|> Seq.choose (Cell.tryGetValue sst)

Expand Down Expand Up @@ -369,6 +369,6 @@ module Row =
|> appendCell cell

/// Includes a value from a sharedStringTable in the cells of the row.
let includeSharedStringValue (sst : SharedStringTable) (row : Row) =
let includeSharedStringValue (sst : SST) (row : Row) =
row
|> mapCells (Cell.includeSharedStringValue sst)
7 changes: 6 additions & 1 deletion src/FsSpreadsheet.ExcelIO/SharedStringTable.fs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ open DocumentFormat.OpenXml.Packaging
open DocumentFormat.OpenXml


type SST = string []

/// Functions for working with SharedStringTables.
module SharedStringTable =

Expand Down Expand Up @@ -99,6 +101,9 @@ module SharedStringTable =
try spreadsheetDocument.WorkbookPart.SharedStringTablePart.SharedStringTable |> Some
with | _ -> None


let toSST (sst : SharedStringTable) =
sst.Elements<SharedStringItem>()
|> Seq.toArray
|> Array.map SharedStringItem.getText


14 changes: 7 additions & 7 deletions src/FsSpreadsheet.ExcelIO/SheetData.fs
Original file line number Diff line number Diff line change
Expand Up @@ -120,19 +120,19 @@ module SheetData =


/// Gets the string value of the cell at the given 1-based column and rowIndex, if it exists, else returns None.
let tryGetRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) =
let tryGetRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) =
sheet
|> tryGetRowAt rowIndex
|> Option.map (Row.getRowValues sst)

/// Gets the string values of the row at the given 1-based rowIndex.
let getRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) =
let getRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) =
sheet
|> getRowAt rowIndex
|> Row.getRowValues sst

/// Maps the cells of the given row to tuples of 1-based column indices and the value strings using a sharedStringTable, if it exists, else returns None.
let tryGetIndexedRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) =
let tryGetIndexedRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) =
sheet
|> tryGetRowAt rowIndex
|> Option.map (Row.getIndexedValues sst)
Expand Down Expand Up @@ -214,13 +214,13 @@ module SheetData =
|> Row.getCellAt columnIndex

/// Gets the string value of the cell at the given 1-based column- and rowIndex using a sharedStringTable.
let getCellValueAt (sst : SharedStringTable Option) (rowIndex : uint32) (columnIndex : uint32) (sheetData : SheetData) =
let getCellValueAt (sst : SST Option) (rowIndex : uint32) (columnIndex : uint32) (sheetData : SheetData) =
sheetData
|> getCellAt rowIndex columnIndex
|> Cell.getValue sst

/// Gets the string value of the cell at the given 1-based column- and rowIndex using a sharedStringTable if it exists. Else returns None.
let tryGetCellValueAt (sst : SharedStringTable Option) (rowIndex: uint32) (columnIndex : uint32) (sheetData:SheetData) =
let tryGetCellValueAt (sst : SST Option) (rowIndex: uint32) (columnIndex : uint32) (sheetData:SheetData) =
sheetData
|> tryGetCellAt rowIndex columnIndex
|> Option.bind (Cell.tryGetValue sst)
Expand Down Expand Up @@ -279,7 +279,7 @@ module SheetData =
sheet

/// Includes a value from sharedStringTable in the cells of the rows of the sheetData
let includeSharedStringValue (sharedStringTable : SharedStringTable) (sheetData : SheetData) =
let includeSharedStringValue (sharedStringTable : SST) (sheetData : SheetData) =
sheetData
|> mapRows (Row.includeSharedStringValue sharedStringTable)

Expand All @@ -288,7 +288,7 @@ module SheetData =
//----------------------------------------------------------------------------------------------------------------------

/// Reads the values of all cells from a sheetData and a sharedStringTable and converts them into a sparse matrix. Values are stored sparsely in a dictionary, with the key being a row index and column index tuple.
let toSparseValueMatrix (sst : SharedStringTable) sheetData =
let toSparseValueMatrix (sst : SST) sheetData =
let rows = getRows sheetData
let noOfRows = countRows sheetData
let noOfCols =
Expand Down
8 changes: 6 additions & 2 deletions src/FsSpreadsheet.ExcelIO/Spreadsheet.fs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ module Spreadsheet =
let workbookPart = spreadsheetDocument.WorkbookPart
let worksheetPart = Worksheet.WorksheetPart.getByID sheet.Id.Value workbookPart
let stringTablePart = getOrInitSharedStringTablePart spreadsheetDocument
let sst = SharedStringTable.toSST stringTablePart.SharedStringTable
seq {
use reader = OpenXmlReader.Create(worksheetPart)

Expand All @@ -183,7 +184,7 @@ module Spreadsheet =
row.Elements()
|> Seq.iter (fun item ->
let cell = item :?> Cell
Cell.includeSharedStringValue stringTablePart.SharedStringTable cell |> ignore
Cell.includeSharedStringValue sst cell |> ignore
)
yield row
}
Expand All @@ -195,9 +196,12 @@ module Spreadsheet =
let getCellsBySheet (sheet : Sheet) (spreadsheetDocument : SpreadsheetDocument) =
let workbookPart = spreadsheetDocument.WorkbookPart
let worksheetPart = Worksheet.WorksheetPart.getByID sheet.Id.Value workbookPart

let includeSSV =
match tryGetSharedStringTable spreadsheetDocument with
| Some sst -> Cell.includeSharedStringValue sst
| Some sst ->
let sstArray = sst |> SharedStringTable.toSST
Cell.includeSharedStringValue sstArray
| None -> id
seq {
use reader = OpenXmlReader.Create(worksheetPart)
Expand Down
10 changes: 5 additions & 5 deletions src/FsSpreadsheet.ExcelIO/Table.fs
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ module Table =
worksheetPart

/// Create a table object by an area. If the first row of this area contains values in the given sheet, these are chosen as headers for the table and a table is returned.
let tryCreateWithExistingHeaders (sst : SharedStringTable Option) sheetData name area =
let tryCreateWithExistingHeaders (sst : SST Option) sheetData name area =
if Area.isCorrect area then
try
let columns =
Expand Down Expand Up @@ -301,7 +301,7 @@ module Table =
|> Seq.tryFind (TableColumn.getName >> (=) name)

/// If a column with the given header exists in the table, returns its values. Else returns None.
let tryGetColumnValuesByColumnHeader (sst : SharedStringTable Option) sheetData columnHeader (table : Table) =
let tryGetColumnValuesByColumnHeader (sst : SST Option) sheetData columnHeader (table : Table) =
let area = getArea table
table.TableColumns
|> TableColumns.getTableColumns
Expand All @@ -313,7 +313,7 @@ module Table =
)

/// If a column with the given header exists in the table, returns its indexed values. Else returns None.
let tryGetIndexedColumnValuesByColumnHeader (sst : SharedStringTable Option) sheetData columnHeader (table : Table) =
let tryGetIndexedColumnValuesByColumnHeader (sst : SST Option) sheetData columnHeader (table : Table) =
let area = getArea table
table.TableColumns
|> TableColumns.getTableColumns
Expand All @@ -330,7 +330,7 @@ module Table =
)

/// If a key column and a value with the given header exist in the table, returns a tuple list of keys and values (else returns None). Missing values get replaced with the given default value.
let tryGetKeyValuesByColumnHeaders (sst : SharedStringTable Option) sheetData keyColHeader valColHeader defaultValue (table : Table) =
let tryGetKeyValuesByColumnHeaders (sst : SST Option) sheetData keyColHeader valColHeader defaultValue (table : Table) =
let area = getArea table
let tableCols =
table.TableColumns
Expand All @@ -349,7 +349,7 @@ module Table =
| _ -> None

/// Reads a complete table. Values are stored sparsely in a dictionary, with the key being a row index and column header tuple.
let toSparseValueMatrix (sst : SharedStringTable Option) sheetData (table : Table) =
let toSparseValueMatrix (sst : SST Option) sheetData (table : Table) =
let area = getArea table
let dictionary = System.Collections.Generic.Dictionary<int*string,string>()
[Area.leftBoundary area .. Area.rightBoundary area]
Expand Down
3 changes: 2 additions & 1 deletion tests/FsSpreadsheet.ExcelIO.Tests/FsWorkbook.fs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ let performance =
sw.Start()
let wb = FsWorkbook.fromXlsxFile(p)
sw.Stop()
Expect.isLessThan sw.Elapsed.Milliseconds 2000 "Elapsed time should be less than 2000ms"
let elapsed = sw.Elapsed.Milliseconds
Expect.isLessThan elapsed 2000 $"Elapsed time should be less than 2000ms, but was {elapsed}ms"
Expect.equal (wb.GetWorksheetAt(1).Rows.Count) 153991 "Row count should be 153991"

)
Expand Down
4 changes: 2 additions & 2 deletions tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/Cell.fs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ let testFilePath = System.IO.Path.Combine(__SOURCE_DIRECTORY__, "../data", "test
let ssdFox = Packaging.SpreadsheetDocument.Open(testFilePath, false)
let wbpFox = ssdFox.WorkbookPart
let sstpFox = wbpFox.SharedStringTablePart
let sstFox = sstpFox.SharedStringTable
let sstFoxInnerText = sstFox.InnerText
let sstFox = SharedStringTable.toSST sstpFox.SharedStringTable
let sstFoxInnerText = sstpFox.SharedStringTable.InnerText
let wsp1Fox = (wbpFox.WorksheetParts |> Array.ofSeq)[0]
let cbsi1Fox = wsp1Fox.Worksheet.Descendants<Spreadsheet.Cell>() |> Array.ofSeq
let nullCell = Cell.create (Some Spreadsheet.CellValues.Error) "A1" (Cell.CellValue.create "")
Expand Down
Loading

0 comments on commit 21fd6e8

Please sign in to comment.