Skip to content

Commit

Permalink
refactoring and dependencies version bump
Browse files Browse the repository at this point in the history
  • Loading branch information
Jack Dermody committed Feb 5, 2024
1 parent 87b6f5f commit 2012398
Show file tree
Hide file tree
Showing 47 changed files with 893 additions and 284 deletions.
2 changes: 1 addition & 1 deletion Benchmarks/Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.13.7" />
<PackageReference Include="BenchmarkDotNet" Version="0.13.12" />
</ItemGroup>

<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion BrightData.Cuda/BrightData.Cuda.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="CommunityToolkit.HighPerformance" Version="8.2.1" />
<PackageReference Include="CommunityToolkit.HighPerformance" Version="8.2.2" />
</ItemGroup>

<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion BrightData.Parquet/BrightData.Parquet.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Parquet.Net" Version="4.16.2" />
<PackageReference Include="Parquet.Net" Version="4.23.4" />
</ItemGroup>

<ItemGroup>
Expand Down
18 changes: 9 additions & 9 deletions BrightData.Parquet/ExtensionMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace BrightData.Parquet
{
public static class ExtensionMethods
{
public static async Task<IDataTable> LoadFromParquet(this BrightDataContext context, Stream inputStream, Stream? outputStream)
public static async Task<IDataTable> CreateTableFromParquet(this BrightDataContext context, Stream inputStream, Stream? outputStream)
{
var reader = await ParquetReader.CreateAsync(inputStream);
var fields = reader.Schema.DataFields;
Expand Down Expand Up @@ -70,7 +70,9 @@ public static async Task<IDataTable> LoadFromParquet(this BrightDataContext cont

public static async Task WriteAsParquet(this IDataTable dataTable, Stream output)
{
var fields = dataTable.ColumnMetaData.Zip(dataTable.ColumnTypes).Select((x, i) => new DataField(x.First.GetName($"Column {i + 1}"), x.Second.GetDataType(), false, false));
var fields = dataTable.ColumnMetaData
.Zip(dataTable.ColumnTypes).Select((x, i) => new DataField(x.First.GetName($"Column {i + 1}"), x.Second.GetDataType(), false, false))
.ToArray();
var schema = new ParquetSchema(fields);
var columns = dataTable.GetColumns();
var firstColumn = columns[0];
Expand All @@ -79,18 +81,16 @@ public static async Task WriteAsParquet(this IDataTable dataTable, Stream output
using var writer = await ParquetWriter.CreateAsync(schema, output);
writer.CompressionMethod = CompressionMethod.Gzip;
writer.CompressionLevel = System.IO.Compression.CompressionLevel.Optimal;
for (var i = 0; i < firstColumn.BlockCount; i++) {
using ParquetRowGroupWriter blockWriter = writer.CreateRowGroup();
for (uint i = 0; i < firstColumn.BlockCount; i++) {
using var blockWriter = writer.CreateRowGroup();
foreach (var column in columns) {

var metaData = column.MetaData.AllKeys.ToDictionary(x => x, x => column.MetaData.Get(x)?.ToString() ?? "");
var array = await column.GetBlock(i);
await blockWriter.WriteColumnAsync(new DataColumn(fields[i], array), metaData);
}
//await groupWriter.WriteColumnAsync(idColumn);
//await groupWriter.WriteColumnAsync(cityColumn);
}
}

//static DataColumn GetColumnBlock()

static ICompositeBuffer CreateColumn(ParquetRowGroupReader reader, DataField field, MetaData columnMetaData, IBuildDataTables builder)
{
foreach(var (key, value) in reader.GetCustomMetadata(field))
Expand Down
10 changes: 5 additions & 5 deletions BrightData.UnitTests/BrightData.UnitTests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="FluentAssertions" Version="6.10.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.5.0" />
<PackageReference Include="FluentAssertions" Version="6.12.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageReference Include="MKL.NET.win-x64" Version="2022.0.0.115" />
<PackageReference Include="xunit" Version="2.4.2" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.5">
<PackageReference Include="xunit" Version="2.6.6" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.5.6">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="coverlet.collector" Version="3.2.0">
<PackageReference Include="coverlet.collector" Version="6.0.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
Expand Down
4 changes: 2 additions & 2 deletions BrightData.UnitTests/DataTableTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,8 @@ public async Task TableConfusionMatrix()
converted.ColumnTypes[0].Should().Be(columnType);
converted.ColumnTypes[1].Should().Be(columnType);

await foreach (var (b1, b2) in converted.Enumerate<T, T>())
b1.Should().Be(b2);
await foreach (var row in converted.Enumerate<T, T>())
row.C1.Should().Be(row.C2);
}

[Fact]
Expand Down
2 changes: 1 addition & 1 deletion BrightData/BrightData.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="CommunityToolkit.HighPerformance" Version="8.2.1" />
<PackageReference Include="CommunityToolkit.HighPerformance" Version="8.2.2" />
<PackageReference Include="System.Runtime.CompilerServices.Unsafe" Version="6.0.0" />
</ItemGroup>

Expand Down
97 changes: 37 additions & 60 deletions BrightData/BrightData.xml
Original file line number Diff line number Diff line change
Expand Up @@ -715,64 +715,6 @@
<param name="input"></param>
<param name="output"></param>
</member>
<member name="T:BrightData.Buffer.Operations.Conversion.NumericUnmanagedConversion`2">
<summary>
Converts numbers to unmanaged types
</summary>
<typeparam name="FT"></typeparam>
<typeparam name="T"></typeparam>
<param name="input"></param>
<param name="output"></param>
</member>
<member name="M:BrightData.Buffer.Operations.Conversion.NumericUnmanagedConversion`2.#ctor(BrightData.IReadOnlyBuffer{`0},BrightData.IAppendToBuffer{`1})">
<summary>
Converts numbers to unmanaged types
</summary>
<typeparam name="FT"></typeparam>
<typeparam name="T"></typeparam>
<param name="input"></param>
<param name="output"></param>
</member>
<member name="T:BrightData.Buffer.Operations.Conversion.OneHotConversion`1">
<summary>
One hot conversion
</summary>
<typeparam name="T"></typeparam>
<param name="input"></param>
<param name="indexer"></param>
<param name="output"></param>
</member>
<member name="M:BrightData.Buffer.Operations.Conversion.OneHotConversion`1.#ctor(BrightData.IReadOnlyBuffer{`0},BrightData.ICanIndex{`0},BrightData.IAppendToBuffer{BrightData.LinearAlgebra.ReadOnly.ReadOnlyVector})">
<summary>
One hot conversion
</summary>
<typeparam name="T"></typeparam>
<param name="input"></param>
<param name="indexer"></param>
<param name="output"></param>
</member>
<member name="T:BrightData.Buffer.Operations.Conversion.ToCategoricalIndexConversion`1">
<summary>
Maps each item to a consistent index (category)
</summary>
<typeparam name="T"></typeparam>
</member>
<member name="T:BrightData.Buffer.Operations.Conversion.ToStringConversion`1">
<summary>
Converts to a string
</summary>
<typeparam name="T"></typeparam>
<param name="input"></param>
<param name="output"></param>
</member>
<member name="M:BrightData.Buffer.Operations.Conversion.ToStringConversion`1.#ctor(BrightData.IReadOnlyBuffer{`0},BrightData.IAppendToBuffer{System.String})">
<summary>
Converts to a string
</summary>
<typeparam name="T"></typeparam>
<param name="input"></param>
<param name="output"></param>
</member>
<member name="T:BrightData.Buffer.Operations.Helper.SimpleNumericAnalysis`1">
<summary>
Casts to double to perform numerical analysis
Expand Down Expand Up @@ -843,13 +785,13 @@
Vectorisation of booleans
</summary>
</member>
<member name="T:BrightData.Buffer.Operations.Vectorisation.CategoricalIndexVectorisation`1">
<member name="T:BrightData.Buffer.Operations.Vectorisation.CategoricalIndexVectoriser`1">
<summary>
Vectorisation to a single index
</summary>
<typeparam name="T"></typeparam>
</member>
<member name="M:BrightData.Buffer.Operations.Vectorisation.CategoricalIndexVectorisation`1.#ctor">
<member name="M:BrightData.Buffer.Operations.Vectorisation.CategoricalIndexVectoriser`1.#ctor">
<summary>
Vectorisation to a single index
</summary>
Expand Down Expand Up @@ -955,6 +897,22 @@
<typeparam name="TT"></typeparam>
<param name="from"></param>
</member>
<member name="T:BrightData.Buffer.ReadOnly.Converter.CategoricalIndexConverter`1">
<summary>
Converts the values in the buffer to a single categorical index
</summary>
<typeparam name="T"></typeparam>
<param name="from"></param>
<param name="indexer"></param>
</member>
<member name="M:BrightData.Buffer.ReadOnly.Converter.CategoricalIndexConverter`1.#ctor(BrightData.IReadOnlyBuffer{`0},BrightData.ICanIndex{`0})">
<summary>
Converts the values in the buffer to a single categorical index
</summary>
<typeparam name="T"></typeparam>
<param name="from"></param>
<param name="indexer"></param>
</member>
<member name="T:BrightData.Buffer.ReadOnly.Converter.NormalizationConverter`1">
<summary>
Converts via a normalisation model
Expand Down Expand Up @@ -2910,6 +2868,14 @@
<param name="isCategorical"></param>
<returns></returns>
</member>
<member name="M:BrightData.ExtensionMethods.SetIsOneHot(BrightData.Types.MetaData,System.Boolean)">
<summary>
Sets whether this is one-hot encoded
</summary>
<param name="metaData"></param>
<param name="isOneHotEncoded"></param>
<returns></returns>
</member>
<member name="M:BrightData.ExtensionMethods.SetName(BrightData.Types.MetaData,System.String)">
<summary>
Sets the name
Expand Down Expand Up @@ -7330,6 +7296,17 @@
<param name="ct">Cancellation token (optional)</param>
<returns></returns>
</member>
<member name="T:BrightData.ICanIndex">
<summary>
Maps objects to a consistent index
</summary>
</member>
<member name="M:BrightData.ICanIndex.GetMapping">
<summary>
Returns the mapping
</summary>
<returns></returns>
</member>
<member name="T:BrightData.ICanIndex`1">
<summary>
Maps objects of type T to an index
Expand Down
12 changes: 4 additions & 8 deletions BrightData/Buffer/Composite/CompositeBufferBase.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using BrightData.DataTable.Helper;
using BrightData.Helper;
using BrightData.Types;

Expand All @@ -14,7 +16,7 @@ namespace BrightData.Buffer.Composite
/// </summary>
/// <typeparam name="T"></typeparam>
/// <typeparam name="BT"></typeparam>
internal abstract class CompositeBufferBase<T, BT> : ICompositeBuffer<T>
internal abstract class CompositeBufferBase<T, BT> : TypedBufferBase<T>, ICompositeBuffer<T>
where T : notnull
where BT : ICompositeBufferBlock<T>
{
Expand Down Expand Up @@ -130,13 +132,7 @@ await foreach(var item in EnumerateAllTyped())
yield return item;
}

public async Task<ReadOnlyMemory<object>> GetBlock(uint blockIndex)
{
var block = await GetTypedBlock(blockIndex);
return block.AsObjects();
}

public virtual async Task<ReadOnlyMemory<T>> GetTypedBlock(uint blockIndex)
public override async Task<ReadOnlyMemory<T>> GetTypedBlock(uint blockIndex)
{
uint currentIndex = 0;

Expand Down

This file was deleted.

22 changes: 0 additions & 22 deletions BrightData/Buffer/Operations/Conversion/OneHotConversion.cs

This file was deleted.

This file was deleted.

14 changes: 0 additions & 14 deletions BrightData/Buffer/Operations/Conversion/ToStringConversion.cs

This file was deleted.

0 comments on commit 2012398

Please sign in to comment.