Skip to content

Commit

Permalink
Merge branch 'dotnet:main' into improvement6
Browse files Browse the repository at this point in the history
  • Loading branch information
Lehonti committed Aug 31, 2023
2 parents 609936c + aaf226c commit 547d22c
Show file tree
Hide file tree
Showing 38 changed files with 6,659 additions and 6,634 deletions.
29 changes: 25 additions & 4 deletions src/Microsoft.Data.Analysis/DataFrame.IO.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Data.Common;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

Expand Down Expand Up @@ -349,8 +350,8 @@ private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int
private static DataFrame ReadCsvLinesIntoDataFrame(WrappedStreamReaderOrStringReader wrappedReader,
char separator = ',', bool header = true,
string[] columnNames = null, Type[] dataTypes = null,
long numberOfRowsToRead = -1, int guessRows = 10, bool addIndexColumn = false
)
long numberOfRowsToRead = -1, int guessRows = 10, bool addIndexColumn = false,
bool renameDuplicatedColumns = false)
{
if (dataTypes == null && guessRows <= 0)
{
Expand All @@ -376,6 +377,25 @@ private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int
// First pass: schema and number of rows.
while ((fields = parser.ReadFields()) != null)
{
if (renameDuplicatedColumns)
{
var names = new Dictionary<string, int>();

for (int i = 0; i < fields.Length; i++)
{
if (names.TryGetValue(fields[i], out int index))
{
var newName = String.Format("{0}.{1}", fields[i], index);
names[fields[i]] = ++index;
fields[i] = newName;
}
else
{
names.Add(fields[i], 1);
}
}
}

if ((numberOfRowsToRead == -1) || rowline < numberOfRowsToRead)
{
if (linesForGuessType.Count < guessRows || (header && rowline == 0))
Expand Down Expand Up @@ -524,12 +544,13 @@ public TextReader GetTextReader()
/// <param name="guessRows">number of rows used to guess types</param>
/// <param name="addIndexColumn">add one column with the row index</param>
/// <param name="encoding">The character encoding. Defaults to UTF8 if not specified</param>
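/// <param name="renameDuplicatedColumns">If set to true, columns with repeated names are auto-renamed: the first occurrence keeps its name and each later occurrence gets a numeric suffix (for example "a", "a.1", "a.2"). Defaults to false.</param>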
/// <returns><see cref="DataFrame"/></returns>
public static DataFrame LoadCsv(Stream csvStream,
char separator = ',', bool header = true,
string[] columnNames = null, Type[] dataTypes = null,
long numberOfRowsToRead = -1, int guessRows = 10, bool addIndexColumn = false,
Encoding encoding = null)
Encoding encoding = null, bool renameDuplicatedColumns = false)
{
if (!csvStream.CanSeek)
{
Expand All @@ -542,7 +563,7 @@ public TextReader GetTextReader()
}

WrappedStreamReaderOrStringReader wrappedStreamReaderOrStringReader = new WrappedStreamReaderOrStringReader(csvStream, encoding ?? Encoding.UTF8);
return ReadCsvLinesIntoDataFrame(wrappedStreamReaderOrStringReader, separator, header, columnNames, dataTypes, numberOfRowsToRead, guessRows, addIndexColumn);
return ReadCsvLinesIntoDataFrame(wrappedStreamReaderOrStringReader, separator, header, columnNames, dataTypes, numberOfRowsToRead, guessRows, addIndexColumn, renameDuplicatedColumns);
}

/// <summary>
Expand Down

0 comments on commit 547d22c

Please sign in to comment.