bep47 #526

Merged (24 commits, May 26, 2022)
The diff below shows the changes from 12 of the 24 commits.

Commits
eddd97b
WIP: poking at bep0047
jpmikkers Apr 8, 2022
4035c26
Fix pad length calculation and unit test
jpmikkers Apr 9, 2022
476c5f5
Faster way to zero out padding buffers
jpmikkers Apr 9, 2022
3494a02
Also test pad file path
jpmikkers Apr 9, 2022
19e15be
Don't pad the last file
jpmikkers Apr 10, 2022
d4e9d7a
make padding files invisible to the higher layers, hashing not correc…
jpmikkers Apr 10, 2022
091e403
Correct padding property documentation
jpmikkers Apr 10, 2022
022aa32
Try to fix hashing for padded files
jpmikkers Apr 12, 2022
d4b7bf8
Empty files don't need padding
jpmikkers Apr 12, 2022
9c169c7
IPieceWriter implementations should only have to worry about non-padd…
jpmikkers Apr 12, 2022
d11dfef
Detect and reject pathological padding cases
jpmikkers Apr 12, 2022
002708a
TorrentFileTuple to internal
jpmikkers Apr 12, 2022
e927d6e
Hybrid loading and validation
jpmikkers Apr 13, 2022
c4845e2
md5sum test.
jpmikkers Apr 17, 2022
4c3d77c
copy over the pieces root hash too
alanmcgovern Apr 21, 2022
5a4ad14
Maintain the invariant that empty files are part of the same piece
alanmcgovern Apr 21, 2022
af4b6b2
Merge branch 'alanmcgovern:master' into bep0047
jpmikkers May 5, 2022
70ce1d0
[client] Fix a null warning
alanmcgovern May 22, 2022
527d3c1
[core] Fix MD5 hashing
alanmcgovern May 22, 2022
017a9eb
do md5sum for padding files
jpmikkers May 23, 2022
e043c93
Merge branch 'alanmcgovern:master' into bep0047
jpmikkers May 23, 2022
8dee08b
[core] Apply standard formatting to all files
alanmcgovern May 24, 2022
96fa486
[bep52] Fix an off-by-one calculation in end piece index
alanmcgovern May 24, 2022
09345a0
[bep47] Fix padding when file length == piecelength
alanmcgovern May 24, 2022
@@ -58,6 +58,8 @@ class TorrentFileInfo : ITorrentManagerFile

public long Length => TorrentFile.Length;

+ public long Padding => TorrentFile.Padding;

public ReadOnlyMemory<byte> PiecesRoot => TorrentFile.PiecesRoot;

public TorrentFileInfo (ITorrentFile torrentFile, string fullPath)
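Note (not part of the diff): the new Padding property exposes BEP 47 pad bytes on the file abstraction. The invariant the rest of this PR maintains, sketched with illustrative names rather than code from the diff, is that for every file except the last one:

// BEP 47: each file is padded so that the next file starts on a piece boundary.
(file.Length + file.Padding) % pieceLength == 0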
src/MonoTorrent.Client/MonoTorrent/Torrent.cs (40 additions, 20 deletions)
@@ -426,7 +426,7 @@ void ProcessInfo (BEncodedDictionary dictionary, ref PieceHashesV1? hashesV1)
case ("files"):
// This is the list of files using the v1 torrent format.
// Only load if we have not processed filesv2
- if (Files.Count == 0)
+ // if (Files.Count == 0) // TODO JMIK: merge v1 hashes info with v2 ??
Files = LoadTorrentFilesV1 ((BEncodedList) keypair.Value, PieceLength);
break;

@@ -468,7 +468,7 @@ void ProcessInfo (BEncodedDictionary dictionary, ref PieceHashesV1? hashesV1)
Size = length;
string path = Name;
int endPiece = Math.Min (hashesV1!.Count - 1, (int) ((Size + (PieceLength - 1)) / PieceLength));
- Files = Array.AsReadOnly<ITorrentFile> (new[] { new TorrentFile (path, length, 0, endPiece, 0) });
+ Files = Array.AsReadOnly<ITorrentFile> (new[] { new TorrentFile (path, length, 0, endPiece, 0, TorrentFileAttributes.None, 0) });
}
}

@@ -616,30 +616,49 @@ void LoadInternal (BEncodedDictionary torrentInformation, RawInfoHashes infoHash
PieceHashes = new PieceHashes (InfoHashes.V1 is null ? null : hashesV1, InfoHashes.V2 is null ? null : hashesV2);
}

+ static TorrentFileAttributes AttrStringToAttributesEnum (string attr)
+ {
+ var result = TorrentFileAttributes.None;
+
+ if (attr.Contains ("l"))
+ result |= TorrentFileAttributes.Symlink;
+
+ if (attr.Contains ("x"))
+ result |= TorrentFileAttributes.Executable;
+
+ if (attr.Contains ("h"))
+ result |= TorrentFileAttributes.Hidden;
+
+ if (attr.Contains ("p"))
+ result |= TorrentFileAttributes.Padding;
+
+ return result;
+ }
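Note (not part of the diff): BEP 47 packs file attributes into a single string, so a hypothetical "attr" value of "hp" maps through the helper above as:

var attrs = AttrStringToAttributesEnum ("hp");
// attrs == (TorrentFileAttributes.Hidden | TorrentFileAttributes.Padding)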

static IList<ITorrentFile> LoadTorrentFilesV1 (BEncodedList list, int pieceLength)
{
var sb = new StringBuilder (32);

- var files = new List<(string path, long length, ReadOnlyMemory<byte> md5sum, ReadOnlyMemory<byte> ed2k, ReadOnlyMemory<byte> sha1)> ();
+ var files = new List<TorrentFileTuple> ();
foreach (BEncodedDictionary dict in list) {
- long length = 0;
- string? path = null;
- ReadOnlyMemory<byte> md5sum = default;
- ReadOnlyMemory<byte> ed2k = default;
- ReadOnlyMemory<byte> sha1 = default;
+ var tup = new TorrentFileTuple ();

foreach (KeyValuePair<BEncodedString, BEncodedValue> keypair in dict) {
switch (keypair.Key.Text) {
case ("attr"):
tup.attributes = AttrStringToAttributesEnum(keypair.Value.ToString());
break;

case ("sha1"):
- sha1 = ((BEncodedString) keypair.Value).AsMemory ();
+ tup.sha1 = ((BEncodedString) keypair.Value).AsMemory ();
break;

case ("ed2k"):
- ed2k = ((BEncodedString) keypair.Value).AsMemory ();
+ tup.ed2k = ((BEncodedString) keypair.Value).AsMemory ();
break;

case ("length"):
- length = long.Parse (keypair.Value.ToString ()!);
+ tup.length = long.Parse (keypair.Value.ToString ()!);
break;

case ("path.utf-8"):
@@ -649,36 +668,37 @@ static IList<ITorrentFile> LoadTorrentFilesV1 (BEncodedList list, int pieceLengt
sb.Append (Path.DirectorySeparatorChar);
}
}
- path = sb.ToString (0, sb.Length - 1);
+ tup.path = sb.ToString (0, sb.Length - 1);
sb.Remove (0, sb.Length);
break;

case ("path"):
- if (string.IsNullOrEmpty (path)) {
+ if (string.IsNullOrEmpty (tup.path)) {
foreach (BEncodedString str in ((BEncodedList) keypair.Value)) {
if (!BEncodedString.IsNullOrEmpty (str)) {
sb.Append (str.Text);
sb.Append (Path.DirectorySeparatorChar);
}
}
- path = sb.ToString (0, sb.Length - 1);
+ tup.path = sb.ToString (0, sb.Length - 1);
sb.Remove (0, sb.Length);
}
break;

case ("md5sum"):
- md5sum = ((BEncodedString) keypair.Value).AsMemory ();
+ tup.md5sum = ((BEncodedString) keypair.Value).AsMemory ();
break;

default:
break; //FIXME: Log unknown values
}
}
- if (path == null)
+ if (tup.path == null)
// FIXME: Log invalid paths somewhere?
continue;

- PathValidator.Validate (path);
- files.Add ((path, length, md5sum, ed2k, sha1));
+ PathValidator.Validate (tup.path);
+ files.Add(tup);
}

return Array.AsReadOnly<ITorrentFile> (TorrentFile.Create (pieceLength, files.ToArray ()));
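Note (not part of the diff): TorrentFileTuple itself is not shown here (commit 002708a makes it internal). Judging from the usage above, it is roughly the following mutable bag; the padding field is an assumption based on the tuple's role in BEP 47 handling:

class TorrentFileTuple
{
    public string? path;
    public long length;
    public long padding; // assumed: set when a 'p'-attribute pad file is folded into its predecessor
    public ReadOnlyMemory<byte> md5sum;
    public ReadOnlyMemory<byte> ed2k;
    public ReadOnlyMemory<byte> sha1;
    public TorrentFileAttributes attributes = TorrentFileAttributes.None;
}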
@@ -717,12 +737,12 @@ static void LoadTorrentFilesV2 (string key, BEncodedDictionary data, List<ITorre
if (key == "") {
var length = ((BEncodedNumber) data["length"]).Number;
if (length == 0) {
- files.Add (new TorrentFile (path, length, 0, 0, 0));
+ files.Add (new TorrentFile (path, length, 0, 0, 0, TorrentFileAttributes.None, 0));
} else {
totalPieces++;
var offsetInTorrent = (files.LastOrDefault ()?.OffsetInTorrent ?? 0) + (files.LastOrDefault ()?.Length ?? 0);
var piecesRoot = data.TryGetValue ("pieces root", out var value) ? ((BEncodedString) value).AsMemory () : ReadOnlyMemory<byte>.Empty;
- files.Add (new TorrentFile (path, length, totalPieces, totalPieces + (int) (length / pieceLength), offsetInTorrent, piecesRoot));
+ files.Add (new TorrentFile (path, length, totalPieces, totalPieces + (int) (length / pieceLength), offsetInTorrent, piecesRoot, TorrentFileAttributes.None, 0));
totalPieces = files.Last ().EndPieceIndex;
}
} else {
src/MonoTorrent.Client/MonoTorrent/TorrentCreator.cs (56 additions, 16 deletions)
@@ -55,6 +55,7 @@ internal class InputFile : ITorrentManagerFile
public byte[]? MD5 { get; set; }
public SemaphoreSlim Locker { get; } = new SemaphoreSlim (1, 1);
public long Length { get; set; }
+ public long Padding { get; set; } = 0;
public ReadOnlyMemory<byte> PiecesRoot { get; }

internal InputFile (string path, long length)
@@ -122,6 +123,7 @@ public static int RecommendedPieceSize (IEnumerable<FileMapping> files)

public List<string> GetrightHttpSeeds { get; }
public bool StoreMD5 { get; set; }
+ public bool UsePadding { get; set; } = false;

internal TimeSpan ReadAllData_DequeueBufferTime;
internal TimeSpan ReadAllData_EnqueueFilledBufferTime;
@@ -232,6 +234,11 @@ internal async Task<BEncodedDictionary> CreateAsync (string name, List<InputFile
info["name"] = (BEncodedString) name;
AddCommonStuff (torrent);

+ foreach(var file in files.Take(files.Count-1)) {
+ file.Padding = (UsePadding && file.Length>0) ? PieceLength - (file.Length % PieceLength) : 0;
+ }
+ files.Last ().Padding = 0;
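Note (not part of the diff): a worked example of the rule above, as a self-contained sketch. With a 16384-byte piece length, a 100000-byte file gets 16384 - (100000 % 16384) == 14688 pad bytes, so 100000 + 14688 == 7 * 16384. Commit 09345a0 later fixes the case where a file's length is already an exact multiple of PieceLength; the sketch folds that fix in via the outer modulo:

using System;

class PaddingRule
{
    // BEP 47: pad every file except the last up to the next piece boundary.
    static long PadLength (long fileLength, long pieceLength)
        => fileLength == 0 ? 0 : (pieceLength - (fileLength % pieceLength)) % pieceLength;

    static void Main ()
    {
        Console.WriteLine (PadLength (100000, 16384)); // 14688: 100000 + 14688 == 7 * 16384
        Console.WriteLine (PadLength (16384, 16384));  // 0: already aligned (the case fixed in 09345a0)
        Console.WriteLine (PadLength (0, 16384));      // 0: empty files don't need padding (commit d4b7bf8)
    }
}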

info["pieces"] = (BEncodedString) await CalcPiecesHash (files, token);

if (files.Count == 1 && files[0].Path == name)
@@ -278,7 +285,7 @@ void AddCommonStuff (BEncodedDictionary torrent)

async Task<byte[]> CalcPiecesHash (List<InputFile> files, CancellationToken token)
{
- long totalLength = files.Sum (t => t.Length);
+ long totalLength = files.Sum (t => (t.Length + t.Padding));
int pieceCount = (int) ((totalLength + PieceLength - 1) / PieceLength);
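Note (not part of the diff): the piece count is the usual ceiling-division idiom. Continuing the example above, (114688 + 16383) / 16384 == 7, and because every non-final file is padded to a piece boundary, no piece now spans two real files.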

// If the torrent will not give us at least 8 pieces per thread, try fewer threads. Then just run it
@@ -310,7 +317,7 @@ async Task<byte[]> CalcPiecesHash (int startPiece, long totalBytesToRead, Synchr
var emptyBuffers = new AsyncProducerConsumerQueue<byte[]> (4);

// Make this buffer one element larger so it can fit the placeholder which indicates a file has been completely read.
- var filledBuffers = new AsyncProducerConsumerQueue<(byte[]?, int, InputFile?)> (emptyBuffers.Capacity + 1);
+ var filledBuffers = new AsyncProducerConsumerQueue<(byte[]?, int, int, InputFile?)> (emptyBuffers.Capacity + 1);

// This is the IPieceWriter which we'll use to get our filestream. Each thread gets it's own writer.
using IPieceWriter writer = Factories.CreatePieceWriter (3);
@@ -358,41 +365,45 @@
return await hashAllTask;
}

- async Task ReadAllDataAsync (long startOffset, long totalBytesToRead, Synchronizer synchronizer, IList<InputFile> files, IPieceWriter writer, AsyncProducerConsumerQueue<byte[]> emptyBuffers, AsyncProducerConsumerQueue<(byte[]?, int, InputFile?)> filledBuffers, CancellationToken token)
+ async Task ReadAllDataAsync (long startOffset, long totalBytesToRead, Synchronizer synchronizer, IList<InputFile> files, IPieceWriter writer, AsyncProducerConsumerQueue<byte[]> emptyBuffers, AsyncProducerConsumerQueue<(byte[]?, int, int, InputFile?)> filledBuffers, CancellationToken token)
{
await MainLoop.SwitchToThreadpool ();

await synchronizer.Self.Task;
foreach (var file in files) {
long fileRead = 0;
- if (startOffset >= file.Length) {
- startOffset -= file.Length;

+ // skip files that we already hashed
+ if (startOffset >= (file.Length+file.Padding)) {
+ startOffset -= (file.Length+file.Padding);
continue;
}

fileRead = startOffset;
startOffset = 0;

- while (fileRead < file.Length && totalBytesToRead > 0) {
+ while (fileRead < (file.Length+file.Padding) && totalBytesToRead > 0) {
var timer = ValueStopwatch.StartNew ();
byte[] buffer = await emptyBuffers.DequeueAsync (token).ConfigureAwait (false);
ReadAllData_DequeueBufferTime += timer.Elapsed;

timer.Restart ();
- int toRead = (int) Math.Min (buffer.Length, file.Length - fileRead);
+ int toRead = (int) Math.Min (buffer.Length, (file.Length+file.Padding) - fileRead);
toRead = (int) Math.Min (totalBytesToRead, toRead);

- int read;
+ // 'read' is the total of file bytes + padding bytes that were read
+ // 'padding' is only the number of padding bytes that were read
+ // we need those two so the MD5 hasher can hash files without padding
// FIXME: thread safety
- read = await writer.ReadAsync (file, fileRead, new Memory<byte> (buffer, 0, toRead));
+ (var read, var padding) = await writer.PaddingAwareReadAsyncForCreator (file, fileRead, new Memory<byte> (buffer, 0, toRead));
if (read != toRead)
throw new InvalidOperationException ("The required data could not be read from the file.");
fileRead += read;
totalBytesToRead -= read;
ReadAllData_ReadTime += timer.Elapsed;

timer.Restart ();
- await filledBuffers.EnqueueAsync ((buffer, read, file), token);
+ await filledBuffers.EnqueueAsync ((buffer, read, padding, file), token);
ReadAllData_EnqueueFilledBufferTime += timer.Elapsed;

if (emptyBuffers.Count == 0 && synchronizer.Next != synchronizer.Self) {
@@ -404,10 +415,10 @@ async Task ReadAllDataAsync (long startOffset, long totalBytesToRead, Synchroniz
ReusableTaskCompletionSource<bool>? next = synchronizer.Next;
synchronizer.Disconnect ();
next!.SetResult (true);
- await filledBuffers.EnqueueAsync ((null, 0, null), token);
+ await filledBuffers.EnqueueAsync ((null, 0, 0, null), token);
}
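Note (not part of the diff): PaddingAwareReadAsyncForCreator is not included in this changeset. A minimal sketch of the contract the comments above describe, with the name and signature assumed from the call site (the diff only shows that IPieceWriter.ReadAsync returns the byte count), and assuming callers never request more than Length + Padding bytes:

// Inside a static class. Reads real file bytes, then fabricates zeroed pad bytes;
// returns the total delivered and how many of those were padding (so MD5 can skip them).
static async ReusableTask<(int read, int padding)> PaddingAwareReadAsyncForCreator (
    this IPieceWriter writer, ITorrentManagerFile file, long offset, Memory<byte> buffer)
{
    int fileBytes = (int) Math.Max (0, Math.Min (buffer.Length, file.Length - offset));
    int read = fileBytes > 0 ? await writer.ReadAsync (file, offset, buffer.Slice (0, fileBytes)) : 0;
    int padding = buffer.Length - read;
    buffer.Span.Slice (read, padding).Clear (); // pad bytes are all zero per BEP 47
    return (read + padding, padding);
}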

- async Task<byte[]> HashAllDataAsync (long totalBytesToRead, AsyncProducerConsumerQueue<byte[]> emptyBuffers, AsyncProducerConsumerQueue<(byte[]?, int, InputFile?)> filledBuffers, CancellationToken token)
+ async Task<byte[]> HashAllDataAsync (long totalBytesToRead, AsyncProducerConsumerQueue<byte[]> emptyBuffers, AsyncProducerConsumerQueue<(byte[]?, int, int, InputFile?)> filledBuffers, CancellationToken token)
{
await MainLoop.SwitchToThreadpool ();

@@ -430,7 +441,7 @@
long totalRead = 0;
while (true) {
var timer = ValueStopwatch.StartNew ();
- (byte[]? buffer, int count, InputFile? file) = await filledBuffers.DequeueAsync (token);
+ (byte[]? buffer, int count, int padding, InputFile? file) = await filledBuffers.DequeueAsync (token);
Hashing_DequeueFilledTime += timer.Elapsed;

// If the buffer and file are both null then all files have been fully read.
@@ -450,10 +461,10 @@ async Task<byte[]> HashAllDataAsync (long totalBytesToRead, AsyncProducerConsume
md5Hasher.Initialize ();
}
} else {
- fileRead += count;
+ fileRead += (count - padding);
totalRead += count;

- md5Hasher?.TransformBlock (buffer, 0, count, buffer, 0);
+ md5Hasher?.TransformBlock (buffer, 0, count - padding, buffer, 0);
int bufferRead = 0;

timer.Restart ();
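Note (not part of the diff): continuing the earlier numbers, a 100000-byte file with 14688 pad bytes arrives here as 114688 buffered bytes. The piece hasher consumes all of them, while md5Hasher is fed only count - padding bytes, i.e. the 100000 real ones, which is why fileRead also advances by count - padding.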
@@ -487,7 +498,10 @@ async Task<byte[]> HashAllDataAsync (long totalBytesToRead, AsyncProducerConsume
void CreateMultiFileTorrent (BEncodedDictionary dictionary, List<InputFile> mappings)
{
var info = (BEncodedDictionary) dictionary["info"];
- List<BEncodedValue> files = mappings.ConvertAll (ToFileInfoDict);
+ List<BEncodedValue> files = mappings
+ .Select (ToFileInfoDicts)
+ .SelectMany (x => x)
+ .ToList ();
info.Add ("files", new BEncodedList (files));
}

@@ -499,6 +513,13 @@ void CreateSingleFileTorrent (BEncodedDictionary dictionary, IList<InputFile> ma
infoDict["md5sum"] = (BEncodedString) mappings[0].MD5!;
}

+ // converts InputFile into one BEncodedDictionary when there's no padding, or two BEncodedDictionaries when there is.
+ static BEncodedValue[] ToFileInfoDicts (InputFile file)
+ {
+ return (file.Padding > 0) ?
+ new[] { ToFileInfoDict (file), ToPaddingFileInfoDict (file) } : new[] { ToFileInfoDict (file) };
+ }

static BEncodedValue ToFileInfoDict (InputFile file)
{
var fileDict = new BEncodedDictionary ();
@@ -516,6 +537,25 @@ static BEncodedValue ToFileInfoDict (InputFile file)
return fileDict;
}

+ static BEncodedValue ToPaddingFileInfoDict (InputFile file)
+ {
+ var fileDict = new BEncodedDictionary ();
+
+ var filePath = new BEncodedList ();
+ filePath.Add (new BEncodedString (".pad"));
+ filePath.Add (new BEncodedString ($"{file.Padding}"));
+
+ fileDict["length"] = new BEncodedNumber (file.Padding);
+ fileDict["path"] = filePath;
+
+ // TODO JMIK: do padding files have MD5 hashes? It's easy to add an all-zero MD5 hash calculation here
+ //if (file.MD5 != null)
+ // fileDict["md5sum"] = (BEncodedString) file.MD5;
+
+ fileDict["attr"] = (BEncodedString) "p";
+ return fileDict;
+ }
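Note (not part of the diff): with the illustrative 14688-byte padding from earlier, the dictionary built above bencodes (keys sorted, as bencode requires) to a pad-file entry whose path is ".pad/14688":

d4:attr1:p6:lengthi14688e4:pathl4:.pad5:14688ee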

static void Validate (List<FileMapping> maps)
{
foreach (FileMapping map in maps)