Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect binary file by NULL byte #219

Merged
merged 2 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/Gitonomy/Git/Blob.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
*/
class Blob
{
/**
* @var int Number of leading bytes that git scans for a NUL byte when deciding whether content is binary: https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.44.0#n193
*/
private const FIRST_FEW_BYTES = 8000;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a short comment that points to the git source code?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the comment. Is it clear enough?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perfect. Thanks


/**
* @var Repository
*/
Expand All @@ -39,6 +44,11 @@ class Blob
*/
protected $mimetype;

/**
* @var bool
*/
protected $text;

/**
* @param Repository $repository Repository where the blob is located
* @param string $hash Hash of the blob
Expand Down Expand Up @@ -89,6 +99,9 @@ public function getMimetype()
/**
* Determines if file is binary.
*
* Uses the same check that git uses to determine if a file is binary or not
* https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.44.0#n193
*
* @return bool
*/
public function isBinary()
Expand All @@ -99,10 +112,17 @@ public function isBinary()
/**
* Determines if file is text.
*
* Uses the same check that git uses to determine if a file is binary or not
* https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.44.0#n193
*
* @return bool
*/
public function isText()
{
    // Compute the answer on the first call only and keep it in $this->text,
    // so the content is inspected at most once per instance.
    if (null === $this->text) {
        // Mirror git's heuristic: a NUL byte within the first
        // FIRST_FEW_BYTES bytes means the content is treated as binary.
        $head = substr($this->getContent(), 0, self::FIRST_FEW_BYTES);
        $this->text = !str_contains($head, "\0");
    }

    return $this->text;
}
}
17 changes: 13 additions & 4 deletions tests/Gitonomy/Git/Tests/BlobTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ public function getReadmeBlob($repository)
return $repository->getCommit(self::LONGFILE_COMMIT)->getTree()->resolvePath('README.md');
}

public function getImageBlob($repository)
{
    // Walk from the fixture commit to its root tree, then look up the
    // image entry by path.
    $commit = $repository->getCommit(self::LONGFILE_COMMIT);
    $tree = $commit->getTree();

    return $tree->resolvePath('image.jpg');
}

/**
* @dataProvider provideFoobar
*/
Expand Down Expand Up @@ -67,16 +72,20 @@ public function testGetMimetype($repository)
*/
public function testIsText($repository)
{
    // README.md must be reported as text.
    $readmeBlob = $this->getReadmeBlob($repository);
    $this->assertTrue($readmeBlob->isText());

    // image.jpg must not be reported as text.
    $imageBlob = $this->getImageBlob($repository);
    $this->assertFalse($imageBlob->isText());
}

/**
* @dataProvider provideFoobar
*/
public function testIsBinary($repository)
{
    // README.md must not be reported as binary.
    $readmeBlob = $this->getReadmeBlob($repository);
    $this->assertFalse($readmeBlob->isBinary());

    // image.jpg must be reported as binary.
    $imageBlob = $this->getImageBlob($repository);
    $this->assertTrue($imageBlob->isBinary());
}
}
Loading