# Git2Hash
For a local git repository, extract the MD5, SHA-1 and SHA-256 hashes, plus a synthetic VirusTotal link, for all files in the commit history of the currently checked-out branch (normally master).

The purpose is to recover information about files that have been deleted from the current master branch on GitHub but are still present in its commit history.

This requires these modules in addition to pandas:

- gitpython
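- filemagic (provides the `magic` module used below; not to be confused with `python-magic`, which installs a module of the same name with a different API)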

In [87]:
from git import Repo
import magic
import hashlib
import pandas as pd

In [95]:
# Filesystem location of the local git repository to analyse; passed to git2hash().
path = '/tmp/sample'

In [96]:
def linkify_vt(val):
    """Render *val* as the target of an HTML link labelled "virustotal".

    The anchor opens in a new browser tab (``target="_blank"``). *val* is
    inserted verbatim as the ``href`` attribute; no escaping is applied.
    """
    anchor_template = '<a target="_blank" href="{}">virustotal</a>'
    return anchor_template.format(val)

def git2hash(path, rev=None):
    """Hash every file blob found in a repository's commit history.

    Walks the tree of each commit reachable from *rev* and, for every
    distinct file content, records its name, size, MIME type, MD5, SHA-1
    and SHA-256 digests and a VirusTotal URL built from the SHA-256.

    Args:
        path: Location of the local git repository (passed to ``Repo``).
        rev: Optional revision specifier forwarded to ``Repo.iter_trees``
            (e.g. ``'master'``). ``None`` keeps the GitPython default.

    Returns:
        A pandas ``Styler`` (not a plain ``DataFrame``) with the columns
        ``name, mime_type, sha256, vt, size, md5, sha1``; the ``vt`` column
        is rendered through ``linkify_vt``. One row per distinct SHA-256.

    Notes:
        * Trees are traversed recursively, so files inside subdirectories
          are included; ``Tree.blobs`` alone lists only the direct children
          of each commit's root tree.
        * ``name`` is the blob's base name. When identical content appears
          under several names, the name seen last during the walk is kept.
    """
    # Only the leading bytes are handed to libmagic for MIME detection.
    mime_sniff_bytes = 4 * 1024 * 1024

    r = Repo(path)
    file_objects = {}
    # git blob id -> record already built for that content. An unchanged file
    # shows up in every commit's tree; the cache avoids re-reading and
    # re-hashing it each time.
    records_by_blob_id = {}

    # A single libmagic handle serves the whole walk.
    with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
        for t in r.iter_trees(rev):
            for item in t.traverse():
                # traverse() also yields sub-trees and submodules.
                if item.type != 'blob':
                    continue

                known = records_by_blob_id.get(item.hexsha)
                if known is not None:
                    # Same content seen before: keep last-seen-name semantics.
                    known['name'] = item.name
                    continue

                obj = item.data_stream.read()
                mime_type = m.id_buffer(obj[:mime_sniff_bytes])
                sha256 = hashlib.sha256(obj).hexdigest()
                record = {
                    'sha256': sha256,
                    'name': item.name,
                    'size': item.size,
                    'md5': hashlib.md5(obj).hexdigest(),
                    'sha1': hashlib.sha1(obj).hexdigest(),
                    'mime_type': mime_type,
                    'vt': 'https://www.virustotal.com/gui/file/{}/detection'.format(sha256),
                }
                file_objects[sha256] = record
                records_by_blob_id[item.hexsha] = record

    df = pd.DataFrame(list(file_objects.values()),
                      columns=['name', 'mime_type', 'sha256', 'vt', 'size', 'md5', 'sha1'])

    return df.style.format({'vt': linkify_vt})

In [97]:
# NOTE: despite the variable name, git2hash() returns a pandas Styler
# (the result of df.style.format(...)), not a plain DataFrame.
df = git2hash(path)

In [98]:
# Bare expression at the end of the cell: the notebook renders the styled table below.
df

Unnamed: 0,name,mime_type,sha256,vt,size,md5,sha1
0,..exe,application/x-dosexec,c4eada327d83caebe0929b3aa638db533a2d30c4ef15a3dc4f445245dfd53797,virustotal,156104,a6763ae35acd41ec0f50bdfcc559d83b,7ad583aa228ab1cc01af4d69b8a1256d3ffbef23
1,S258745.cmd,text/x-msdos-batch,9635850e30112bd8427ece8a738bf8e93b37ebcd1c48c4ce501fa3924cdb8742,virustotal,1293,14791338aa4495f24044c3cecadaca50,565c1d84cc1091a2105f00d98d5a8018d27c7934
2,README.md,text/plain,a0c81baa03e82fbab08f1258c767f44fc26adc7f6833cb0c352668f1d7a70c74,virustotal,12,1edbbc0a486566abbff55e1da8218309,82be576aa21709abcb9994f238282bcfbd53cdb0
3,LICENSE,text/plain,1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5,virustotal,16725,9741c346eef56131163e13b9db1241b3,d22157abc0fc0b4ae96380c09528e23cf77290a9
4,.gitignore,text/plain,3ba2020da55f76620d61950cb88f0d6cc0d5852eae7b2c03f8487a64c87da959,virustotal,270,2b61ce16bc0d8f33d40ce4c8bc5e90cf,99d352abda5528933a0b16e017c9ef1c2a275951
