# Let's verify the downloaded files are not broken

* I'll use MD5 and SHA1 hashsums to verify that the downloaded files and the Kaggle Notebook's files are the same.
* To use it, run this notebook on your local machine and check that the hashsums of your `df_output.csv` match the ones shown in this notebook.
* Check your dataset's file path on your local machine. **Please check the 4th cell.**

## Calculate hashsum

In [None]:
import hashlib
import os
import pathlib
import platform
import sys
import warnings

import pandas as pd

from tqdm import tqdm

In [None]:
# Silence warning output for the rest of the notebook:
# first FutureWarning specifically, then every warning category.
warnings.simplefilter('ignore', FutureWarning)
warnings.filterwarnings(action='ignore')

In [None]:
# Get working directory
try:
    # Running as a script/module: use the directory that contains this file.
    path_working_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is not defined in a notebook / interactive session, so fall
    # back to the resolved current directory. Only NameError is caught so
    # that unrelated errors (or Ctrl-C) are not silently swallowed.
    path_working_dir = str(pathlib.Path().resolve())

### Change the next cell on your local machine

In [None]:
# Set input directory
path_input  = '../input/rsna-intracranial-hemorrhage-detection'
path_output = './'
# Fail early with a readable message when the dataset directory is missing.
# An explicit raise (instead of assert) still runs under `python -O`, and
# isdir() also rejects a plain file, for which os.walk() would silently
# yield nothing.
if not os.path.isdir(path_input):
    raise FileNotFoundError(
        'Input directory not found: {!r}'.format(path_input)
    )

In [None]:
# Calc hashsum of files with selected file extension
def _hash_file(path, chunk_size=1024 * 1024):
    """Return the (md5, sha1) hex digests of the file at *path*.

    The file is read in chunks of *chunk_size* bytes, so it is never held
    in memory in full, and both digests are fed from a single pass.
    """
    digest_md5 = hashlib.md5()
    digest_sha1 = hashlib.sha1()
    with open(path, 'rb') as fp:
        for chunk in iter(lambda: fp.read(chunk_size), b''):
            digest_md5.update(chunk)
            digest_sha1.update(chunk)
    return digest_md5.hexdigest(), digest_sha1.hexdigest()


list_output = []
list_ext    = ['.csv', '.dcm']
for (dirpath, _dirnames, filenames) in os.walk(path_input):
    # Directory relative to path_input, always spelled './sub/dir' with
    # forward slashes so the listing (and therefore the hashsum of
    # df_output.csv) does not depend on the OS path separator or on a
    # trailing slash in path_input.
    rel = os.path.relpath(dirpath, path_input)
    rel_dir = '.' if rel == os.curdir else './' + rel.replace(os.sep, '/')
    print(rel_dir, file=sys.stderr)
    for filename in tqdm(filenames):
        # Match true extensions (suffix) only, not '.csv' anywhere in the name.
        if filename.endswith(tuple(list_ext)):
            hex_md5, hex_sha1 = _hash_file(os.path.join(dirpath, filename))
            list_output.append([rel_dir + '/' + filename, hex_md5, hex_sha1])

In [None]:
# Output result
df_output = pd.DataFrame(list_output, columns=['filename', 'md5', 'sha1'])
df_output = df_output.sort_values(by='filename')
# pandas terminates CSV rows with os.linesep ('\r\n' on Windows), which would
# make the hashsum of df_output.csv differ between platforms. Render the CSV
# to text, force '\n' line endings, and write the bytes without translation.
csv_text = df_output.to_csv(index=False).replace('\r\n', '\n')
with open(os.path.join(path_output, 'df_output.csv'), 'wb') as fp_csv:
    fp_csv.write(csv_text.encode('utf-8'))

In [None]:
# Show the first five rows of the result table as a quick sanity check
df_output.head(n=5)

## Check df_output.csv's hashsum
* The `md5sum` and `sha1sum` commands can be used on Ubuntu
* The `md5` and `openssl sha1` commands can be used on OS X
* The `certutil -hashfile df_output.csv MD5` and `certutil -hashfile df_output.csv SHA1` commands can be used on Windows 7-10

In [None]:
# Shell escape: print the MD5 hashsum of df_output.csv in the current directory
!md5sum df_output.csv

In [None]:
# Shell escape: print the SHA1 hashsum of df_output.csv in the current directory
!sha1sum df_output.csv

## Let's enjoy this competition! 🎉