<a href="https://colab.research.google.com/github/VEHEMENT2003/Python_projects/blob/main/file_scanner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
# Ask Colab for file uploads and keep whatever files.upload() returns in
# `uploaded`. Presumably these are the strings/hashes lists used by the
# scanner cell below (the recorded output shows strings.txt and hashes.txt).
uploaded = files.upload()  # This will prompt you to upload the files


Saving strings.txt to strings.txt
Saving hashes.txt to hashes.txt


In [None]:
import argparse
import hashlib
import os
import subprocess
import sys
from datetime import datetime
from os import listdir

# Strip notebook-injected options from the command line before argparse
# sees it: every argv entry that begins with "-f" is discarded.
# NOTE(review): only entries that themselves start with "-f" are removed; a
# separate value following a bare "-f" flag stays in sys.argv - confirm
# that this is the intended behaviour.
sys.argv = list(filter(lambda arg: not arg.startswith("-f"), sys.argv))

# Directory walk to identify files
def getFiles(directory, stringfile):
    """Return the paths of every file under ``directory`` that should be scanned.

    The tree is walked recursively with ``os.walk``. Any path that contains
    ``stringfile`` or the text "scanner.py" as a substring is left out, so the
    signature list and the scanner itself are not reported as infected.

    Args:
        directory: Root directory of the walk.
        stringfile: Name (or path fragment) of the strings file to exclude.

    Returns:
        A sorted list of unique file paths, so the scan order is the same on
        every run.
    """
    found = set()
    for path, _subdirs, localfiles in os.walk(directory):
        for filename in localfiles:
            candidate = os.path.join(path, filename)
            # Substring test (not an exact match) mirrors the exclusion rule
            # this scanner has always used.
            if stringfile in candidate or "scanner.py" in candidate:
                continue
            found.add(candidate)

    return sorted(found)

# Read malicious strings from file
def getStrings(stringfile):
    """Load the list of malicious strings from ``stringfile``.

    Each line is stripped of trailing whitespace and lower-cased. Duplicates
    are removed and the first-seen order is kept.

    Blank lines are skipped: an empty pattern is a substring of every line,
    so keeping it would make every non-empty file look malicious.

    Args:
        stringfile: Path of a text file with one pattern per line.

    Returns:
        A list of unique, lower-cased, non-empty patterns.
    """
    with open(stringfile, 'r') as sfile:
        cleaned = [line.rstrip().lower() for line in sfile]

    # dict.fromkeys de-duplicates while preserving insertion order.
    return [pattern for pattern in dict.fromkeys(cleaned) if pattern]

# Read malicious hashes from file
def getHashes(hashfile):
    """Load the list of malicious hashes from ``hashfile``.

    Only the first whitespace-separated field of each line is used, so both a
    bare hash and a "hash  filename" line are accepted. Hashes are
    lower-cased and de-duplicated, keeping the first-seen order.

    Blank or whitespace-only lines are skipped instead of raising
    ``IndexError``.

    Args:
        hashfile: Path of a text file with one hash per line.

    Returns:
        A list of unique, lower-cased hash strings.
    """
    hashes = {}
    with open(hashfile) as hfile:
        for line in hfile:
            fields = line.split(None, 1)
            if not fields:
                # Nothing but whitespace on this line.
                continue
            hashes.setdefault(fields[0].lower(), None)

    return list(hashes)

# Check for malicious strings
def checkStrings(strings, files):
    """Scan ``files`` for any of the given malicious ``strings``.

    Every file is read as UTF-8 text (undecodable bytes are ignored),
    lower-cased, and searched line by line for each pattern. Progress and
    every detection are printed to stdout.

    Args:
        strings: Patterns to look for. They are compared against lower-cased
            file content, so they are expected to be lower-case already.
            Trailing whitespace is stripped and empty patterns are ignored,
            because an empty pattern would match every line.
        files: Paths of the files to scan.

    Returns:
        The paths that contain at least one pattern, each listed once, in
        scan order.
    """
    print("Scanning files against strings")
    badFiles = []

    patterns = [s.rstrip() for s in strings]
    patterns = [s for s in patterns if s]

    for f in files:
        print("Scanning:", f)
        try:
            with open(f, 'r', encoding='utf-8', errors='ignore') as singlefile:
                filecontent = [line.lower() for line in singlefile]
        except OSError as err:
            # One unreadable entry (dangling symlink, permission problem, ...)
            # must not abort the scan of the remaining files.
            print("\tUnable to read file -", err)
            continue

        flagged = False
        for s in patterns:
            if any(s in line for line in filecontent):
                print("\tMALICIOUS STRING DETECTED -", s)
                flagged = True
        if flagged:
            badFiles.append(f)
    print("")
    return badFiles

# Check for malicious hashes
def checkHash(hashes, files):
    """Scan ``files`` for content whose MD5 digest is in ``hashes``.

    The digest is computed in-process with ``hashlib`` rather than by building
    an ``md5sum`` shell command from the file name. File names come from the
    directory being scanned, so interpolating them into a shell string would
    allow command injection and would break on names with spaces or quotes.

    Progress and every detection are printed to stdout.

    Args:
        hashes: Hex MD5 digests of known malicious files; compared
            case-insensitively.
        files: Paths of the files to scan.

    Returns:
        The paths whose MD5 digest is one of ``hashes``, in scan order.
    """
    print("Scanning files against hashes")
    badFiles = []
    known = {h.lower() for h in hashes}

    for f in files:
        print("Scanning:", f)

        # Calculate MD5 hash, reading in chunks so large files are not
        # loaded into memory at once.
        digest = hashlib.md5()
        try:
            with open(f, 'rb') as binfile:
                for chunk in iter(lambda: binfile.read(65536), b""):
                    digest.update(chunk)
        except OSError as err:
            # Keep scanning the remaining files if one cannot be read.
            print("\tUnable to read file -", err)
            continue
        fileHash = digest.hexdigest()

        if fileHash in known:
            print("\tMALICIOUS HASH DETECTED -", fileHash)
            badFiles.append(f)
    print("")
    return badFiles

def main():
    """Command-line entry point of the malware scanner.

    Parses the arguments (a strings file, an optional hash file and an
    optional directory that defaults to the current working directory),
    gathers the files to scan, runs the string scan and - when a hash file was
    supplied - the hash scan, then prints the flagged files and the elapsed
    time.
    """
    startTime = datetime.now()
    print("Malware scanner\n")

    def report(heading, entries):
        # Counted heading, divider, one entry per line, then a blank line.
        print(heading, len(entries))
        print("-----")
        for entry in entries:
            print(entry)
        print("")

    parser = argparse.ArgumentParser(description="Scan for malicious files")
    parser.add_argument(
        "stringfile",
        type=str,
        help="File containing a list of malicious strings from known malicious files",
    )
    parser.add_argument(
        "-H",
        "--hashfile",
        type=str,
        help="File containing a list of MD5 hashes from known malicious files",
    )
    parser.add_argument(
        "-D",
        "--directory",
        type=str,
        help="The directory to scan; the default scan directory is the local directory",
        default=os.getcwd(),
    )
    args = parser.parse_args()

    files = getFiles(args.directory, args.stringfile)
    report("Files identified:", files)

    strings = getStrings(args.stringfile)
    report("Malicious strings loaded:", strings)

    badFiles = checkStrings(strings, files)

    # The hash scan is optional and only runs when -H/--hashfile was given.
    if args.hashfile is not None:
        hashes = getHashes(args.hashfile)
        report("Malicious hashes loaded:", hashes)
        badFiles = badFiles + checkHash(hashes, files)

    # A file can be flagged by several patterns and by both scans; list it once.
    badFiles = list(set(badFiles))
    print("Malicious files identified:", len(badFiles))
    print("-----")
    if not badFiles:
        print("No malware found")
    for flagged in badFiles:
        print(flagged)
    print("")

    print("Time taken to run scan:", datetime.now() - startTime)

# Run the scanner only when this code is executed directly (as a script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


Malware scanner

Files identified: 23
-----
/content/.config/logs/2024.10.21/13.22.04.157217.log
/content/.config/configurations/config_default
/content/.config/logs/2024.10.21/13.21.33.039045.log
/content/.config/.last_opt_in_prompt.yaml
/content/hashes.txt
/content/.config/.last_survey_prompt.yaml
/content/.config/logs/2024.10.21/13.22.15.226603.log
/content/.config/logs/2024.10.21/13.22.15.869361.log
/content/sample_data/README.md
/content/sample_data/anscombe.json
/content/.config/gce
/content/.config/default_configs.db
/content/.config/.last_update_check.json
/content/sample_data/california_housing_test.csv
/content/sample_data/mnist_test.csv
/content/.config/logs/2024.10.21/13.21.53.990169.log
/content/strings.txt
/content/.config/active_config
/content/sample_data/california_housing_train.csv
/content/.config/config_sentinel
/content/sample_data/mnist_train_small.csv
/content/.config/hidden_gcloud_config_universe_descriptor_data_cache_configs.db
/content/.config/logs/2024.10.21/