In [2]:
# Install dependencies
!pip install python-magic python-docx pdfplumber requests

import os
import magic
import zipfile
import pdfplumber
import docx
import requests
from IPython.display import display
from google.colab import files

# OpenRouter API key.
# Read from the OPENROUTER_API_KEY environment variable so the secret is never
# stored in the notebook itself. Set it before running this cell, e.g.
#   os.environ["OPENROUTER_API_KEY"] = "<your key>"
# NOTE(review): a real key used to be hard-coded on this line; treat that key
# as leaked and revoke it.
# Falls back to an empty string, in which case the API calls will be rejected.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Upload file
# files.upload() returns a mapping keyed by the uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Nothing came back from the upload step (presumably the dialog was
    # cancelled — TODO confirm): stop with a clear message instead of the
    # bare IndexError the first-element lookup would raise.
    raise RuntimeError("No file was uploaded; nothing to scan.")

# Only the first uploaded file is scanned.
filename = next(iter(uploaded))
print(f"Uploaded: {filename}")

# Detect FILE type
# mime=True makes python-magic report a MIME type string for the file.
mime = magic.Magic(mime=True)
file_type = mime.from_file(filename)
print(f"Detected file type: {file_type}")

# -------- File Extractor Functions --------
def extract_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*, concatenated.

    A page whose ``extract_text()`` result is falsy contributes an empty
    string. Pages are joined with no separator between them.
    """
    with pdfplumber.open(file_path) as document:
        page_texts = [page.extract_text() or "" for page in document.pages]
    return "".join(page_texts)

def extract_docx(file_path):
    """Return the paragraph text of the .docx at *file_path*, one per line.

    Only ``Document.paragraphs`` is read; paragraphs are joined with a
    newline.
    """
    document = docx.Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)

def extract_zip(file_path):
    """Unpack the ZIP at *file_path* and return the text of its file members.

    Every member is extracted into a fresh temporary directory (deleted again
    before returning) and handed to ``extract_text_from_file``. The results
    are concatenated in archive order with no separator. Directory entries
    are skipped.

    Args:
        file_path: Path of the ZIP archive to read.

    Returns:
        The concatenated text extracted from all file members; an empty
        string when the archive has no file members.
    """
    import tempfile  # local import: only this function needs it

    parts = []
    # A private directory per call keeps nested archives (which re-enter this
    # function through extract_text_from_file) and earlier runs from
    # overwriting each other's files.
    with zipfile.ZipFile(file_path, 'r') as zip_ref, \
            tempfile.TemporaryDirectory(prefix="temp_zip_") as extract_dir:
        for member in zip_ref.infolist():
            # Use the path returned by extract(): zipfile sanitizes member
            # names (absolute paths, ".." components), so rebuilding the path
            # from the raw name could point at a file outside extract_dir.
            extracted_path = zip_ref.extract(member, extract_dir)
            if os.path.isfile(extracted_path):
                parts.append(extract_text_from_file(extracted_path))
    return "".join(parts)

def extract_strings_binary(file_path):
    """Return the printable-ASCII strings found in a binary file.

    A string is a maximal run of at least four consecutive bytes in the range
    32..126. Each string found is followed by a newline in the result. A run
    that extends to the very end of the file is included as well.

    Args:
        file_path: Path of the file to scan.

    Returns:
        All strings found, in file order, each terminated by a newline; an
        empty string when none are found.
    """
    import re  # local import: only this function needs it

    min_length = 4
    chunk_size = 65536
    # Any run of bytes outside the printable ASCII range separates strings.
    separator = re.compile(rb"[^\x20-\x7e]+")
    found = []

    def keep(run):
        # Runs shorter than min_length are treated as noise and dropped.
        if len(run) >= min_length:
            found.append(run.decode("ascii") + "\n")

    # Printable bytes at the end of a chunk may continue in the next chunk,
    # so they are carried over in `pending` until a separator (or EOF) ends
    # the run.
    pending = bytearray()
    with open(file_path, 'rb') as f:
        while chunk := f.read(chunk_size):
            pieces = separator.split(chunk)
            pending += pieces[0]
            if len(pieces) > 1:
                keep(pending)
                for run in pieces[1:-1]:
                    keep(run)
                pending = bytearray(pieces[-1])
    # Flush the final run: the file may end without a trailing separator.
    keep(pending)
    return "".join(found)

def extract_text_from_file(file_path):
    """Detect the MIME type of *file_path* and return its text content.

    Dispatches to the matching extractor: PDF, .docx, ZIP, printable strings
    for executables / generic binary data, or a plain read for ``text/*``
    files. Any other type yields a placeholder string naming the detected
    MIME type.
    """
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    binary_markers = ("application/x-dosexec", "octet-stream")

    detector = magic.Magic(mime=True)
    mime_type = detector.from_file(file_path)

    if mime_type == "application/pdf":
        return extract_pdf(file_path)
    if mime_type == docx_mime:
        return extract_docx(file_path)
    if mime_type == "application/zip":
        return extract_zip(file_path)
    if any(marker in mime_type for marker in binary_markers):
        return extract_strings_binary(file_path)
    if mime_type.startswith("text"):
        # Undecodable bytes are dropped rather than raising.
        with open(file_path, 'r', errors='ignore') as handle:
            return handle.read()
    return f"[Binary or unknown file type: {mime_type}]"

# -------- LLM Scan Prompt --------
def scan_with_openrouter(model_id, text, *, max_chars=4000, timeout=60):
    """Ask one OpenRouter-hosted model whether *text* looks malicious.

    Args:
        model_id: OpenRouter model identifier sent in the request's
            ``"model"`` field.
        text: Extracted file content. Only the first *max_chars* characters
            are embedded in the prompt.
        max_chars: Maximum number of characters of *text* to send.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The model's reply text on success; otherwise a string starting with
        ``"Error:"`` that describes the failure (network error, non-200
        status, or a response body without the expected structure).
    """
    # NOTE(security): `text` is untrusted file content interpolated straight
    # into the prompt, so a crafted file can attempt prompt injection. Treat
    # the returned verdict as advisory only.
    prompt = f"""
You are a Antivirus software. Analyze the following file contents text and determine if there are any  malicious code, malware,miner,hacker code present in the textual content. only flag malicious when the condition is very  critical
also "This file may contain a mix of safe and unsafe code. Identify even small sections that may be insecure, even if most of the code appears normal."
Give answer in one work only: "Malicious" or "Safe",followed by a very very short explanation.

Content:
{text[:max_chars]}
"""

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "HTTP-Referer": "https://yourdomain.com",
        "X-Title": "Malware Detector Prototype",
        "Content-Type": "application/json"
    }

    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
    }

    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection.
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            json=payload,
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        return f"Error: request failed - {exc}"

    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        # A 200 response whose body is not the expected chat-completion shape
        # is reported in the same "Error:" style as the other failures.
        return f"Error: unexpected response format - {response.text}"

# -------- Call Each Model Independently --------
try:
    # Pull the text out of the uploaded file and show a short preview of it.
    content = extract_text_from_file(filename)
    print("Extracted content (truncated):")
    print(content[0:500])

    # Display label -> OpenRouter model ID
    models = {
        "Meta: Llama 3.3 8B Instruct  ": "meta-llama/llama-3.3-8b-instruct:free",
        "Meta: Llama 4 Scout": "meta-llama/llama-4-scout:free",
        "Meta: Llama 4 Maverick": "meta-llama/llama-4-maverick:free",
    }

    # Query every model separately and print each verdict under its label.
    print("\n=== Scan Results ===")
    for model_name in models:
        model_id = models[model_name]
        print(f"\n🔍 Model: {model_name}")
        verdict = scan_with_openrouter(model_id, content)
        print(verdict)
except Exception as err:
    # Any failure during extraction or scanning is reported, not re-raised.
    print(f"Error: {err}")


Collecting python-magic
  Downloading python_magic-0.4.27-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting pdfplumber
  Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting pdfminer.six==20250506 (from pdfplumber)
  Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Downloading python_magic-0.4.27-py2.py3-none-any.whl (13 kB)
Downloading python_docx-1.1.2-py3-no

Saving 3ba should increase.txt to 3ba should increase.txt
Uploaded: 3ba should increase.txt
Detected file type: text/plain
Extracted content (truncated):
3ba should increase 
TW.

=== Scan Results ===

🔍 Model: Meta: Llama 3.3 8B Instruct  
Safe, appears to be a cryptic or nonsensical text with no clear malicious intent.

🔍 Model: Meta: Llama 4 Scout
Safe 

Explanation: The given content seems to be a fragment of a configuration or a simple text and does not appear to contain any obvious malicious code or commands that are typically associated with malware, miners, or hacker activities.

🔍 Model: Meta: Llama 4 Maverick
Safe. The content appears to be a partial sentence or phrase without any executable code.
