In [None]:
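# Steganography Detection Script
# Runs seven heuristic checks on every image (file size, LSB balance,
# chi-square, pixel differences, entropy, colour correlation, sample pairs)
# and reports how many of them flagged each image.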

# Notebook shell magic: installs the third-party packages imported below
# (-q keeps pip quiet). This line is only valid inside IPython/Jupyter/Colab;
# it is a syntax error when the file is run as a plain Python script.
!pip install pillow numpy scipy matplotlib -q

import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from scipy import stats
import warnings
# Silences every warning for the whole process, including any NumPy/SciPy
# runtime warnings (e.g. invalid-value warnings from degenerate inputs).
warnings.filterwarnings('ignore')

print(" Setup complete! Ready to detect steganography.\n")

class SteganographyDetector:
    """Heuristic steganography detector built from seven independent checks.

    Every ``check_*`` method returns ``(is_suspicious, metric)``; only
    ``check_file_size`` returns ``(is_suspicious, size_in_bytes, ratio)``.
    A check that cannot load or process its input reports "not suspicious"
    with a metric of ``0`` instead of raising.

    The pixel-based checks (everything except ``check_file_size``) accept
    either a path to an image file or an already-decoded ``(H, W, 3)`` array
    holding 0-255 values, so in-memory images can be analysed as well.

    Attributes:
        all_results: One result dict per ``analyze_image`` call, in call order.
        chi_threshold: p-value below which ``chi_square_test`` flags.
        lsb_threshold: LSB imbalance (0-1) above which ``check_lsb_pattern``
            flags.
        entropy_threshold: Entropy in bits above which ``check_entropy`` flags.
    """

    # Remaining decision thresholds, kept as class attributes so they can be
    # tuned per instance without changing the constructor.
    variance_threshold = 1200        # check_pixel_differences flags above this
    correlation_threshold = 0.65     # check_color_correlation flags below this
    sample_pairs_threshold = 0.4     # check_sample_pairs flags below this
    sample_pairs_limit = 10000       # max adjacent pairs check_sample_pairs reads

    _IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp')

    # Console glyphs are written as escapes so the source stays pure ASCII and
    # the output cannot be corrupted by saving the file in the wrong encoding.
    _RULE = "\u2500" * 75            # box-drawing horizontal line
    _CAMERA = "\U0001F4F8"           # camera emoji
    _CHECK = "\u2713"                # check mark
    _CROSS = "\u2717"                # ballot X
    _ARROW = "\u2192"                # rightwards arrow

    def __init__(self):
        self.all_results = []
        self.chi_threshold = 0.05
        self.lsb_threshold = 0.35  # Lowered from 0.4 for better detection
        self.entropy_threshold = 7.5

    def open_image(self, path):
        """Load ``path`` as a fully decoded RGB image.

        Returns:
            A PIL image in mode ``'RGB'``, or ``None`` when the file cannot
            be opened or decoded.
        """
        try:
            # Converting inside the ``with`` forces the pixel data to be read,
            # so the file handle is closed here rather than whenever the lazy
            # image object happens to be garbage collected.
            with Image.open(path) as img:
                return img.convert('RGB')
        except Exception:
            return None

    def _load_rgb_array(self, source, dtype=np.uint8):
        """Return ``source`` as an ``(H, W, 3)`` array of ``dtype``.

        ``source`` is either an image path or an array that already has at
        least three channels on its last axis. Returns ``None`` when the
        image cannot be loaded or the array has the wrong shape.
        """
        if isinstance(source, np.ndarray):
            if source.ndim != 3 or source.shape[2] < 3:
                return None
            # Extra channels (e.g. alpha) are ignored, matching the RGB
            # conversion applied to files.
            return source[:, :, :3].astype(dtype, copy=False)
        img = self.open_image(source)
        if img is None:
            return None
        return np.array(img, dtype=dtype)

    def check_file_size(self, path):
        """Compare the file size on disk with the raw RGB size of the image.

        Returns:
            ``(is_suspicious, size_in_bytes, ratio)`` where ``ratio`` is
            ``size_in_bytes / (width * height * 3)``. PNG/BMP files are
            flagged above a ratio of 1.5, every other format above 0.5.
            ``(False, 0, 0)`` when the image cannot be read.
        """
        try:
            img = self.open_image(path)
            if img is None:
                return False, 0, 0

            width, height = img.size
            actual = os.path.getsize(path)
            expected = width * height * 3
            ratio = actual / expected if expected > 0 else 0

            # os.fspath lets pathlib.Path objects work as well as strings.
            lossless = os.fspath(path).lower().endswith(('.png', '.bmp'))
            limit = 1.5 if lossless else 0.5
            return ratio > limit, actual, ratio
        except Exception:
            return False, 0, 0

    def check_lsb_pattern(self, path):
        """Measure how far the least-significant bits are from a 50/50 split.

        The LSBs of all three channels are pooled into one count.

        Returns:
            ``(is_suspicious, deviation)`` where ``deviation`` is
            ``abs(share_of_ones - 0.5) * 2`` (0 = balanced, 1 = all equal),
            flagged when it exceeds ``lsb_threshold``.
        """
        try:
            pixels = self._load_rgb_array(path)
            if pixels is None:
                return False, 0

            lsb = pixels & 1
            total = lsb.size
            if total == 0:
                return False, 0

            ones_ratio = np.count_nonzero(lsb) / total
            deviation = abs(ones_ratio - 0.5) * 2  # scale to 0-1
            return bool(deviation > self.lsb_threshold), float(deviation)
        except Exception:
            return False, 0

    def chi_square_test(self, path):
        """Chi-square test on even/odd value pairs of the red channel.

        For every pair ``(2k, 2k + 1)`` the two counts are compared with
        their mean; empty pairs are skipped. The p-value is the upper tail
        of a chi-square distribution with 127 degrees of freedom.

        Returns:
            ``(is_suspicious, p_value)``, flagged when
            ``p_value < chi_threshold``.
        """
        try:
            pixels = self._load_rgb_array(path)
            if pixels is None:
                return False, 0

            # One histogram pass instead of 256 full-image comparisons.
            red = pixels[:, :, 0].ravel()
            counts = np.bincount(red, minlength=256).astype(np.float64)
            even, odd = counts[0::2], counts[1::2]
            expected = (even + odd) / 2.0

            filled = expected > 0
            squared = ((even[filled] - expected[filled]) ** 2
                       + (odd[filled] - expected[filled]) ** 2)
            chi_square = float(np.sum(squared / expected[filled]))

            # sf() is the survival function, i.e. 1 - cdf without the
            # cancellation error for very large statistics.
            p_value = float(stats.chi2.sf(chi_square, 127))

            # NOTE(review): the classic pairs-of-values chi-square attack
            # treats a HIGH p-value (equalised pairs) as evidence of
            # embedding. The existing low-p-value rule is kept unchanged
            # here - confirm which direction is intended.
            return bool(p_value < self.chi_threshold), p_value
        except Exception:
            return False, 0

    def check_pixel_differences(self, path):
        """Variance of neighbouring-pixel differences in the red channel.

        Horizontal and vertical differences are pooled.

        Returns:
            ``(is_suspicious, variance)``, flagged when the variance exceeds
            ``variance_threshold``.
        """
        try:
            pixels = self._load_rgb_array(path)
            if pixels is None:
                return False, 0

            # Widen before subtracting: on uint8 data a step of -1 would
            # wrap around to 255 and massively inflate the variance.
            red = pixels[:, :, 0].astype(np.int16)
            diffs = np.concatenate([
                np.diff(red, axis=1).ravel(),
                np.diff(red, axis=0).ravel(),
            ])
            if diffs.size == 0:
                # A single pixel has no neighbours to compare with.
                return False, 0

            variance = float(np.var(diffs))
            return bool(variance > self.variance_threshold), variance
        except Exception:
            return False, 0

    def check_entropy(self, path):
        """Shannon entropy (bits) of all channel values pooled together.

        Returns:
            ``(is_suspicious, entropy)``, flagged when the entropy exceeds
            ``entropy_threshold``.
        """
        try:
            pixels = self._load_rgb_array(path)
            if pixels is None:
                return False, 0

            values = pixels.ravel()
            if values.size == 0:
                return False, 0

            counts = np.bincount(values, minlength=256)
            probs = counts[counts > 0] / values.size
            entropy = float(-np.sum(probs * np.log2(probs)))
            if entropy <= 0:
                # A flat image yields -0.0; normalise so it prints as 0.
                entropy = 0.0

            return bool(entropy > self.entropy_threshold), entropy
        except Exception:
            return False, 0

    def check_color_correlation(self, path):
        """Average pairwise Pearson correlation between the R, G, B channels.

        A pair whose correlation is undefined (a constant channel) counts
        as 1.0.

        Returns:
            ``(is_suspicious, average_correlation)``, flagged when the
            average is below ``correlation_threshold``.
        """
        try:
            pixels = self._load_rgb_array(path, dtype=np.float32)
            if pixels is None:
                return False, 0

            channels = pixels.reshape(-1, 3)
            if channels.shape[0] < 2:
                # Correlation is undefined for fewer than two samples.
                return False, 1.0

            # A constant channel divides 0 by 0 inside corrcoef; the NaN is
            # handled below, so the runtime warning is noise.
            with np.errstate(divide='ignore', invalid='ignore'):
                matrix = np.corrcoef(channels, rowvar=False)

            pairs = np.array([matrix[0, 1], matrix[0, 2], matrix[1, 2]])
            pairs[np.isnan(pairs)] = 1.0
            avg_corr = float(pairs.mean())

            return bool(avg_corr < self.correlation_threshold), avg_corr
        except Exception:
            return False, 0

    def check_sample_pairs(self, path):
        """Compare "one below the next" pairs with equal pairs (red channel).

        Only the first ``sample_pairs_limit`` adjacent pairs of the
        flattened red channel are examined. ``pair_count`` counts pixels
        equal to ``(next - 1) % 256``; ``normal_count`` counts pixels equal
        to the next one.

        Returns:
            ``(is_suspicious, ratio)`` with
            ``ratio = pair_count / normal_count`` (0 when there are no equal
            pairs), flagged when the ratio is below
            ``sample_pairs_threshold``.
        """
        try:
            pixels = self._load_rgb_array(path)
            if pixels is None:
                return False, 0

            red = pixels[:, :, 0].ravel().astype(np.int32)
            limit = max(0, min(self.sample_pairs_limit, red.size - 1))
            current = red[:limit]
            following = red[1:limit + 1]

            # The two conditions are mutually exclusive, so they can be
            # counted independently.
            pair_count = int(np.count_nonzero(current == (following - 1) % 256))
            normal_count = int(np.count_nonzero(current == following))

            # With no equal pairs the ratio is 0, which is below the
            # threshold and is therefore flagged.
            ratio = pair_count / normal_count if normal_count else 0
            return bool(ratio < self.sample_pairs_threshold), ratio
        except Exception:
            return False, 0

    def analyze_image(self, image_path):
        """Run all seven checks on one image and print a per-check report.

        Returns:
            A dict with ``filename``, ``tests`` (per-check ``flag`` and
            formatted ``value``), ``flagged_count`` and ``flagged`` (True
            when at least three checks flagged). The dict is also appended
            to ``all_results``.
        """
        print(f"\n{self._RULE}")
        print(f"{self._CAMERA} {os.path.basename(image_path)}")
        print(f"{self._RULE}")

        results = {'filename': os.path.basename(image_path), 'tests': {}}

        # (result key, printed title, check, metric label, metric format)
        checks = (
            ('File Size', 'File Size', self.check_file_size, 'ratio', '.4f'),
            ('LSB Pattern', 'LSB Pattern', self.check_lsb_pattern, 'dev', '.4f'),
            ('Chi-Square', 'Chi-Square', self.chi_square_test, 'p', '.6f'),
            ('Pixel Diff', 'Pixel Differences', self.check_pixel_differences, 'var', '.2f'),
            ('Entropy', 'Entropy', self.check_entropy, 'val', '.4f'),
            ('Color Corr', 'Color Correlation', self.check_color_correlation, 'corr', '.4f'),
            ('Sample Pairs', 'Sample Pairs', self.check_sample_pairs, 'ratio', '.4f'),
        )

        for number, (key, title, check, unit, spec) in enumerate(checks, 1):
            label = f"{number}. {title}"
            try:
                outcome = check(image_path)
                # The flag is always first and the metric always last, which
                # also covers the three-element result of check_file_size.
                sus, text = outcome[0], format(outcome[-1], spec)
                results['tests'][key] = {'flag': sus, 'value': text}
                status = ' FLAG' if sus else self._CHECK
                print(f"{label:26}{status:20} {unit}: {text}")
            except Exception:
                print(f"{label:26}{self._CROSS}")

        flagged = sum(1 for test in results['tests'].values() if test['flag'])
        results['flagged'] = flagged >= 3
        results['flagged_count'] = flagged

        if flagged == 0:
            verdict = "CLEAN"
        elif flagged <= 2:
            verdict = "LIKELY CLEAN"
        elif flagged == 3:
            verdict = "SUSPICIOUS"
        elif flagged == 4:
            verdict = "HIGHLY SUSPICIOUS"
        else:
            verdict = "EXTREMELY SUSPICIOUS (DEFINITE STEGANOGRAPHY)"

        print(f"{self._RULE}")
        print(f"TOTAL FLAGS: {flagged}/{len(checks)} {self._ARROW} {verdict}")

        self.all_results.append(results)
        return results

    def analyze_folder(self, folder_path):
        """Analyse every PNG/JPEG/BMP file below ``folder_path`` (recursive).

        Prints a per-image report followed by a summary; returns ``None``.
        """
        images = [
            os.path.join(root, name)
            for root, _dirs, names in os.walk(folder_path)
            for name in names
            if name.lower().endswith(self._IMAGE_EXTENSIONS)
        ]

        print(f"\nFound {len(images)} image(s)\n")
        if not images:
            return

        suspicious = []
        for idx, img in enumerate(images, 1):
            print(f"[{idx}/{len(images)}]", end=" ")
            if self.analyze_image(img)['flagged']:
                suspicious.append(img)

        print(f"\n{'='*50}")
        print("FINAL SUMMARY")
        print(f"{'='*50}")
        print(f"Total analyzed: {len(images)}")
        print(f"Suspicious: {len(suspicious)}")
        print(f"Clean: {len(images) - len(suspicious)}")
        print(f"{'='*50}\n")

    def show_visuals(self, image_path):
        """Plot a 2x3 diagnostic figure for one image.

        Panels: original, LSB planes, LSB counts, red-channel histogram,
        horizontal red-channel differences and their distribution. Prints a
        message instead of raising when the figure cannot be produced.
        """
        try:
            img = self.open_image(image_path)
            if img is None:
                return
            arr = np.array(img)
            fig, p = plt.subplots(2, 3, figsize=(16, 10))
            fig.suptitle(f'Analysis: {os.path.basename(image_path)}', fontsize=14, fontweight='bold')

            p[0, 0].imshow(img)
            p[0, 0].set_title('Original')
            p[0, 0].axis('off')

            lsb_bits = arr & 1
            p[0, 1].imshow(lsb_bits * 255, cmap='gray')
            p[0, 1].set_title('LSB Visualization')
            p[0, 1].axis('off')

            ones = int(np.count_nonzero(lsb_bits))
            p[0, 2].bar([0, 1], [lsb_bits.size - ones, ones], color=['#3498db', '#e74c3c'])
            p[0, 2].set_title('LSB Count')
            p[0, 2].set_xticks([0, 1])

            red = arr[:, :, 0]
            p[1, 0].hist(red.flatten(), bins=256, color='red', alpha=0.7)
            p[1, 0].set_title('Red Channel')

            # Widen first so negative steps do not wrap around in uint8.
            diff = np.abs(np.diff(red.astype(np.int16), axis=1))
            p[1, 1].imshow(diff, cmap='hot')
            p[1, 1].set_title('Pixel Differences')
            p[1, 1].axis('off')

            p[1, 2].hist(diff.flatten(), bins=50, color='orange', alpha=0.7)
            p[1, 2].set_title('Diff Distribution')

            plt.tight_layout()
            plt.show()
            # Release the figure so repeated calls do not accumulate memory.
            plt.close(fig)
        except Exception as exc:
            print(f"Could not visualize {os.path.basename(image_path)}: {exc}")



# MAIN EXECUTION
#
# Notebook driver: optionally collects uploads (Google Colab only), runs the
# detector over the image folder, shows diagnostic plots for the first three
# images, writes a text report and offers it for download.

import shutil
import zipfile

IMAGE_DIR = 'my_images'
IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg', '.bmp')

print("="*50)
print("STEGANOGRAPHY DETECTOR")
print("="*50)

# google.colab only exists inside Colab; anywhere else the script simply
# analyses whatever is already in IMAGE_DIR.
try:
    from google.colab import files
    in_colab = True
except ImportError:
    in_colab = False

uploaded = {}
if in_colab:
    print("\nUpload images...\n")
    try:
        uploaded = files.upload()
    except Exception as exc:
        print(f"Upload failed: {exc}")

os.makedirs(IMAGE_DIR, exist_ok=True)

for fname in uploaded:
    # Compare case-insensitively, like the folder scan does, so that e.g.
    # "PHOTO.PNG" is not silently left out of the analysis.
    lowered = fname.lower()
    try:
        if lowered.endswith('.zip'):
            # zipfile strips absolute paths and ".." components from member
            # names, but it does not limit the extracted size.
            with zipfile.ZipFile(fname, 'r') as archive:
                archive.extractall(IMAGE_DIR)
        elif lowered.endswith(IMAGE_SUFFIXES):
            shutil.move(fname, os.path.join(IMAGE_DIR, fname))
    except (OSError, RuntimeError, zipfile.BadZipFile) as exc:
        # Keep going with the remaining uploads, but say what was skipped.
        print(f"Skipping {fname}: {exc}")

print(f"Ready! {len(uploaded)} file(s)\n")

detector = SteganographyDetector()
detector.analyze_folder(IMAGE_DIR)

# Show visuals
# The loop variable must not be called "files": that would rebind the
# google.colab module imported above and break the download step below.
images = [
    os.path.join(root, name)
    for root, _dirs, names in os.walk(IMAGE_DIR)
    for name in names
    if name.lower().endswith(IMAGE_SUFFIXES)
]

if images:
    print("Visualizing first 3 images...\n")
    for img in images[:3]:
        print(f"Visualizing: {os.path.basename(img)}")
        detector.show_visuals(img)

# Report
entries = []
for r in detector.all_results:
    count = r['flagged_count']
    if count >= 4:
        verdict = "EXTREME"
    elif count >= 3:
        verdict = "SUSPICIOUS"
    elif count >= 2:
        verdict = "POSSIBLE"
    else:
        verdict = "CLEAN"
    # The separator keeps the verdict from running into "flags".
    entries.append(f"{r['filename']}: {count}/7 flags - {verdict}\n")

report = "DETECTION REPORT\n" + "="*50 + "\n\n" + "".join(entries)

# Explicit UTF-8 so file names with non-ASCII characters can always be written.
with open('results.txt', 'w', encoding='utf-8') as report_file:
    report_file.write(report)

print(report)

if in_colab:
    try:
        choice = input("Download report? (y/n): ").strip().lower() or "y"
        if choice == "y":
            files.download('results.txt')
    except Exception as exc:
        print(f"Download failed: {exc}")

print("\n DONE!")
