<a href="https://colab.research.google.com/github/anwitarajendra/log-scanner-using-dfa/blob/main/DFA_log_scanner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:

!pip install streamlit boto3




#**DFA LOG SCANNER**

# Define Intrusion Patterns and Simulate DFA

In [1]:
import re


patterns = {
    'Login Failures': re.compile(r'(login failed.*?){3,}', re.IGNORECASE),
    'Unauthorized Access': re.compile(r'unauthorized access', re.IGNORECASE),
    'SQL Injection': re.compile(r'(select|union|insert).*from', re.IGNORECASE),
    'Suspicious IP': re.compile(r'192\.168\.1\.\d{1,3}', re.IGNORECASE),
}


# Sample Log Data for Testing

In [2]:

log_data = """
[10:02:14] login failed for user 'admin'
[10:02:16] login failed for user 'admin'
[10:02:19] login failed for user 'admin'
[10:02:21] login successful for user 'admin'
[10:03:05] unauthorized access attempt from IP 192.168.1.10
[10:03:10] SELECT * FROM users WHERE id=1
"""


#Run Pattern Matching on Sample Logs

In [3]:

for name, pattern in patterns.items():
    matches = pattern.findall(log_data)
    if matches:
        print(f"⚠️ {name}: {len(matches)} suspicious instance(s) found")
    else:
        print(f"✅ {name}: No issues found")


✅ Login Failures: No issues found
⚠️ Unauthorized Access: 1 suspicious instance(s) found
⚠️ SQL Injection: 1 suspicious instance(s) found
⚠️ Suspicious IP: 1 suspicious instance(s) found


#Simulate DFA to Detect Repeated Login Failures

In [4]:
def simulate_dfa(text, dfa_words):
    state = 0
    count = 0
    words = text.lower().split()
    for word in words:
        if word == dfa_words[state]:
            state += 1
            if state == len(dfa_words):
                count += 1
                state = 0
        else:
            state = 0
    return count



dfa_pattern = ['login', 'failed', 'login', 'failed', 'login', 'failed']
occurrences = simulate_dfa(log_data, dfa_pattern)
print(f"DFA match: {occurrences} suspicious login pattern(s) found")


DFA match: 0 suspicious login pattern(s) found


# Write Streamlit App to app.py

In [5]:
%%writefile app.py
import streamlit as st
import re


patterns = {
    'Login Failures (x3)': re.compile(r'(login failed.*?){3,}', re.IGNORECASE),
    'Unauthorized Access': re.compile(r'unauthorized access', re.IGNORECASE),
    'SQL Injection': re.compile(r'(select|union|insert).*from', re.IGNORECASE),
    'Suspicious IP': re.compile(r'192\.168\.1\.\d{1,3}', re.IGNORECASE),
}

st.title("🛡️ Intrusion Detection Demo (Finite Automata + Regex)")


log_input = st.text_area("Paste log data below", height=250)

if st.button("Scan Logs"):
    if not log_input.strip():
        st.warning("Please paste some log content first.")
    else:
        for name, pattern in patterns.items():
            matches = pattern.findall(log_input)
            if matches:
                st.error(f"⚠️ {name}: {len(matches)} match(es) found")
            else:
                st.error(f"🔐 {name}: suspicious patterns")


Writing app.py


# Install Required Packages

In [6]:
!pip install streamlit pyngrok


Collecting streamlit
  Downloading streamlit-1.47.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.12-py3-none-any.whl.metadata (9.4 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.0-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m80.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.12-py3-none-any.whl (26 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m90.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (

# Configure ngrok Authentication

In [7]:
from pyngrok import conf

conf.get_default().auth_token = "2zz8ENrte4PMTZNEA052Aok2iro_4waswxuoGBmpjrKe7PP72"


# Rewrite Final Streamlit App

In [8]:
%%writefile app.py
import streamlit as st
import re


patterns = {
    'Login Failures (x2)': re.compile(r'(login failed.*?){2,}', re.IGNORECASE),
    'Unauthorized Access': re.compile(r'unauthorized access', re.IGNORECASE),
    'SQL Injection': re.compile(r'(select|union|insert).*from', re.IGNORECASE),
    'Suspicious IP': re.compile(r'192\.168\.1\.\d{1,3}', re.IGNORECASE),
}

st.title("🛡️ Intrusion Detection System (Regex + Automata Style)")
log_input = st.text_area("Paste log content here:", height=250)

if st.button("Scan"):
    if not log_input.strip():
        st.warning("Please enter some log data.")
    else:
        for name, pattern in patterns.items():
            matches = pattern.findall(log_input)
            if matches:
                st.error(f"⚠️ {name}: {len(matches)} match(es) found")
            else:
                st.error(f"🔐 {name}: issues detected")


Overwriting app.py


In [9]:
try:
    public_url = ngrok.connect(port=8501)
    print("Streamlit URL:", public_url)
except Exception as e:
    print("❌ Ngrok connection failed:", e)


❌ Ngrok connection failed: name 'ngrok' is not defined


In [10]:
!killall ngrok


ngrok: no process found


In [11]:
from pyngrok import conf
conf.get_default().auth_token = "2zz8ENrte4PMTZNEA052Aok2iro_4waswxuoGBmpjrKe7PP72"


In [12]:
%%writefile app.py
import streamlit as st
import re


def simulate_dfa(text, dfa_words):
    state = 0
    count = 0
    words = re.findall(r'\w+', text.lower())
    for word in words:
        if word == dfa_words[state]:
            state += 1
            if state == len(dfa_words):
                count += 1
                state = 0
        else:
            state = 0
    return count


patterns = {
    'Unauthorized Access': re.compile(r'unauthorized access', re.IGNORECASE),
    'SQL Injection': re.compile(r'(select|union|insert).*from', re.IGNORECASE),
    'Suspicious IP': re.compile(r'192\.168\.1\.\d{1,3}', re.IGNORECASE),
}

st.title("🛡️ DFA Log Scanner")

log_input = st.text_area("Paste log data below", height=250)

if st.button("Scan Logs"):
    if not log_input.strip():
        st.warning("Please paste some log content first.")
    else:

        dfa_pattern = ['login', 'failed', 'login', 'failed', 'login', 'failed']
        occurrences = simulate_dfa(log_input, dfa_pattern)
        if occurrences > 0:
            st.error(f"⚠️ Login Failures (x3): {occurrences} suspicious pattern(s) found")
        else:
            st.error("🔐 Login Failures (x3): suspicious patterns")


        for name, pattern in patterns.items():
            matches = pattern.findall(log_input)
            if matches:
                st.error(f"⚠️ {name}: {len(matches)} match(es) found")
            else:
                st.error(f"🔐 {name}: suspicious patterns")



Overwriting app.py


# Launch Streamlit App via ngrok Tunnel

In [13]:
import threading
import time
from pyngrok import ngrok


public_url = ngrok.connect(8501, "http")
print("✅ Streamlit Public URL:", public_url)


def run_app():
    !streamlit run app.py

thread = threading.Thread(target=run_app)
thread.start()


time.sleep(5)



✅ Streamlit Public URL: NgrokTunnel: "https://f665116ef809.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.48.5.48:8501[0m
[0m
