# Quick Cybersecurity Analytics (Colab Demo)

This mini-notebook shows how to load simple web/auth logs with **pandas**, perform quick anomaly checks (e.g., **brute-force logins**, **404 scans**), and visualize suspicious activity. Use it as a 10–15 minute teaching demo.

**Instructions**
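1. Upload `security_logs_demo.csv` to the Colab session's working directory (or, when running locally, place the CSV in the same directory as this notebook). The notebook reads the columns `timestamp`, `src_ip`, `endpoint`, `status`, and `user_agent`.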
2. Run cells top-to-bottom.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print('Libraries imported.')

## Load the CSV

In [None]:
# Load the demo log file into `df`, parse the `timestamp` column, and order
# the events chronologically.
CSV_PATH = 'security_logs_demo.csv'

try:
    df = pd.read_csv(CSV_PATH)
except FileNotFoundError as exc:
    # Stop here with an actionable message. Merely printing and carrying on
    # would either fail on the next line with an unrelated NameError (fresh
    # kernel) or silently keep working on a stale `df` from an earlier run.
    raise FileNotFoundError(
        f'Could not find {CSV_PATH}. Upload it or adjust the path.'
    ) from exc

# Real datetimes are required for the sorting and time-window arithmetic
# performed by the cells below.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.sort_values('timestamp')
df.head()

## Quick EDA

In [None]:
# Quick overview of the loaded log: size, covered time span, and the
# distribution of HTTP status codes.
row_total = len(df)
first_seen = df['timestamp'].min()
last_seen = df['timestamp'].max()
status_tally = df['status'].value_counts().sort_index()

print('Rows:', row_total)
print('Time range:', first_seen, 'to', last_seen)
print('\nStatus counts:\n', status_tally)

# Last expression of the cell: the ten most frequent source IPs.
df['src_ip'].value_counts().head(10)

## Rule 1: Failed-login burst (possible brute force)

In [None]:
# Rule 1: flag every source IP that produces at least THRESH failed logins
# (status 401 on /login) within WINDOW_MIN minutes.
#
# A window is opened at each failed attempt and is inclusive on both ends, so
# a sustained burst yields several overlapping rows for the same IP.
fail = df[(df['endpoint']=='/login') & (df['status']==401)].copy()
fail = fail.set_index('timestamp').sort_index()

WINDOW_MIN = 3
THRESH = 10

window = pd.Timedelta(minutes=WINDOW_MIN)  # loop-invariant, built once
suspects = []
for ip, g in fail.groupby('src_ip'):
    times = g.index.to_series().sort_values()
    for start_t in times:
        end_t = start_t + window
        # Failed attempts by this IP inside [start_t, end_t].
        count = int(((times >= start_t) & (times <= end_t)).sum())
        if count >= THRESH:
            suspects.append({'src_ip': ip, 'start': start_t, 'end': end_t, 'fail_count': count})

# Passing the columns explicitly keeps the schema stable when nothing was
# flagged; without them an empty result is a column-less frame and any later
# access such as suspects_df['src_ip'] raises KeyError.
suspects_df = pd.DataFrame(
    suspects, columns=['src_ip', 'start', 'end', 'fail_count']
).drop_duplicates(subset=['src_ip','start','end'])
suspects_df.head()

## Rule 2: 404 scanning activity

In [None]:
# Rule 2: flag every source IP that receives 404 responses for at least
# THRESH distinct endpoints within WINDOW_MIN minutes.
#
# A window is opened at each 404 event and is inclusive on both ends, so one
# scan can yield several overlapping rows for the same IP.
scan = df[df['status']==404].copy()
WINDOW_MIN = 5
THRESH = 8

window = pd.Timedelta(minutes=WINDOW_MIN)  # loop-invariant, built once
hits = []
for ip, g in scan.groupby('src_ip'):
    g = g.set_index('timestamp').sort_index()
    for start in g.index:
        end = start + window
        sub = g[(g.index>=start) & (g.index<=end)]
        # Compute once; used both for the threshold test and the report row.
        unique_endpoints = sub['endpoint'].nunique()
        if unique_endpoints >= THRESH:
            hits.append({'src_ip': ip, 'start': start, 'end': end, 'unique_404_endpoints': unique_endpoints})

# Passing the columns explicitly keeps the schema stable when nothing was
# flagged; without them an empty result is a column-less frame and any later
# access such as hits_df['src_ip'] raises KeyError.
hits_df = pd.DataFrame(
    hits, columns=['src_ip', 'start', 'end', 'unique_404_endpoints']
).drop_duplicates(subset=['src_ip','start','end'])
hits_df.head()

## Rule 3: Suspicious user agents

In [None]:
# Rule 3: select events whose user-agent string contains any of the listed
# substrings.
suspicious_strings = ['sqlmap', 'curl', 'python-requests']

# The substrings are joined into a single alternation pattern. Matching is
# case-insensitive, and rows with a missing user agent count as non-matching.
ua_pattern = '|'.join(suspicious_strings)
is_suspicious_ua = df['user_agent'].str.contains(ua_pattern, case=False, na=False)

ua_flag = df[is_suspicious_ua]
ua_flag[['timestamp','src_ip','endpoint','status','user_agent']].head(20)

## Visualizations

In [None]:
# Figure 1: bar chart of the ten source IPs with the most events.
ip_counts = df['src_ip'].value_counts().head(10)
fig_ips, ax_ips = plt.subplots()
ip_counts.plot(kind='bar', ax=ax_ips)
ax_ips.set_title('Top Source IPs (by event count)')
ax_ips.set_xlabel('Source IP')
ax_ips.set_ylabel('Events')
fig_ips.tight_layout()
plt.show()

# Figure 2: events per minute, one line per HTTP status code.
# The helper column `minute` is (re)assigned on `df`; re-running is idempotent.
df['minute'] = df['timestamp'].dt.floor('min')
ts = df.groupby(['minute','status']).size().unstack(fill_value=0)

# Draw on an explicitly created Axes. DataFrame.plot() without `ax=` opens a
# figure of its own, so a preceding bare plt.figure() would be left behind as
# an empty extra figure in the cell output.
fig_ts, ax_ts = plt.subplots()
ts.plot(ax=ax_ts)
ax_ts.set_title('Events per minute by status')
ax_ts.set_xlabel('Time')
ax_ts.set_ylabel('Count')
fig_ts.tight_layout()
plt.show()

## Summary

In [None]:
# Headline numbers for the whole log, converted to plain Python ints.
is_failed_login = (df['endpoint']=='/login') & (df['status']==401)
is_not_found = df['status']==404

summary = dict(
    total_events=int(len(df)),
    unique_ips=int(df['src_ip'].nunique()),
    failed_logins=int(is_failed_login.sum()),
    not_found_events=int(is_not_found.sum()),
)
summary