In [1]:
import re
import pandas as pd
import networkx as nx
import plotly.graph_objects as go

# Regular expression for Apache access-log lines (combined-style layout).
# Each named group becomes a DataFrame column: ip, date, method, url, status,
# size, referrer, user_agent. Only lines whose two fields after the IP are
# both "-" match; every other line is skipped by the loader below.
log_pattern = r'(?P<ip>\d+\.\d+\.\d+\.\d+) - - \[(?P<date>.*?)\] "(?P<method>\w+) (?P<url>\S+) HTTP/\d\.\d" (?P<status>\d{3}) (?P<size>\d+|-) "(?P<referrer>.*?)" "(?P<user_agent>.*?)"'

# Compiled once so the per-line loop does not repeat the pattern lookup.
log_regex = re.compile(log_pattern)

# Path to the log file to analyse
log_file = 'logs-invasao.log'

# Load the log: one dict of captured fields per matching line.
# Undecodable bytes are dropped (errors='ignore') instead of aborting the read.
with open(log_file, 'r', encoding='utf-8', errors='ignore') as f:
    line_matches = (log_regex.match(line) for line in f)
    entries = [m.groupdict() for m in line_matches if m]

# Passing the column names explicitly keeps the expected columns present even
# when no line matched; otherwise the frame would have no columns at all and
# later lookups such as df['date'] would fail with a KeyError.
df = pd.DataFrame(entries, columns=list(log_regex.groupindex))

# Parse the raw log timestamp (day/abbreviated-month/year:time plus UTC offset)
timestamps = pd.to_datetime(df['date'], format='%d/%b/%Y:%H:%M:%S %z')
df['datetime'] = timestamps

# Bucket each record by the start of its hour.
# NOTE: the column is called 'minute', but it holds one-hour buckets.
df['minute'] = timestamps.dt.floor('60min')

# Number of records in each hourly bucket, in chronological order
records_per_bucket = df['minute'].value_counts().sort_index()

# Quick sanity check of what was loaded
print("Registros lidos:", len(df))
print("Intervalos encontrados:", df['minute'].nunique())

print("Quantidade de registros por hora:")
print(records_per_bucket)

# Chronologically ordered time buckets; one animation frame is built per bucket.
# The 'minute' names are kept for compatibility, but the buckets are hourly
# (the column is produced by flooring the timestamps to 60 minutes).
all_minutes = sorted(df['minute'].unique())

# Plot-ready data for each frame, in the same order as all_minutes
frames_data = []

# HTTP status codes to keep (strings, as captured from the log lines)
status_desejados = ["404","200"]

for minute in all_minutes:
    df_minute = df[
        (df['minute'] == minute) &
        (df['status'].isin(status_desejados))
    ]

    # Every distinct (status, url) pair contributes exactly one edge, so
    # repeated requests are collapsed up front instead of being re-added
    # row by row. drop_duplicates keeps first occurrences in order.
    pairs = df_minute[['status', 'url']].drop_duplicates()

    # Bipartite graph Status <-> URL. Nodes are keyed by (kind, label) so a
    # request target that happens to equal a status code (e.g. "404") cannot
    # overwrite the status node of the same name.
    G = nx.Graph()
    for status, url in zip(pairs['status'], pairs['url']):
        status_key = ('status', status)
        url_key = ('url', url)
        G.add_node(status_key, bipartite=0)
        G.add_node(url_key, bipartite=1)
        G.add_edge(status_key, url_key)

    # Split the two sides of the graph; the sorted plain labels are what gets plotted
    status_nodes = sorted(
        label for (_, label), attrs in G.nodes(data=True) if attrs['bipartite'] == 0
    )
    url_nodes = sorted(
        label for (_, label), attrs in G.nodes(data=True) if attrs['bipartite'] == 1
    )

    # Fixed two-column layout: status codes at x = -1, URLs at x = +1
    pos = {}
    for i, label in enumerate(status_nodes):
        pos[('status', label)] = (-1, i)
    for i, label in enumerate(url_nodes):
        # Much tighter vertical step than the status column; presumably chosen
        # so that frames with many URLs stay within a similar height.
        pos[('url', label)] = (+1, i * 0.0012)

    # Edge segments as flat coordinate lists; None separates consecutive
    # segments so a single line trace draws them as disconnected pieces.
    edge_x = []
    edge_y = []
    for src, dst in G.edges():
        x0, y0 = pos[src]
        x1, y1 = pos[dst]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]

    # Status node coordinates
    status_x = [pos[('status', label)][0] for label in status_nodes]
    status_y = [pos[('status', label)][1] for label in status_nodes]

    # URL node coordinates
    url_x = [pos[('url', label)][0] for label in url_nodes]
    url_y = [pos[('url', label)][1] for label in url_nodes]

    # Everything the figure needs to draw this frame
    frames_data.append({
        'minute': minute,
        'edge_x': edge_x,
        'edge_y': edge_y,
        'status_x': status_x,
        'status_y': status_y,
        'status_labels': status_nodes,
        'url_x': url_x,
        'url_y': url_y,
        'url_labels': url_nodes
    })

def _frame_label(frame):
    """Return the frame's time bucket formatted as 'dd-mm-YYYY HH:MM'."""
    return frame['minute'].strftime('%d-%m-%Y %H:%M')


def _frame_title(frame):
    """Return the figure title shown while *frame* is displayed."""
    return f"Grafo Temporal Bipartido Frames por Hora (LOG INVASÃO) - {_frame_label(frame)}"


def _frame_traces(frame):
    """Build the three traces (edges, status nodes, URL nodes) for *frame*.

    The same builder is used for the initial figure data and for every
    animation frame, so trace order and styling always agree between them.
    """
    return [
        # Edges: one line trace; None entries in the coordinates split segments
        go.Scatter(
            x=frame['edge_x'],
            y=frame['edge_y'],
            mode='lines',
            line=dict(color='gray', width=1),
            hoverinfo='none'
        ),
        # Status nodes: large markers with the status code drawn inside
        go.Scatter(
            x=frame['status_x'],
            y=frame['status_y'],
            mode='markers+text',
            marker=dict(size=40, color='salmon'),
            text=frame['status_labels'],
            textposition='middle center',
            name='Status'
        ),
        # URL nodes: small unlabeled markers
        go.Scatter(
            x=frame['url_x'],
            y=frame['url_y'],
            mode='markers',
            marker=dict(size=8, color='lightblue'),
            name='URLs'
        )
    ]


# The initial view is taken from the first frame, so at least one is required.
if not frames_data:
    raise ValueError("frames_data is empty: no time buckets available to plot")

first_frame = frames_data[0]

# Build the animated figure
fig = go.Figure(
    data=_frame_traces(first_frame),
    layout=go.Layout(
        width=1200,
        height=900,
        title_text=_frame_title(first_frame),
        showlegend=True,
        updatemenus=[
            dict(
                type="buttons",
                buttons=[
                    # Play: animate through all frames, 1000 ms per frame,
                    # continuing from the frame currently shown
                    dict(
                        label="Play",
                        method="animate",
                        args=[
                            None,
                            {"frame": {"duration": 1000, "redraw": True},
                             "fromcurrent": True}
                        ]
                    ),
                    # Pause: animating to [None] in immediate mode halts playback
                    dict(
                        label="Pause",
                        method="animate",
                        args=[
                            [None],
                            {"frame": {"duration": 0, "redraw": False},
                             "mode": "immediate"}
                        ]
                    )
                ]
            )
        ]
    ),
    frames=[
        go.Frame(
            data=_frame_traces(frame),
            name=_frame_label(frame),
            # Per-frame layout updates the title with the bucket being shown
            layout=go.Layout(
                title_text=_frame_title(frame),
                width=1200,
                height=900
            )
        )
        for frame in frames_data
    ]
)

# Show the interactive figure
fig.show()

# Also export it as a standalone HTML file
fig.write_html("Grafo-Temporal-Interativo-Bipartido-StatusxURL.html")

Registros lidos: 4851
Intervalos encontrados: 49
Quantidade de registros por hora:
minute
2025-05-10 07:00:00-03:00      14
2025-05-10 08:00:00-03:00      23
2025-05-10 09:00:00-03:00      32
2025-05-10 10:00:00-03:00     427
2025-05-10 11:00:00-03:00    1274
2025-05-10 12:00:00-03:00      43
2025-05-10 13:00:00-03:00      25
2025-05-10 14:00:00-03:00      25
2025-05-10 15:00:00-03:00      37
2025-05-10 16:00:00-03:00      27
2025-05-10 17:00:00-03:00      14
2025-05-10 18:00:00-03:00      18
2025-05-10 19:00:00-03:00      28
2025-05-10 20:00:00-03:00      26
2025-05-10 21:00:00-03:00     201
2025-05-10 22:00:00-03:00      35
2025-05-10 23:00:00-03:00      15
2025-05-11 00:00:00-03:00      10
2025-05-11 01:00:00-03:00      17
2025-05-11 02:00:00-03:00      24
2025-05-11 03:00:00-03:00     497
2025-05-11 04:00:00-03:00      28
2025-05-11 05:00:00-03:00      20
2025-05-11 06:00:00-03:00      22
2025-05-11 07:00:00-03:00      39
2025-05-11 08:00:00-03:00      52
2025-05-11 09:00:00-03:00 