In [None]:
import requests
import pandas as pd

# Base URL of the Elasticsearch HTTP endpoint that get_logs() queries.
ES_HOST = 'http://localhost:9200'
# Name of the index whose documents get_logs() fetches via the _search API.
INDEX_NAME = 'web_logs'

def get_logs(size=None, timeout=30):
    """Fetch documents from the ``INDEX_NAME`` index as a DataFrame.

    Sends a ``match_all`` query to the index's ``_search`` endpoint and
    builds one row per hit from that hit's ``_source`` document.

    Args:
        size: Maximum number of hits to request. ``None`` (the default)
            leaves the field out of the query, so the server-side default
            applies (Elasticsearch documents this as 10 hits).
        timeout: Seconds to wait for the server before giving up, so an
            unreachable host cannot block the caller forever.

    Returns:
        A DataFrame with one row per returned hit, or an empty DataFrame
        (after printing the reason) when the request cannot be completed
        or the server answers with a non-200 status.
    """
    url = f"{ES_HOST}/{INDEX_NAME}/_search"
    headers = {'Content-Type': 'application/json'}
    query = {"query": {"match_all": {}}}
    if size is not None:
        query["size"] = size

    try:
        # json= makes requests serialise the body itself, so this function
        # does not depend on the json module being imported.
        response = requests.get(url, headers=headers, json=query, timeout=timeout)
    except requests.RequestException as exc:
        # Report network failures the same way as HTTP failures below.
        print(f"Failed to fetch logs: {exc}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Failed to fetch logs: HTTP {response.status_code}: {response.text}")
        return pd.DataFrame()

    hits = response.json()['hits']['hits']
    return pd.DataFrame([hit['_source'] for hit in hits])

# Fetch the logs once; get_logs() returns an empty DataFrame when the fetch fails.
logs_df = get_logs()
# Trailing expression: in a notebook cell this displays the first rows (head() defaults to 5).
logs_df.head()

In [None]:
import matplotlib.pyplot as plt

def _plot_request_counts(counts, title, xlabel, kind='line'):
    """Draw a request-count Series on its own 12x6 figure and show it.

    Args:
        counts: pandas Series of counts; its index supplies the x axis.
        title: Figure title.
        xlabel: Label for the x axis.
        kind: Plot kind forwarded to ``Series.plot`` (e.g. 'line', 'bar').
    """
    plt.figure(figsize=(12, 6))
    counts.plot(kind=kind)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Number of Requests')
    plt.show()


if logs_df.empty:
    # An empty frame has no 'timestamp', 'page' or 'status' columns, so the
    # analysis below would fail with a KeyError; report and skip instead.
    print('No log records to analyse; skipping plots.')
else:
    # Traffic patterns
    # set_index(inplace=True) moves 'timestamp' out of the columns, so only
    # convert and index on the first run; later runs reuse the existing index.
    if 'timestamp' in logs_df.columns:
        logs_df['timestamp'] = pd.to_datetime(logs_df['timestamp'])
        logs_df.set_index('timestamp', inplace=True)
    # Hourly buckets; lowercase 'h' replaces the 'H' alias deprecated in pandas 2.2.
    traffic_patterns = logs_df.resample('h').size()
    _plot_request_counts(traffic_patterns, 'Traffic Patterns', 'Time')

    # Popular pages
    popular_pages = logs_df['page'].value_counts()
    _plot_request_counts(popular_pages, 'Popular Pages', 'Page', kind='bar')

    # Anomalies (HTTP status codes)
    status_counts = logs_df['status'].value_counts()
    _plot_request_counts(status_counts, 'HTTP Status Codes', 'Status Code', kind='bar')