In [None]:
import pandas as pd

# Read the raw login log into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer='../data/login_logs.csv')

# Preview the first five rows to sanity-check what was parsed.
df.head(n=5)



In [None]:
# Keep only the rows whose 'status' is exactly the string 'failed'.
failed_logins = df.loc[df['status'].eq('failed')]

# Preview the first five failed attempts.
failed_logins.head(n=5)


In [None]:
# value_counts() orders usernames by descending frequency, so the first five
# entries are the five accounts with the most failed attempts.
top_failed_users = failed_logins['username'].value_counts().iloc[:5]
print("Top 5 users with failed logins:", top_failed_users, sep="\n")


In [None]:
# The IP breakdown is optional: it only runs when the frame actually has an
# 'ip_address' column (membership on a DataFrame tests its column labels).
if 'ip_address' in failed_logins:
    # First five entries of the descending frequency table = top five IPs.
    top_failed_ips = failed_logins['ip_address'].value_counts().iloc[:5]
    print("\nTop 5 IP addresses with failed logins:", top_failed_ips, sep="\n")


In [None]:
import pandas as pd

# Parse the 'timestamp' column into datetimes, overwriting it on `df`.
# errors='coerce' turns any value that cannot be parsed into NaT instead of
# raising.
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')

# Any NaT left after coercion means a timestamp was missing or unparseable.
if df['timestamp'].isna().any():
    print("Warning: Some timestamps couldn't be converted!")

# Rebuild the failed-login subset from the converted frame and make the
# timestamp its index in one step.
failed_logins = df.loc[df['status'].eq('failed')].set_index('timestamp')

# Print the index class as a quick check that it is datetime-based.
print(type(failed_logins.index))


In [None]:
import matplotlib.pyplot as plt

# Count failed logins in one-minute bins. resample() needs a datetime-like
# index on `failed_logins`.
# 'min' is the supported minute alias; the older 'T' spelling is deprecated in
# pandas 2.2+ and emits a FutureWarning there.
failed_per_minute = failed_logins.resample('min').size()

# Draw on an explicit Axes so the bars are guaranteed to land on the figure
# created here rather than on whichever figure happens to be current.
fig, ax = plt.subplots(figsize=(12, 6))
failed_per_minute.plot(kind='bar', color='red', ax=ax)
ax.set_title('Failed Login Attempts Per Minute')
ax.set_xlabel('Time')
ax.set_ylabel('Number of Failed Logins')
plt.xticks(rotation=45)  # tilt the per-minute tick labels
fig.tight_layout()
plt.show()



In [None]:
# Failed-attempt count for every username, most frequent first
# (sort/ascending are spelled out but are the value_counts defaults).
failed_logins['username'].value_counts(sort=True, ascending=False)



In [None]:
# A username or IP address is flagged once its failed-login count is strictly
# greater than this value. Defined once so the two filters and the two printed
# headings below cannot drift apart when the threshold is tuned.
FAILED_LOGIN_THRESHOLD = 2

# Flag users whose failed-login count exceeds the threshold
user_fail_counts = failed_logins['username'].value_counts()
suspicious_users = user_fail_counts[user_fail_counts > FAILED_LOGIN_THRESHOLD]

print(f"🚨 Users with more than {FAILED_LOGIN_THRESHOLD} failed logins:")
print(suspicious_users)

# Optional: flag IPs too, with the same threshold (only when the column exists)
if 'ip_address' in failed_logins.columns:
    ip_fail_counts = failed_logins['ip_address'].value_counts()
    suspicious_ips = ip_fail_counts[ip_fail_counts > FAILED_LOGIN_THRESHOLD]

    print(f"\n🚨 IP addresses with more than {FAILED_LOGIN_THRESHOLD} failed logins:")
    print(suspicious_ips)


In [None]:
import matplotlib.pyplot as plt

# Tally how many login attempts ended in each 'status' value
status_counts = df['status'].value_counts()

# Render the tallies as a pie chart on a square figure
plt.figure(figsize=(6, 6))
status_counts.plot.pie(
    title='Login Outcome Breakdown',
    ylabel='',           # blank out the y-axis label
    startangle=90,
    autopct='%1.1f%%',   # show each slice's share with one decimal place
)
plt.tight_layout()
plt.show()


In [None]:
# Headline numbers: every row in the log, then the rows whose status is
# exactly 'failed' / 'success' (summing a boolean mask counts its True values).
total = df.shape[0]
failed = int(df['status'].eq('failed').sum())
success = int(df['status'].eq('success').sum())

# Dashboard header and the three headline counts, one per line
print(
    "🛡️ Security Data Analytics Dashboard",
    f"• Total login attempts : {total}",
    f"• Failed login attempts: {failed}",
    f"• Successful logins     : {success}\n",
    sep="\n",
)

# Three usernames with the most failed attempts
top3_users = failed_logins['username'].value_counts().iloc[:3]
print("• Top 3 users (failed logins):")
print(top3_users.to_string(), "\n")

# Same ranking for source IPs, only when the log carries an 'ip_address' column
if 'ip_address' in failed_logins:
    top3_ips = failed_logins['ip_address'].value_counts().iloc[:3]
    print("• Top 3 IPs (failed logins):")
    print(top3_ips.to_string())
