<a href="https://colab.research.google.com/github/darapanenichandana/my-app/blob/main/Untitled20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [78]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.io as pio

# Load dataset (pandas infers the zip compression from the file extension).
# NOTE(review): the captured overview lists an 'Unnamed: 0' column, which looks
# like a row index that was saved into the CSV — consider index_col=0; confirm.
df = pd.read_csv("/content/updated_cybersecurity_attacks.csv.zip")

# Convert 'Timestamp' to datetime. errors='coerce' turns unparseable values
# into NaT instead of raising, so report how many values were lost that way
# rather than dropping them silently from the time-based counts below.
if 'Timestamp' in df.columns:
    missing_before = df['Timestamp'].isna().sum()
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce')
    unparsed = df['Timestamp'].isna().sum() - missing_before
    if unparsed:
        print(f"Warning: {unparsed} Timestamp value(s) could not be parsed and were set to NaT")

# Print dataset overview
print("Dataset Overview:")
# DataFrame.info() writes its report to stdout itself and returns None;
# wrapping it in print() would append a stray "None" line to the output.
df.info()
print(df.head())

# Print basic statistical summary
print("\nBasic Statistical Summary:")
print(df.describe())

# Report the mean of the 'Packet Length' column, when the dataset has one
if 'Packet Length' in df:
    packet_lengths = df['Packet Length']
    avg_packet_length = packet_lengths.mean()
    print(f"\nAverage Packet Length: {avg_packet_length}")

# Extracting date-related information and plotting threat counts per period
if 'Timestamp' in df.columns:
    timestamps = df['Timestamp']
    # ISO-8601 calendar fields (year, week, day) for every timestamp
    iso_calendar = timestamps.dt.isocalendar()

    df['Day'] = timestamps.dt.date
    df['Week'] = iso_calendar['week']  # ISO week number
    df['Month'] = timestamps.dt.strftime('%Y-%m')  # Year-Month format
    df['Year'] = timestamps.dt.year  # Calendar year
    df['DayOfWeek'] = timestamps.dt.dayofweek  # For weekday analysis (Monday=0)

    # Counting threats per day
    daily_threats = df.groupby('Day').size().reset_index(name='Count')

    # Counting threats per week.
    # An ISO week number is only meaningful together with its ISO year: the
    # days around New Year can belong to week 52/53 of the previous ISO year
    # or week 1 of the next one. Pairing the ISO week with the calendar year
    # would file those days under the wrong (year, week) bucket, so group on
    # the ISO year here. The 'Year' column of this frame is the ISO year.
    weekly_threats = (
        iso_calendar.rename(columns={'year': 'Year', 'week': 'Week'})
        .groupby(['Year', 'Week'])
        .size()
        .reset_index(name='Count')
    )

    # Counting threats per month
    monthly_threats = df.groupby('Month').size().reset_index(name='Count')

    # Counting threats per (calendar) year
    yearly_threats = df.groupby('Year').size().reset_index(name='Count')

    # Plot daily threats
    fig_daily = px.bar(daily_threats, x='Day', y='Count', title='Threats per Day')
    fig_daily.show()

    # Plot weekly threats.
    # Year is cast to string on a plot-only copy so Plotly treats it as a
    # category (one legend entry per year) instead of a continuous colour scale.
    fig_weekly = px.bar(
        weekly_threats.astype({'Year': str}),
        x='Week',
        y='Count',
        color='Year',
        title='Threats per Week',
        labels={'Year': 'ISO Year', 'Week': 'ISO Week'},
    )
    fig_weekly.show()

    # Plot monthly threats
    fig_monthly = px.bar(monthly_threats, x='Month', y='Count', title='Threats per Month')
    fig_monthly.show()

    # Plot yearly threats
    fig_yearly = px.bar(yearly_threats, x='Year', y='Count', title='Threats per Year')
    fig_yearly.show()

# 1. Top Traffic Types — number of records per traffic type, as a bar chart
if 'Traffic Type' in df:
    traffic_tally = df['Traffic Type'].value_counts()
    traffic_type_counts = traffic_tally.reset_index()
    traffic_type_counts.columns = ['Traffic Type', 'Count']

    fig_traffic = px.bar(
        traffic_type_counts,
        x='Traffic Type',
        y='Count',
        color='Traffic Type',
        title='Top Traffic Types',
    )
    fig_traffic.show()

# 2. Top Attack Types — number of records per attack type, as a bar chart
if 'Attack Type' in df:
    attack_tally = df['Attack Type'].value_counts()
    attack_type_counts = attack_tally.reset_index()
    attack_type_counts.columns = ['Attack Type', 'Count']

    fig_attack = px.bar(
        attack_type_counts,
        x='Attack Type',
        y='Count',
        color='Attack Type',
        title='Top Attack Types',
    )
    fig_attack.show()

# 3. Attack Frequency Over Time (Monthly) — rows per 'Month' value, as a line chart
if 'Month' in df:
    rows_per_month = df.groupby('Month').size()
    monthly_attacks = rows_per_month.reset_index(name='Count')

    fig_monthly_attacks = px.line(
        monthly_attacks,
        x='Month',
        y='Count',
        markers=True,
        title='Attack Frequency Over Time (Monthly)',
    )
    fig_monthly_attacks.show()

# 4. Severity Level Distribution — share of records per severity level, as a pie chart
if 'Severity Level' in df:
    severity_tally = df['Severity Level'].value_counts()
    severity_counts = severity_tally.reset_index()
    severity_counts.columns = ['Severity Level', 'Count']

    fig_severity = px.pie(
        severity_counts,
        names='Severity Level',
        values='Count',
        title='Severity Level Distribution',
    )
    fig_severity.show()

# 5. Geographical Attack Analysis — the ten most frequent 'Geo-location Data' values
if 'Geo-location Data' in df:
    # value_counts() is sorted by descending frequency, so the first ten
    # entries are the ten most common locations.
    top_locations = df['Geo-location Data'].value_counts().head(10)
    geo_counts = top_locations.reset_index()
    geo_counts.columns = ['Geo-location Data', 'Count']

    fig_geo = px.bar(
        geo_counts,
        x='Geo-location Data',
        y='Count',
        color='Geo-location Data',
        title='Top 10 Attack Locations',
    )
    fig_geo.show()


Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40000 entries, 0 to 39999
Data columns (total 34 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   Unnamed: 0              40000 non-null  int64         
 1   Timestamp               40000 non-null  datetime64[ns]
 2   Source IP Address       40000 non-null  object        
 3   Destination IP Address  40000 non-null  object        
 4   Source Port             40000 non-null  int64         
 5   Destination Port        40000 non-null  int64         
 6   Protocol                40000 non-null  object        
 7   Packet Length           40000 non-null  int64         
 8   Packet Type             40000 non-null  object        
 9   Traffic Type            40000 non-null  object        
 10  Payload Data            40000 non-null  object        
 11  Malware Indicators      40000 non-null  object        
 12  Anomaly Scores          4000

In [75]:
pip install -U kaleido


