<a href="https://colab.research.google.com/github/ahmedyasser7/DEPI_GP/blob/main/visuallization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Visualization

In [None]:
import pandas as pd
import numpy as np

import datetime
import csv
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import folium
from folium.plugins import MarkerCluster, HeatMap
import pandas as pd

#import sqlite3


# Show every column and never truncate cell text when a DataFrame is displayed.
for _display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

**Quick view of Accidents**

In [None]:
# Load the accidents table; the path is relative to the notebook's working directory.
df_accidents = pd.read_csv("accident cleaned data.csv")

In [None]:
# Preview the first rows of the accidents table.
df_accidents.head()

In [None]:
# Column names, dtypes and non-null counts of the accidents table.
df_accidents.info()

In [None]:
# (rows, columns) of the accidents table.
df_accidents.shape

In [None]:
# Summary statistics for every column, numeric and non-numeric alike (include="all").
df_accidents.describe(include="all")

In [None]:
# Number of missing values per column in the accidents table.
df_accidents.isna().sum()

**Quick view of Vehicles**

In [None]:
# Load the vehicles table; the path is relative to the notebook's working directory.
df_vechiles = pd.read_csv("Vechicles cleaned data.csv")

In [None]:
# Preview the first rows of the vehicles table.
df_vechiles.head()

In [None]:
# Column names, dtypes and non-null counts of the vehicles table.
df_vechiles.info()

In [None]:
# (rows, columns) of the vehicles table.
df_vechiles.shape

In [None]:
# Summary statistics for every column, numeric and non-numeric alike (include="all").
df_vechiles.describe(include="all")

In [None]:
# Number of missing values per column in the vehicles table.
df_vechiles.isna().sum()

**Quick view of Casualties**

In [None]:
# Load the casualties table; the path is relative to the notebook's working directory.
df_casu= pd.read_csv("casualties cleaned data.csv")

In [None]:
# Preview the first rows of the casualties table.
df_casu.head()

In [None]:
# Column names, dtypes and non-null counts of the casualties table.
df_casu.info()

In [None]:
# (rows, columns) of the casualties table.
df_casu.shape

In [None]:
# Summary statistics for every column, numeric and non-numeric alike (include="all").
df_casu.describe(include="all")

In [None]:
# Number of missing values per column in the casualties table.
df_casu.isna().sum()

In [None]:
# `df` is the working frame for the accident-level plots in the following cells.
# NOTE(review): pd.DataFrame(frame) does not take a deep copy by default — confirm whether
# df_accidents.copy() is wanted before any cell adds columns to `df`.
df = pd.DataFrame(df_accidents)

**Correlation heatmap of the numeric casualty columns**

In [None]:
# Pairwise correlations between the numeric casualty columns, drawn as an annotated grid.
casualty_corr = df_casu.corr(numeric_only=True)
dataplot = sns.heatmap(
    data=casualty_corr,
    annot=True,
    fmt=".1f",
    cmap="YlGnBu",
    linewidths=2,
)

In [None]:
# Percentage of accidents by number of vehicles involved (interactive histogram).
fig_vehicles = px.histogram(
    data_frame=df,
    x='Number_of_Vehicles',
    nbins=15,
    histnorm='percent',
    title='Distribution of Number of Vehicles Involved in Accidents',
    labels={'Number_of_Vehicles': 'Number of Vehicles'},
)
fig_vehicles.show()

In [None]:
# Percentage of accidents by number of casualties (interactive histogram).
fig_casualties = px.histogram(
    data_frame=df,
    x='Number_of_Casualties',
    nbins=15,
    histnorm='percent',
    title='Distribution of Number of Casualties',
    labels={'Number_of_Casualties': 'Number of Casualties'},
)
fig_casualties.show()

In [None]:
# Daily accident totals: one row per distinct value of the Date column.
# NOTE(review): if Date is stored as dd/mm/yyyy text, the groups (and the x-axis) are in
# lexical rather than chronological order — confirm the column's format.
accidents_per_day = (
    df.groupby('Date')
    .size()
    .rename('Number_of_Accidents')
    .reset_index()
)

# Interactive line chart of the daily totals.
fig_time = px.line(
    data_frame=accidents_per_day,
    x='Date',
    y='Number_of_Accidents',
    title='Number of Accidents Over Time',
)
fig_time.update_layout(yaxis_title='Number of Accidents', xaxis_title='Date')
fig_time.show()

In [None]:
# Accidents by day of the week.
# Aggregate before plotting: passing the raw frame to px.bar without `y` draws one mark per
# accident row instead of one bar per weekday, so the bar height is not a reliable count.
weekday_summary = (
    df.groupby('Day_of_Week')['Number_of_Casualties']
    .agg(Number_of_Accidents='size', Mean_Casualties='mean')
    .reset_index()
)

# Bar height = number of accidents; bar colour = mean casualties per accident on that day.
fig_weekday = px.bar(weekday_summary, x='Day_of_Week', y='Number_of_Accidents',
                     title='Accidents by Day of the Week',
                     labels={'Day_of_Week': 'Day of the Week',
                             'Number_of_Accidents': 'Number of Accidents',
                             'Mean_Casualties': 'Mean Casualties per Accident'},
                     color='Mean_Casualties',
                     color_continuous_scale=px.colors.sequential.Viridis)
fig_weekday.update_layout(yaxis_title='Number of Accidents', xaxis_title='Day of the Week')
fig_weekday.show()

In [None]:
# Convert Time to a numeric hour so the histogram can bin it (24 bins = one per hour).
# The hour is taken as the part before the first ':'.
# NOTE(review): assumes Time is text shaped like 'HH:MM' or 'HH:MM:SS' — confirm against the CSV.
# Values that cannot be parsed become NaN and are left out of the histogram.
time_hours = pd.to_numeric(df['Time'].astype(str).str.split(':').str[0], errors='coerce')

# Interactive histogram for accidents by time of day; the derived column is attached to a
# temporary copy so `df` itself is not modified.
fig_time_day = px.histogram(df.assign(Time_Hours=time_hours), x='Time_Hours', nbins=24,
                             title='Accidents by Time of Day',
                             labels={'Time_Hours': 'Time (Hours)'},
                             histnorm='percent')
fig_time_day.show()

In [None]:
# Spread of casualties per accident within each speed-limit band (interactive box plot).
speed_casualty_labels = {
    'Speed_limit': 'Speed Limit (mph)',
    'Number_of_Casualties': 'Number of Casualties',
}
fig_speed_casualties = px.box(
    data_frame=df,
    x='Speed_limit',
    y='Number_of_Casualties',
    labels=speed_casualty_labels,
    title='Number of Casualties by Speed Limit',
)
fig_speed_casualties.show()

In [None]:
# Percentage histogram of accidents per weather condition, split into one colour
# series per casualty count.
fig_weather_conditions = px.histogram(
    data_frame=df,
    x='Weather_Conditions',
    color='Number_of_Casualties',
    histnorm='percent',
    title='Accidents by Weather Conditions',
    labels={'Weather_Conditions': 'Weather Conditions'},
)
fig_weather_conditions.show()

In [None]:
# Heatmap of accident counts: road surface condition (rows) x accident severity (columns).
#
# The heatmap needs a contingency table, not the raw accidents frame, so the counts are
# built with pd.crosstab first.
# NOTE(review): assumes the accidents table has a 'Road_Surface_Conditions' column — confirm.
# NOTE(review): the title says "at Junctions" but no junction filter is applied here — confirm.

# Severity coding follows the rest of this notebook: 1 = Fatal, 2 = Serious, 3 = Slight.
severity_names = {1: 'Fatal (1)', 2: 'Serious (2)', 3: 'Slight (3)'}

# Presumably the surface codes 1..5 map to these names in this order — verify against the
# data dictionary. If the column already holds text labels, rename() leaves them untouched.
road_surface_conditions = [
    'Dry',
    'Wet or damp',
    'Snow',
    'Frost or ice',
    'Flood over 3cm. deep'
]
surface_names = dict(enumerate(road_surface_conditions, start=1))

surface_by_severity = (
    pd.crosstab(df['Road_Surface_Conditions'], df['Accident_Severity'])
    .rename(index=surface_names, columns=severity_names)
)

plt.figure(figsize=(12, 8))
sns.heatmap(surface_by_severity, annot=True, cmap='Purples', fmt='d',
            cbar_kws={'label': 'Number of Accidents'})
plt.title('Heatmap of Accident Severity by Road Surface Conditions at Junctions')
plt.xlabel('Accident Severity')
plt.ylabel('Road Surface Conditions')

# Tick positions and labels come from the table itself, so they always match the data.
plt.xticks(rotation=0)
plt.yticks(rotation=0)
plt.show()

In [None]:
# Plotting casualty severity distribution.
# Read from the casualties table: `df` at this point is the accidents frame.
# NOTE(review): assumes 'Casualty_Severity' is a column of df_casu — confirm.
fig_severity = px.histogram(df_casu, x='Casualty_Severity', title='Distribution of Casualty Severity')
fig_severity.show()

In [None]:
# Plotting age distribution of casualties.
# Read from the casualties table: `df` at this point is the accidents frame.
# NOTE(review): assumes 'Age_of_Casualty' is a column of df_casu — confirm.
fig_age = px.histogram(df_casu, x='Age_of_Casualty', title='Age Distribution of Casualties')
fig_age.show()

In [None]:
# Plotting casualty type distribution.
# Read from the casualties table: `df` at this point is the accidents frame.
# NOTE(review): assumes 'Casualty_Type' is a column of df_casu — confirm.
fig_type = px.histogram(df_casu, x='Casualty_Type', title='Distribution of Casualty Types')
fig_type.show()

In [None]:
# Interactive folium map of accidents: one toggleable, clustered marker layer per severity
# level, plus a density heat map over all located accidents.

df = pd.DataFrame(df_accidents)

# Only rows with both coordinates can be placed on the map; NaN locations are dropped up
# front so they cannot reach folium.Marker / HeatMap.
df_located = df.dropna(subset=['Latitude', 'Longitude'])

# Create a folium map centered on the average location
m = folium.Map(location=[df_located['Latitude'].mean(), df_located['Longitude'].mean()], zoom_start=12)

# Define severity levels and corresponding colors (1 is the most severe code)
severity_levels = {
    3: {'label': 'Low Severity', 'color': 'green', 'icon': 'check'},
    2: {'label': 'Medium Severity', 'color': 'orange', 'icon': 'exclamation-sign'},
    1: {'label': 'High Severity', 'color': 'red', 'icon': 'remove'}
}

# Columns needed for marker placement and popup text; selecting them keeps the per-row
# tuples small and guarantees attribute access by column name.
popup_columns = ['Latitude', 'Longitude', 'Accident_Index', 'Date', 'Time',
                 'Number_of_Vehicles', 'Number_of_Casualties', 'Speed_limit']

# Create a FeatureGroup for each severity level
for severity, info in severity_levels.items():
    # The FeatureGroup name is what the layer control displays
    fg = folium.FeatureGroup(name=info['label'])

    # Cluster the markers inside the group so dense areas stay readable
    marker_cluster = MarkerCluster().add_to(fg)

    # Filter data for the current severity level
    severity_data = df_located[df_located['Accident_Severity'] == severity]

    # Add one marker per accident to the cluster within the FeatureGroup
    for row in severity_data[popup_columns].itertuples(index=False):
        folium.Marker(
            location=[row.Latitude, row.Longitude],
            popup=folium.Popup(html=(
                f"<b>Accident Index:</b> {row.Accident_Index}<br>"
                f"<b>Date:</b> {row.Date}<br>"
                f"<b>Time:</b> {row.Time}<br>"
                f"<b>Number of Vehicles:</b> {row.Number_of_Vehicles}<br>"
                f"<b>Number of Casualties:</b> {row.Number_of_Casualties}<br>"
                f"<b>Speed Limit:</b> {row.Speed_limit} mph"
            ), max_width=300),
            icon=folium.Icon(color=info['color'], icon=info['icon'])
        ).add_to(marker_cluster)

    # Add the FeatureGroup to the map
    m.add_child(fg)

# Add a HeatMap layer to visualize density of accidents (built without a Python-level row loop)
heat_data = df_located[['Latitude', 'Longitude']].values.tolist()
HeatMap(heat_data, name='Accident Density', radius=15).add_to(m)

# Add a LayerControl to toggle between severity layers
folium.LayerControl().add_to(m)

# Save the map to an HTML file
# m.save('accidents_map_with_severity_layers.html')

# To display the map in a Jupyter Notebook, simply use:
m

In [None]:
# Treemap of casualties by district -> highway authority -> severity, for a district the
# user searches for.

# Step 1: Bin the Accident Severity (1 is the most severe code)
severity_labels = {3: 'Low', 2: 'Medium', 1: 'High'}

# Build the working frame with .assign so the derived columns land on a new frame rather
# than being written into an existing one. The district is forced to text so the string
# search below always applies.
df = pd.DataFrame(df_accidents).assign(**{
    'Local_Authority_(District)': lambda frame: frame['Local_Authority_(District)'].astype(str),
    'Accident_Severity_Label': lambda frame: frame['Accident_Severity'].map(severity_labels),
})


# Step 2: Function to filter the DataFrame based on user input
def filter_df(search_term):
    """Return the rows of ``df`` whose district contains ``search_term``.

    The match is a case-insensitive, literal substring test: ``regex=False`` keeps
    characters such as ``(`` or ``+`` in the user's text from being interpreted as a
    regular expression (which could raise or match unintended rows).

    Args:
        search_term: Text to look for in the 'Local_Authority_(District)' column.

    Returns:
        The matching subset of ``df``; empty when nothing matches.
    """
    return df[df['Local_Authority_(District)'].str.contains(
        search_term, case=False, na=False, regex=False)]


# Example: Getting user input (surrounding whitespace is not part of the search)
search_input = input("Enter a district name to search: ").strip()
filtered_df = filter_df(search_input)

# Step 3: Create the Treemap
if not filtered_df.empty:
    fig = px.treemap(
        filtered_df,
        path=['Local_Authority_(District)', 'Local_Authority_(Highway)', 'Accident_Severity_Label'],
        values='Number_of_Casualties',
        color='Accident_Severity',
        color_continuous_scale='RdYlGn',
        title=f'Treemap of Accidents by Local Authority District {search_input}',
        hover_data={'Number_of_Vehicles': True, 'Number_of_Casualties': True}
    )
    fig.show()
else:
    print(f"No results found for '{search_input}'. Please try another district name.")

In [None]:
# Bar chart comparing accident counts in urban and rural areas.

# Numeric area codes and their display names; code 3 ("Unallocated") has no label.
area_labels = {1: 'Urban', 2: 'Rural'}

df = pd.DataFrame(df_accidents)
df['Area_Label'] = df['Urban_or_Rural_Area'].map(area_labels)

# Keep everything except the unallocated rows, then count accidents per area label.
is_allocated = df['Urban_or_Rural_Area'].ne(3)
df_filtered = df[is_allocated]
urban_rural_counts = df_filtered['Area_Label'].value_counts()

# Draw the counts as bars on a fresh figure.
plt.figure(figsize=(8, 6))
ax = urban_rural_counts.plot.bar(color=['skyblue', 'lightgreen'])

# Title and axis labels.
ax.set_title('Accidents in Urban vs Rural Areas (Excluding Unallocated)', fontsize=16)
ax.set_xlabel('Area Type', fontsize=14)
ax.set_ylabel('Number of Accidents', fontsize=14)
plt.setp(ax.get_xticklabels(), rotation=0)  # horizontal tick labels read more easily

plt.tight_layout()
plt.show()

## Heatmap of Accident Locations Based on Severity

### Columns Used:
- Accident Severity
- Location Easting OSGR, Location Northing OSGR
- Number of Vehicles
- Road Type

In [None]:
# Columns carried from the accidents table into the severity heat maps.
heatmap_columns = [
    'Accident_Severity',
    'Number_of_Casualties',
    'Local_Authority_(District)',
    'Local_Authority_(Highway)',
    'Number_of_Vehicles',
    'Longitude',
    'Latitude',
]
Heatmap_df = df_accidents[heatmap_columns]
# Optional export of the selection:
#Heatmap_df.to_csv('Heatmap.csv', index=False)

In [None]:
# Casualties per district, coloured by severity, with one facet column per highway authority.
fig = px.bar(
    data_frame=df_accidents,
    x='Local_Authority_(District)',
    y='Number_of_Casualties',
    color='Accident_Severity',
    facet_col='Local_Authority_(Highway)',
    barmode='group',
    title='Accidents by Severity, Highway, and Local Authority',
    labels={'Accident_Severity': 'Severity Level'},
)

# Render the figure
fig.show()

In [None]:
# Total casualties and vehicles for every (severity, district, highway authority) combination.
# `Heatmap_df` is the column selection built from df_accidents earlier in this section; the
# previous name `Hdf` was never defined and raised NameError.
grouped_df = (
    Heatmap_df
    .groupby(['Accident_Severity', 'Local_Authority_(District)', 'Local_Authority_(Highway)'])
    .agg(
        Total_Casualties=('Number_of_Casualties', 'sum'),
        Total_Vehicles=('Number_of_Vehicles', 'sum'),
    )
    .reset_index()
    # Rename the key columns by name rather than by position, for clarity
    .rename(columns={
        'Local_Authority_(District)': 'District',
        'Local_Authority_(Highway)': 'Highway',
    })
)

# Display the grouped data
print(grouped_df)


In [None]:
# Casualties per district, coloured by accident severity.
fig = px.bar(
    data_frame=df_accidents,
    x='Local_Authority_(District)',
    y='Number_of_Casualties',
    color='Accident_Severity',
    title='Accidents by Severity and Local Authority',
    labels={'Accident_Severity': 'Severity Level'},
)

# Render the figure
fig.show()

In [None]:
# px.imshow demo on plotly's bundled medals_wide sample data.
# The sample is kept under its own name so the accidents working frame `df` is not replaced
# by unrelated demo data, and the figure is built once (the first, un-annotated px.imshow
# result was immediately overwritten).
medals_wide = px.data.medals_wide(indexed=True)
fig = px.imshow(medals_wide, text_auto=True)
fig.show()

In [None]:
# Heat maps of vehicles and casualties by district (rows) and highway authority (columns),
# one pair of figures per accident-severity level.
# pandas, seaborn and matplotlib are already imported in the notebook's first cell.

# Upper bound on the number of rows plotted; capped at the table size so the cell also
# works on small inputs.
HEATMAP_SAMPLE_SIZE = 100

# Severity codes and their display names
severity_map = {
    1: 'Fatal',
    2: 'Serious',
    3: 'Slight'
}

# The labelled sample gets its own name: re-running the cell then always samples from the
# full `Heatmap_df` instead of from a previous sample.
heatmap_sample = (
    Heatmap_df
    .sample(n=min(HEATMAP_SAMPLE_SIZE, len(Heatmap_df)), random_state=1)
    .assign(Severity_Label=lambda frame: frame['Accident_Severity'].map(severity_map))
)


def _plot_authority_heatmap(severity_data, value_column, cmap, title):
    """Draw one district x highway heat map of the summed ``value_column``.

    Args:
        severity_data: Rows to plot; must contain 'Local_Authority_(District)',
            'Local_Authority_(Highway)' and ``value_column``.
        value_column: Name of the numeric column summed into each cell.
        cmap: Colour map name passed to seaborn.
        title: Figure title.
    """
    pivot = severity_data.pivot_table(
        index='Local_Authority_(District)',
        columns='Local_Authority_(Highway)',
        values=value_column,
        aggfunc='sum',
        fill_value=0
    )

    plt.figure(figsize=(15, 12))
    sns.heatmap(pivot,
                cmap=cmap,
                annot=True,
                fmt='0.1f',
                linecolor='black',
                linewidths=1,
                annot_kws={"size": 20},
                cbar=True)
    plt.title(title, fontsize=24)
    # The axis labels name what the pivot actually puts on each axis.
    plt.xlabel('Local Authority (Highway)', fontsize=20)
    plt.ylabel('Local Authority (District)', fontsize=20)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    plt.show()


# One vehicles heat map and one casualties heat map per severity level
for severity in severity_map.values():
    severity_data = heatmap_sample[heatmap_sample['Severity_Label'] == severity]

    # A small sample may contain no accidents of a given severity; an empty pivot cannot be
    # drawn, so that level is reported and skipped.
    if severity_data.empty:
        print(f'No {severity} accidents in the sample; skipping its heatmaps.')
        continue

    _plot_authority_heatmap(
        severity_data,
        value_column='Number_of_Vehicles',
        cmap='YlGnBu',
        title=f'Heatmap of Vehicles Involved in {severity} Accidents',
    )
    _plot_authority_heatmap(
        severity_data,
        value_column='Number_of_Casualties',
        cmap='YlOrRd',
        title=f'Heatmap of Casualties in {severity} Accidents',
    )

## Time-Series of Accidents by Hour and Day of the Week

### Columns Used:
- Day of Week
- Time (HH)
- Number of Accidents

## Distribution of Accident Severity by Weather Conditions

### Columns Used:
- Accident Severity
- Weather Conditions
- Road Surface Conditions
- Speed Limit

## Pie Chart of Casualty Types Based on Vehicle Involvement

### Columns Used:
- Casualty Type
- Vehicle Type
- Number of Vehicles

## Accident Hotspot Map Based on Geo-Coordinates and Number of Casualties

### Columns Used:
- Location Easting OSGR, Location Northing OSGR
- Number of Casualties
- Accident Severity


## Bar Chart of Casualty Severity by Age Group

### Columns Used:
- Casualty Severity
- Age Band of Casualty
- Casualty Class
- Casualty Type

## Correlation Matrix of Accident Factors and Casualty Outcome

### Columns Used:
- Number of Vehicles
- Number of Casualties
- Accident Severity
- Casualty Severity
- Speed Limit
- Weather Conditions

## Scatter Plot of Accidents by Speed Limit and Severity

### Columns Used:
- Accident Severity
- Speed Limit
- Road Type

## Comparison of Accident Frequency Between Rural and Urban Areas

### Columns Used:
- Urban or Rural Area
- Accident Severity
- Road Type

## Impact of Junction Control on Accident Severity

### Columns Used:
- Junction Control
- Accident Severity
- Vehicle Type
- Road Surface Conditions