In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [None]:
# Load the raw road-accident table. The file name (including its spelling)
# is kept exactly as-is because it has to match the file on disk.
csv_path = 'Dataset/accidient_data_of_india.csv'
accident_data = pd.read_csv(csv_path)

# Show the freshly loaded frame.
accident_data

In [None]:
# Number of missing values in each column.
accident_data.isna().sum()

In [None]:
# Original column labels as a plain Python list (used to build the rename map).
list(accident_data.columns)

In [None]:
# Map the verbose source headers to short labels used by the rest of the
# notebook. Labels missing from the frame are ignored by `rename`.
short_column_names = {
    'Years': 'Year',
    'Total Number of Road Accidents (in numbers)': 'Total accidents',
    'Total Number of Persons Killed (in numbers)': 'Total Killed',
    'Total Number of Persons Injured (in numbers)': 'Total Injured',
    'Population of India (in thousands)': 'Total Population',
    'Total Number of Registered Motor Vehicles (in thousands)': 'Total Registered Vehicle',
    'Road Length (in kms)': 'Road Length',
    'Number of Accidents per Lakh Population': 'Accidents Per Lakh',
    'Number of Accidents per Ten Thousand Vehicles': 'Accidents Per 10K Vehicles',
    'Number of Accidents per Ten Thousand Kms of Roads': 'Accidents Per 10K KM Road',
    'Number of Persons Killed Per Lakh Population': 'Killed Per Lakh',
    'Number of Persons Killed Per Ten Thousand Vehicles': 'Killed Per 10K Vehicles',
    'Number of Persons Killed per Ten Thousand Kms of Roads': 'Killed Per 10K KM Road',
    'Number of Persons Injured per Lakh Population': 'Injured Per Lakh',
    'Number of Persons Injured Per Ten Thousand Vehicles': 'Injured Per 10K Vehicles',
    'Number of Persons Injured Per Ten Thousand Kms of Roads': 'Injured Per 10K KM Road',
}

# The frame is modified in place so `accident_data` keeps referring to the
# same object.
accident_data.rename(columns=short_column_names, inplace=True)
accident_data

In [None]:
# Remove the 'Unnamed: 0' column (presumably a row index saved into the CSV
# -- confirm against the file). `errors='ignore'` keeps this cell re-runnable:
# without it a second execution raises KeyError because the column is already
# gone.
accident_data.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

In [None]:
# Display the frame to check the result of the column drop above.
accident_data

In [None]:
import re
def clean_numerical_columns(value):
    """Convert one raw cell value into a number, or NaN if that is impossible.

    Parameters
    ----------
    value : object
        A single cell taken from an object-dtype column.

    Returns
    -------
    int, float or numpy number
        * Numeric input (``int``, ``float``, NumPy scalar) is returned
          unchanged instead of being discarded.
        * A string without ``'.'`` has every non-digit character removed
          (thousands separators, footnote markers, ...) and is parsed as
          ``int``.
        * A string containing ``'.'`` keeps only digits and dots and is
          parsed as ``float``.
        * Anything that cannot be parsed -- empty result, malformed dot
          sequences such as ``'N.A.'`` or ``'1.2.3'``, ``None`` and other
          non-string objects -- yields ``np.nan``.

    Notes
    -----
    Sign characters are stripped together with every other non-digit, so
    negative numbers are not supported.
    """
    # Already numeric: keep the value rather than replacing it with NaN.
    if isinstance(value, (int, float, np.number)):
        return value
    if not isinstance(value, str):
        return np.nan

    if '.' in value:
        cleaned, parse = re.sub(r'[^0-9.]+', '', value), float
    else:
        cleaned, parse = re.sub(r'[^0-9]+', '', value), int

    if not cleaned:
        return np.nan
    try:
        return parse(cleaned)
    except ValueError:
        # e.g. '..' left over from 'N.A.', or '1.2.3': not a valid number.
        return np.nan

# Only object-dtype columns are cleaned; columns pandas already parsed as
# numeric are left untouched.
object_columns = accident_data.select_dtypes(include=['object']).columns
for column in object_columns:
    accident_data[column] = accident_data[column].map(clean_numerical_columns)

In [None]:
# Drop every row whose 'Year' equals 1 (presumably a column-numbering row
# carried over from the source table -- confirm against the raw data).
year_is_one = accident_data['Year'] == 1
accident_data.drop(index=accident_data.index[year_is_one], inplace=True)

In [None]:
# Display the cleaned frame after the 'Year' == 1 rows were removed.
accident_data

In [None]:
# Area chart of total accidents per year, with a marker on every data point
# and the mean over all rows shown as a text label.
mean_accidents = accident_data['Total accidents'].mean()

fig = px.area(
    accident_data,
    x='Year',
    y='Total accidents',
    title='Total accidents per year',
)
fig.add_trace(
    go.Scatter(
        x=accident_data['Year'],
        y=accident_data['Total accidents'],
        mode='markers',
        name='Accidents',
        marker={'color': 'blue', 'size': 10},
    )
)
# 'paper' references place the label relative to the plotting area rather
# than in data coordinates.
fig.add_annotation(
    xref='paper',
    yref='paper',
    x=0.1,
    y=1,
    text=f"Average accidents per year = {mean_accidents:.0f}",
    showarrow=False,
)
fig.show()

In [None]:
# Area chart of persons killed per year, with a marker on every data point
# and the mean over all rows shown as a text label.
# NOTE: the name `mean_accidents` is reused here for the mean of
# 'Total Killed'; it is kept because later cells may read it.
mean_accidents = accident_data['Total Killed'].mean()

fig = px.area(
    accident_data,
    x='Year',
    y='Total Killed',
    title='Total Killed per year',
)
fig.add_trace(
    go.Scatter(
        x=accident_data['Year'],
        y=accident_data['Total Killed'],
        mode='markers',
        name='Killed',
        marker={'color': 'green', 'size': 10},
    )
)
# 'paper' references place the label relative to the plotting area rather
# than in data coordinates.
fig.add_annotation(
    xref='paper',
    yref='paper',
    x=0.1,
    y=1,
    text=f"Average Killed per year = {mean_accidents:.0f}",
    showarrow=False,
)
fig.show()

In [None]:
# Area chart of persons injured per year, with a marker on every data point
# and the mean over all rows shown as a text label.
# NOTE: the name `mean_accidents` is reused here for the mean of
# 'Total Injured'; it is kept because later cells may read it.
mean_accidents = accident_data['Total Injured'].mean()

fig = px.area(
    accident_data,
    x='Year',
    y='Total Injured',
    title='Total Injured per year',
)
fig.add_trace(
    go.Scatter(
        x=accident_data['Year'],
        y=accident_data['Total Injured'],
        mode='markers',
        name='Injured',
        marker={'color': 'red', 'size': 10},
    )
)
# 'paper' references place the label relative to the plotting area rather
# than in data coordinates.
fig.add_annotation(
    xref='paper',
    yref='paper',
    x=0.1,
    y=1,
    text=f"Average Injuries per year = {mean_accidents:.0f}",
    showarrow=False,
)
fig.show()