In [1]:
import numpy as np 
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go 
from plotly.subplots import make_subplots
from bokeh.io import output_file,show,output_notebook,push_notebook
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource,HoverTool,CategoricalColorMapper
from bokeh.layouts import row,column,gridplot
from bokeh.models.widgets import Tabs,Panel
# Configure Bokeh (imported above) to render its output inline in the notebook.
output_notebook()

In [2]:
# Location of the fraud_oracle.csv file (vehicle insurance claim data) on GitHub.
DATA_URL = "https://raw.githubusercontent.com/benvictoria17/DataVisualization/master/dataset/Vehicle%20Insurance%20Claim%20Fraud%20Detection/fraud_oracle.csv"

# Read the CSV into the frame that every later cell works from.
df = pd.read_csv(DATA_URL)

In [3]:
# Structural overview of the loaded frame: shape, per-column null counts,
# dtypes, the first five rows and transposed summary statistics. Each section
# is preceded by the same separator line.
separator = "#####################################################################################"

print(separator)
print(f"Shape of data: {df.shape}")
print(separator)
print("Missing values: ")
print(df.isnull().sum())
print(separator)
print(df.dtypes)
print(separator)
print(df.head(5))
print(separator)
print(df.describe().transpose())

#####################################################################################
Shape of data: (15420, 33)
#####################################################################################
Missing values: 
Month                   0
WeekOfMonth             0
DayOfWeek               0
Make                    0
AccidentArea            0
DayOfWeekClaimed        0
MonthClaimed            0
WeekOfMonthClaimed      0
Sex                     0
MaritalStatus           0
Age                     0
Fault                   0
PolicyType              0
VehicleCategory         0
VehiclePrice            0
FraudFound_P            0
PolicyNumber            0
RepNumber               0
Deductible              0
DriverRating            0
Days_Policy_Accident    0
Days_Policy_Claim       0
PastNumberOfClaims      0
AgeOfVehicle            0
AgeOfPolicyHolder       0
PoliceReportFiled       0
WitnessPresent          0
AgentType               0
NumberOfSuppliments     0
AddressChange_Claim     0
Numb

In [4]:
# Collect the names of every column whose dtype is object ("O").
cat_col = [name for name, dtype in df.dtypes.items() if dtype == "O"]
cat_col

['Month',
 'DayOfWeek',
 'Make',
 'AccidentArea',
 'DayOfWeekClaimed',
 'MonthClaimed',
 'Sex',
 'MaritalStatus',
 'Fault',
 'PolicyType',
 'VehicleCategory',
 'VehiclePrice',
 'Days_Policy_Accident',
 'Days_Policy_Claim',
 'PastNumberOfClaims',
 'AgeOfVehicle',
 'AgeOfPolicyHolder',
 'PoliceReportFiled',
 'WitnessPresent',
 'AgentType',
 'NumberOfSuppliments',
 'AddressChange_Claim',
 'NumberOfCars',
 'BasePolicy']

In [5]:
# Count the records that fall in each month. size() counts rows per group,
# which matches counting the grouping column itself, and reset_index(name=...)
# yields the two-column frame (Month, Counts) without in-place mutation.
df_month = df.groupby("Month").size().reset_index(name="Counts")
print(df_month)

# groupby sorts the month abbreviations alphabetically (Apr, Aug, Dec, ...).
# Pin the x-axis to calendar order so the bars can be read as a timeline.
month_order = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

fig = px.bar(df_month, x='Month', y='Counts', title="Number of accidents per month",
             category_orders={"Month": month_order})
fig.show()

   Month  Counts
0    Apr    1280
1    Aug    1127
2    Dec    1285
3    Feb    1266
4    Jan    1411
5    Jul    1257
6    Jun    1321
7    Mar    1360
8    May    1367
9    Nov    1201
10   Oct    1305
11   Sep    1240


In [6]:
# Count the records for every (month, weekday) combination.
df_month_weekday = df.groupby(["Month", "DayOfWeek"]).size().reset_index(name="Counts")
print(df_month_weekday.head(10))

# Both grouping keys come back in alphabetical order (Apr, Aug, ... and
# Friday, Monday, ...). Give plotly the natural calendar/week order so the
# x-axis, the stacking order and the legend are all readable.
month_order = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday",
                 "Friday", "Saturday", "Sunday"]

# Only three pattern shapes are supplied, so plotly cycles them across the
# seven weekdays; colour remains the unique identifier of a weekday.
fig = px.bar(df_month_weekday, x="Month", y="Counts", color="DayOfWeek",
             pattern_shape="DayOfWeek", pattern_shape_sequence=[".", "x", "+"],
             category_orders={"Month": month_order, "DayOfWeek": weekday_order},
             title="How many accidents happened on which days of the month?")
fig.show()

  Month  DayOfWeek  Counts
0   Apr     Friday     185
1   Apr     Monday     185
2   Apr   Saturday     222
3   Apr     Sunday     161
4   Apr   Thursday     172
5   Apr    Tuesday     176
6   Apr  Wednesday     179
7   Aug     Friday     158
8   Aug     Monday     189
9   Aug   Saturday     118


In [7]:

# Tally the records for each (sex, marital status) pair and flatten the
# result into the columns Sex, MaritalStatus, Counts.
df_sex_maritalstatus = (
    df.groupby(["Sex", "MaritalStatus"])
    .size()
    .reset_index(name="Counts")
)
print(df_sex_maritalstatus.head(10))

# Grouped bars: one cluster per sex, one bar per marital status.
fig = px.bar(
    df_sex_maritalstatus,
    x="Sex",
    y="Counts",
    color='MaritalStatus',
    barmode='group',
    height=400,
    title="Gender and marital status of the accident victims",
)
fig.show()

      Sex MaritalStatus  Counts
0  Female      Divorced      40
1  Female       Married    1325
2  Female        Single    1031
3  Female         Widow      24
4    Male      Divorced      36
5    Male       Married    9300
6    Male        Single    3653
7    Male         Widow      11


In [8]:
# Count the records for every (year, month) combination.
df_year_month = df.groupby(["Year", "Month"]).size().reset_index(name="Counts")
print(df_year_month.head(10))

# Month is the colour dimension here. Without an explicit order the stacked
# segments and the legend follow the alphabetical groupby order (Apr, Aug, ...),
# so pin them to calendar order.
month_order = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

fig = px.bar(df_year_month, x="Year", y="Counts", color="Month",
             category_orders={"Month": month_order},
             title="Number of accidents by Years and Months")
fig.show()

   Year Month  Counts
0  1994   Apr     533
1  1994   Aug     470
2  1994   Dec     471
3  1994   Feb     528
4  1994   Jan     608
5  1994   Jul     495
6  1994   Jun     543
7  1994   Mar     584
8  1994   May     569
9  1994   Nov     453


In [9]:
# Tally the records for each (brand, vehicle-age bucket) pair. The resulting
# frame (Make, AgeOfVehicle, Counts) feeds the donut charts in later cells.
df_make_vehicle_age = (
    df.groupby(["Make", "AgeOfVehicle"])
    .size()
    .reset_index(name="Counts")
)
print(df_make_vehicle_age.head(10))
print("------------------------------------------------------------", "Car Brands: ", sep="\n")
print(df_make_vehicle_age["Make"].unique())

     Make AgeOfVehicle  Counts
0  Accura      2 years       1
1  Accura      3 years       2
2  Accura      4 years       1
3  Accura      5 years      19
4  Accura      6 years      68
5  Accura      7 years     226
6  Accura  more than 7     155
7     BMW      2 years       1
8     BMW      5 years       1
9     BMW      6 years       2
------------------------------------------------------------
Car Brands: 
['Accura' 'BMW' 'Chevrolet' 'Dodge' 'Ferrari' 'Ford' 'Honda' 'Jaguar'
 'Lexus' 'Mazda' 'Mecedes' 'Mercury' 'Nisson' 'Pontiac' 'Porche' 'Saab'
 'Saturn' 'Toyota' 'VW']


In [10]:

# Donut charts of the vehicle-age distribution for three brands, one subplot
# per brand. Each entry pairs a brand with the x position (paper coordinates)
# of the annotation placed in the centre of its donut.
brand_positions = [("Accura", 0.12), ("BMW", 0.50), ("Chevrolet", 0.89)]

# Pie traces need 'domain'-type subplot cells rather than cartesian axes.
fig = make_subplots(rows=1, cols=3, specs=[[{'type': 'domain'} for _ in brand_positions]])
for col_number, (brand, _) in enumerate(brand_positions, start=1):
    # Labels and values must come from the SAME brand-filtered rows. Passing
    # the unfiltered AgeOfVehicle column as labels pairs each count with the
    # label at the same position in the full frame, i.e. the first brand's
    # age buckets, which mislabels every other brand's slices.
    brand_rows = df_make_vehicle_age.loc[df_make_vehicle_age["Make"] == brand]
    fig.add_trace(go.Pie(labels=brand_rows["AgeOfVehicle"],
                         values=brand_rows["Counts"],
                         name=brand),
                  1, col_number)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Ages of vehicles involved in the accident by car brands",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text=brand, x=x_pos, y=0.5, font_size=10, showarrow=False)
                 for brand, x_pos in brand_positions])
fig.show()

In [11]:

# Donut charts of the vehicle-age distribution for three more brands, one
# subplot per brand. Each entry pairs a brand with the x position (paper
# coordinates) of the annotation placed in the centre of its donut.
brand_positions = [("Ferrari", 0.12), ("Ford", 0.50), ("Toyota", 0.88)]

# Pie traces need 'domain'-type subplot cells rather than cartesian axes.
fig = make_subplots(rows=1, cols=3, specs=[[{'type': 'domain'} for _ in brand_positions]])
for col_number, (brand, _) in enumerate(brand_positions, start=1):
    # Labels and values must come from the SAME brand-filtered rows. Passing
    # the unfiltered AgeOfVehicle column as labels pairs each count with the
    # label at the same position in the full frame (another brand's age
    # buckets), so the slices end up mislabelled.
    brand_rows = df_make_vehicle_age.loc[df_make_vehicle_age["Make"] == brand]
    fig.add_trace(go.Pie(labels=brand_rows["AgeOfVehicle"],
                         values=brand_rows["Counts"],
                         name=brand),
                  1, col_number)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Ages of vehicles involved in the accident by car brands",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text=brand, x=x_pos, y=0.5, font_size=10, showarrow=False)
                 for brand, x_pos in brand_positions])
fig.show()

In [12]:
# Tally the records for each (brand, vehicle-price band) pair. The resulting
# frame (Make, VehiclePrice, Counts) feeds the price donut charts that follow.
df_exp_vehicle_brand = (
    df.groupby(["Make", "VehiclePrice"])
    .size()
    .reset_index(name="Counts")
)
print(df_exp_vehicle_brand.head(10))
print("------------------------------------------------------------", "Vehicle Prices: ", sep="\n")
print(df_exp_vehicle_brand["VehiclePrice"].unique())

        Make     VehiclePrice  Counts
0     Accura   20000 to 29000      90
1     Accura   30000 to 39000     202
2     Accura   40000 to 59000      18
3     Accura   60000 to 69000       2
4     Accura  more than 69000     160
5        BMW   20000 to 29000       8
6        BMW   30000 to 39000       5
7        BMW   40000 to 59000       1
8        BMW  more than 69000       1
9  Chevrolet   20000 to 29000     721
------------------------------------------------------------
Vehicle Prices: 
['20000 to 29000' '30000 to 39000' '40000 to 59000' '60000 to 69000'
 'more than 69000' 'less than 20000']


In [13]:

# Donut charts of the vehicle-price distribution for three brands, one subplot
# per brand. Each entry pairs a brand with the x position (paper coordinates)
# of the annotation placed in the centre of its donut.
brand_positions = [("Accura", 0.12), ("BMW", 0.50), ("Chevrolet", 0.89)]

# Pie traces need 'domain'-type subplot cells rather than cartesian axes.
fig = make_subplots(rows=1, cols=3, specs=[[{'type': 'domain'} for _ in brand_positions]])
for col_number, (brand, _) in enumerate(brand_positions, start=1):
    # Build the mask from df_exp_vehicle_brand itself. A mask taken from
    # df_make_vehicle_age is aligned by row index against a differently shaped
    # frame and therefore selects rows belonging to other brands. Labels are
    # taken from the same filtered rows so each count keeps its own price band.
    brand_rows = df_exp_vehicle_brand.loc[df_exp_vehicle_brand["Make"] == brand]
    fig.add_trace(go.Pie(labels=brand_rows["VehiclePrice"],
                         values=brand_rows["Counts"],
                         name=brand),
                  1, col_number)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Price of vehicles involved in the accident by car brands",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text=brand, x=x_pos, y=0.5, font_size=10, showarrow=False)
                 for brand, x_pos in brand_positions])
fig.show()
# Price of vehicles involved in the accident by car brands  (consider with all years)

# Each entry pairs a brand with the x position (paper coordinates) of the
# annotation placed in the centre of its donut.
brand_positions = [("Ferrari", 0.12), ("Ford", 0.50), ("Toyota", 0.88)]

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=3, specs=[[{'type': 'domain'} for _ in brand_positions]])
for col_number, (brand, _) in enumerate(brand_positions, start=1):
    # Filter on the brand the trace is actually named after. Previously the
    # traces named Ferrari/Ford/Toyota were fed the Accura/BMW/Chevrolet
    # filters, and the mask came from df_make_vehicle_age rather than from
    # df_exp_vehicle_brand. Labels are taken from the same filtered rows so
    # each count keeps its own price band.
    brand_rows = df_exp_vehicle_brand.loc[df_exp_vehicle_brand["Make"] == brand]
    fig.add_trace(go.Pie(labels=brand_rows["VehiclePrice"],
                         values=brand_rows["Counts"],
                         name=brand),
                  1, col_number)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Price of vehicles involved in the accident by car brands",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text=brand, x=x_pos, y=0.5, font_size=10, showarrow=False)
                 for brand, x_pos in brand_positions])
fig.show()
# Regions where accidents occurred by years
# Tally the records for each (accident area, year) pair and flatten the result
# into the columns AccidentArea, Year, Counts.
df_area_year = (
    df.groupby(["AccidentArea", "Year"])
    .size()
    .reset_index(name="Counts")
)
print(df_area_year.head(10))

# Grouped bars: one cluster per year, one bar per accident area.
fig = px.bar(
    df_area_year,
    x="Year",
    y="Counts",
    color='AccidentArea',
    barmode='group',
    height=400,
    title="Regions where accidents occurred by year",
)
fig.show()

  AccidentArea  Year  Counts
0        Rural  1994     642
1        Rural  1995     536
2        Rural  1996     420
3        Urban  1994    5500
4        Urban  1995    4659
5        Urban  1996    3663
