# Driving styles and car type

In [1]:
import os
# Work from the data directory so the relative "input/..." and "working/..."
# paths used in the cells below resolve against it.
os.chdir('../data')

In [2]:
import scipy
import numpy as np
import pandas as pd
import pingouin as pg
import matplotlib.pyplot as plt
from statsmodels.stats.anova import AnovaRM

In [105]:
# Load the survey results export from the input folder.
driving_data = pd.read_csv("input/results-survey528393.csv")

In [106]:
# Report the (rows, columns) shape of the loaded frame.
print("Driving data", driving_data.shape)

Driving data (43, 334)


In [107]:
# Number of rows for each value of the `condition` column.
driving_data["condition"].value_counts()

2    15
1    15
3    13
Name: condition, dtype: int64

## Preprocessing

Get the results of the driving style scale that each participant responded to.

**TODO**: maybe group all agg and all def. Then group all fam and sports.

In [108]:
# Group the questionnaire item columns by driving style.
# Column names look like "aggpassenger[SQ001]"; names with a "2" directly
# before the bracket (e.g. "aggsports2[SQ001]") go into the `*2` groups.
# Raw strings are used so that "\[" / "\]" reach the regex engine as written
# instead of being parsed as (invalid) Python string escapes.
item_suffix = r"\[SQ00[1-2]\]"
all_columns = driving_data.columns

aggressive = all_columns[all_columns.str.fullmatch(r"agg[^2]+" + item_suffix)]
aggressive2 = all_columns[all_columns.str.fullmatch(r"agg.+2" + item_suffix)]

defensive = all_columns[all_columns.str.fullmatch(r"def[^2]+" + item_suffix)]
defensive2 = all_columns[all_columns.str.fullmatch(r"def.+2" + item_suffix)]

In [109]:
# Inspect which columns the first aggressive pattern selected.
aggressive

Index(['aggpassenger[SQ001]', 'aggpassenger[SQ002]', 'aggdriver[SQ001]',
       'aggdriver[SQ002]', 'aggrural[SQ001]', 'aggrural[SQ002]',
       'aggurban[SQ001]', 'aggurban[SQ002]', 'aggfamily[SQ001]',
       'aggfamily[SQ002]', 'aggsports[SQ001]', 'aggsports[SQ002]'],
      dtype='object')

In [110]:
# Row-wise mean of every item group, stored as a new column per group.
item_groups = {
    "aggmean": aggressive,
    "aggmean2": aggressive2,
    "defmean": defensive,
    "defmean2": defensive2,
}
for mean_column, item_columns in item_groups.items():
    driving_data[mean_column] = driving_data[item_columns].mean(axis=1)

# Overall score per style: mean of the two group means.
# "aggressive" is called "assertive" in Yusof's paper.
driving_data["aggressive"] = driving_data[["aggmean", "aggmean2"]].mean(axis=1)
driving_data["defensive"] = driving_data[["defmean", "defmean2"]].mean(axis=1)

In [73]:
# Split the item columns by car type (family vs. sports) and driving style.
# ".+" also matches a trailing "2", so both "aggsports[...]" and
# "aggsports2[...]" columns end up in the same group.
# Raw strings keep "\[" / "\]" as regex escapes rather than invalid Python
# string escapes.
car_item_suffix = r".+\[SQ00[1-2]\]"
survey_columns = driving_data.columns

agg_family = survey_columns[survey_columns.str.fullmatch(r"aggfam" + car_item_suffix)]
def_family = survey_columns[survey_columns.str.fullmatch(r"deffam" + car_item_suffix)]

agg_sports = survey_columns[survey_columns.str.fullmatch(r"aggsport" + car_item_suffix)]
def_sports = survey_columns[survey_columns.str.fullmatch(r"defsport" + car_item_suffix)]

In [74]:
# Inspect which columns the aggressive/sports pattern selected.
agg_sports

Index(['aggsports[SQ001]', 'aggsports[SQ002]', 'aggsports2[SQ001]',
       'aggsports2[SQ002]'],
      dtype='object')

In [114]:
# Only condition 3 explores car type, so restrict to those rows.
condition3_rows = driving_data[driving_data["condition"] == 3]

# One column of row-wise means per (driving style, car type) item group.
car_item_groups = {
    "agg_family": agg_family,
    "def_family": def_family,
    "agg_sports": agg_sports,
    "def_sports": def_sports,
}
car_type_data = pd.DataFrame(
    {
        group_name: condition3_rows.loc[:, group_columns].mean(axis=1)
        for group_name, group_columns in car_item_groups.items()
    }
)

# Expose the original row index as an explicit "id" column before saving.
car_type_data = car_type_data.reset_index()
car_type_data = car_type_data.rename(columns={"index": "id"})
car_type_data.to_csv("working/car_type_data.csv", index=False)
car_type_data.head()

Unnamed: 0,id,agg_family,def_family,agg_sports,def_sports
0,1,2.25,2.25,2.75,3.25
1,24,3.5,2.0,2.75,2.5
2,25,3.25,3.75,4.0,3.75
3,27,1.0,3.25,2.25,3.75
4,30,2.5,1.75,2.75,2.25


In [83]:
def _ratings_long(wide, column, driving_style, car_type):
    """Return one long-format block for a single (driving style, car type) cell.

    Parameters
    ----------
    wide : pd.DataFrame
        Frame holding one column of per-participant mean ratings per cell.
    column : str
        Name of the column in `wide` to use as the rating.
    driving_style, car_type : str
        Constant labels repeated on every row of the result.

    Returns
    -------
    pd.DataFrame
        Columns "driving_style", "car_type" and "rating", sharing the index
        of `wide`.
    """
    return pd.DataFrame(
        {
            "driving_style": driving_style,
            "car_type": car_type,
            "rating": wide[column],
        }
    )


# `car_type_data` already holds the per-participant means in its
# "agg_family" / "def_family" / "agg_sports" / "def_sports" columns; the raw
# item columns are no longer part of it, so index by those mean columns.
# Using "id" as the index keeps the participant id attached to every rating.
ratings_by_id = car_type_data.set_index("id")

agg_family_data = _ratings_long(ratings_by_id, "agg_family", "aggressive", "family")
def_family_data = _ratings_long(ratings_by_id, "def_family", "defensive", "family")
agg_sports_data = _ratings_long(ratings_by_id, "agg_sports", "aggressive", "sports")
def_sports_data = _ratings_long(ratings_by_id, "def_sports", "defensive", "sports")

In [115]:
# Stack the four (driving style, car type) blocks into one long-format frame
# and turn the index carried by each block into an explicit "id" column.
car_type_data_unpivot = (
    pd.concat([agg_family_data, def_family_data, agg_sports_data, def_sports_data])
    .reset_index()
    .rename(columns={"index": "id"})
)

# Fixed seed so the displayed preview is the same on every run.
car_type_data_unpivot.sample(5, random_state=0)

Unnamed: 0,id,driving_style,car_type,rating
50,41,defensive,sports,2.0
14,24,defensive,family,2.0
20,36,defensive,family,2.75
9,39,aggressive,family,2.0
21,38,defensive,family,3.5


In [116]:
# Save the long-format frame as CSV without the row index.
# NOTE(review): the file name says "pivot" although the frame written here is
# the unpivoted (long) one — confirm before renaming, other tools may read it.
car_type_data_unpivot.to_csv("working/cartype_data_pivot.csv", index=False)

In [None]:
# Make the car type data usable for JASP: one row per (id, driving_style).
# The overall "aggressive"/"defensive" scores live in `driving_data`, not in
# the wide `car_type_data` table, so take them from the condition-3 rows and
# expose the row index as "id" (same convention as `car_type_data`).
columns = ["aggressive", "defensive"]
jasp_source = (
    driving_data.loc[driving_data["condition"] == 3, columns]
    .reset_index()
    .rename(columns={"index": "id"})
)
jasp = pd.melt(jasp_source, id_vars="id", var_name="driving_style", value_name="rating")
jasp.to_csv("working/cartype_jasp.csv", index=False)

In [121]:
# Persist the three working frames so later sections can reload them.
frames_to_save = (
    (driving_data, "working/driving_data.pkl"),
    (car_type_data, "working/car_type_data.pkl"),
    (car_type_data_unpivot, "working/car_type_data_unpivot.pkl"),
)
for frame, pickle_path in frames_to_save:
    frame.to_pickle(pickle_path)

## Descriptive statistics

### Demographics

In [35]:
# Demographic columns reported in the descriptive statistics below.
descriptive_columns = [
    "drivingxpcat",     # how often the participant drove in the last 12 months
    "educationyears",
    "age",
    "gender",
    "drivingxp",        # years of experience driving cars
    "educationlevel",
]

In [139]:
# Demographic columns for the rows of condition 3 only.
in_car_type_condition = driving_data["condition"] == 3
car_type_descriptive_data = driving_data.loc[in_car_type_condition, descriptive_columns]
car_type_descriptive_data.shape

In [40]:
print("Descriptive data of car type condition", car_type_descriptive_data.shape, "\n")

# Relative frequency of every category in each nominal column.
nominal_columns = ("drivingxpcat", "gender", "educationlevel")
for column_name in nominal_columns:
    category_shares = car_type_descriptive_data[column_name].value_counts(normalize=True)
    print(category_shares, "\n")

Descriptive data of car type condition (13, 6) 

Less than once a month.    0.384615
Once a week.               0.307692
Not at all.                0.153846
Almost daily.              0.076923
Once a month.              0.076923
Name: drivingxpcat, dtype: float64 

Female    0.615385
Male      0.384615
Name: gender, dtype: float64 

Bachelor degree                         0.692308
Masters/Diploma degree                  0.230769
Vocational training/Berufsausbildung    0.076923
Name: educationlevel, dtype: float64 



In [41]:
# Summary statistics for the numeric demographic columns.
car_type_descriptive_data.describe()

Unnamed: 0,educationyears,age,drivingxp
count,13.0,12.0,13.0
mean,16.923077,28.833333,9.0
std,2.361551,9.962049,10.824355
min,12.0,21.0,1.0
25%,16.0,24.0,2.0
50%,18.0,26.0,5.0
75%,18.0,28.5,10.0
max,20.0,59.0,41.0


In [42]:
# Age distribution, ordered by age rather than by frequency.
car_type_descriptive_data["age"].value_counts().sort_index()

21.0    1
23.0    1
24.0    2
26.0    3
27.0    1
28.0    1
30.0    1
32.0    1
59.0    1
Name: age, dtype: int64

In [146]:
# Mean rating per car type, pooled over both driving styles.
all_ratings = car_type_data_unpivot["rating"]
car_type_labels = car_type_data_unpivot["car_type"]

family_mean = all_ratings[car_type_labels == "family"].mean()
sports_mean = all_ratings[car_type_labels == "sports"].mean()

print("family:", family_mean, "sports:", sports_mean)

family: 2.576923076923077 sports: 2.855769230769231


## Results

### t-test

Comparing ratings of aggressive/defensive driving behaviours.

First comparing means for the whole data.

Then comparing means for the car type condition.

We used an independent-samples t-test for both comparisons.

In [43]:
# Reload the frames written by the preprocessing section.
# Only unpickle files produced by this notebook: pickle can execute code.
driving_data, car_type_data = map(
    pd.read_pickle,
    ("working/driving_data.pkl", "working/car_type_data.pkl"),
)

In [52]:
# One-sided t-test on the whole data set: is the mean aggressive score
# greater than the mean defensive score?
# NOTE(review): both columns come from the same rows of `driving_data`, so a
# paired test may be the better fit — confirm against the study design.
aggressive_scores = driving_data["aggressive"]
defensive_scores = driving_data["defensive"]
pg.ttest(aggressive_scores, defensive_scores, alternative="greater")

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,0.091332,84,greater,0.463723,"[-0.25, inf]",0.019697,0.452,0.060062


In [48]:
# Raw difference between the mean aggressive and mean defensive score over all rows.
driving_data["aggressive"].mean() - driving_data["defensive"].mean()

0.014534883720930036

In [46]:
# One-sided t-test restricted to the car type condition (condition == 3).
# The overall "aggressive"/"defensive" scores are columns of `driving_data`;
# the wide `car_type_data` table only holds the per-car-type means, so the
# scores are selected from `driving_data` here.
# NOTE(review): both samples come from the same rows, so a paired test may be
# the better fit — confirm against the study design.
car_type_scores = driving_data.loc[driving_data["condition"] == 3, ["aggressive", "defensive"]]
pg.ttest(car_type_scores["aggressive"], car_type_scores["defensive"], alternative="greater")

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-0.559925,24,greater,0.70964,"[-0.58, inf]",0.219621,0.815,0.014287


### Repeated measures ANOVA

In [118]:
# Peek at the long-format frame that is passed to the ANOVA below.
car_type_data_unpivot.head()

Unnamed: 0,id,driving_style,car_type,rating
0,1,aggressive,family,2.25
1,24,aggressive,family,3.5
2,25,aggressive,family,3.25
3,27,aggressive,family,1.0
4,30,aggressive,family,2.5


In [147]:
# Two-way repeated measures ANOVA: rating by driving style and car type,
# with "id" identifying the subject.
anova_model = AnovaRM(
    data=car_type_data_unpivot,
    depvar="rating",
    subject="id",
    within=["driving_style", "car_type"],
)
anova_results = anova_model.fit()
print(anova_results)

                       Anova
                       F Value Num DF  Den DF Pr > F
----------------------------------------------------
driving_style           0.3384 1.0000 12.0000 0.5715
car_type                3.6913 1.0000 12.0000 0.0788
driving_style:car_type  0.4558 1.0000 12.0000 0.5124



In [148]:
# Show the wide table of mean ratings (one row per id).
car_type_data

Unnamed: 0,id,agg_family,def_family,agg_sports,def_sports
0,1,2.25,2.25,2.75,3.25
1,24,3.5,2.0,2.75,2.5
2,25,3.25,3.75,4.0,3.75
3,27,1.0,3.25,2.25,3.75
4,30,2.5,1.75,2.75,2.25
5,31,1.25,1.5,2.0,3.0
6,32,3.5,3.75,2.5,2.75
7,36,3.25,2.75,3.0,3.0
8,38,1.75,3.5,2.25,3.0
9,39,2.0,2.0,2.75,2.0
