# Driving styles and car type

In [1]:
import os

# Move into the data directory so the relative "input/..." and "working/..."
# paths used by the later cells resolve.
# The guard keeps this cell safe to re-run: an unconditional chdir('../data')
# would move one level further (or fail) every time the cell is executed again.
if os.path.basename(os.getcwd()) != "data":
    os.chdir('../data')

In [2]:
import scipy
import numpy as np
import pandas as pd
import pingouin as pg
import matplotlib.pyplot as plt
from statsmodels.stats.anova import AnovaRM

In [187]:
# Load the survey results CSV (path is relative to the current working directory).
driving_data = pd.read_csv("input/results-survey528393.csv")

In [188]:
# Report the (rows, columns) shape of the loaded survey table.
print("Driving data", driving_data.shape)

Driving data (48, 334)


In [189]:
# Count rows per value of the "condition" column, ordered by condition value.
driving_data["condition"].value_counts().sort_index()

1    15
2    15
3    18
Name: condition, dtype: int64

## Preprocessing

Get the results of the driving style scale that each participant responded to, and keep only the participants in the "car type" condition (participants from the other conditions are filtered out).

In [196]:
# Keep only the rows whose "condition" equals 3 (the "car type" condition).
in_car_type_condition = driving_data["condition"].eq(3)
car_type_data = driving_data.loc[in_car_type_condition]

In [197]:
# Split the item columns by driving style (agg/def) and car type (family/sports).
# Each pattern must match the whole column name and keeps only the [SQ001] and
# [SQ002] sub-questions.
# Raw strings are used because "\[" in a normal string literal is an invalid
# escape sequence (DeprecationWarning, and a SyntaxWarning on Python 3.12+).
agg_family = car_type_data.columns[car_type_data.columns.str.fullmatch(r"aggfam.+\[SQ00[1-2]\]")]
def_family = car_type_data.columns[car_type_data.columns.str.fullmatch(r"deffam.+\[SQ00[1-2]\]")]

agg_sports = car_type_data.columns[car_type_data.columns.str.fullmatch(r"aggsport.+\[SQ00[1-2]\]")]
def_sports = car_type_data.columns[car_type_data.columns.str.fullmatch(r"defsport.+\[SQ00[1-2]\]")]

In [198]:
# Inspect which columns the aggressive/sports pattern matched.
agg_sports

Index(['aggsports2[SQ001]', 'aggsports2[SQ002]', 'aggsports[SQ001]',
       'aggsports[SQ002]'],
      dtype='object')

In [199]:
# Per-participant mean rating for each driving style x car type combination,
# averaged over the item columns selected for that combination.
conditions_data = pd.DataFrame(
    {
        "agg_family": car_type_data.loc[:, agg_family].mean(axis=1),
        "def_family": car_type_data.loc[:, def_family].mean(axis=1),
        "agg_sports": car_type_data.loc[:, agg_sports].mean(axis=1),
        "def_sports": car_type_data.loc[:, def_sports].mean(axis=1),
    }
)
# Drop any earlier copy of these columns before joining so the cell can be
# re-run: a plain join onto a frame that already holds them raises
# "columns overlap but no suffix specified".
car_type_data = car_type_data.drop(
    columns=conditions_data.columns, errors="ignore"
).join(conditions_data)

In [200]:
# List the column names of the car-type table.
car_type_data.columns

Index(['id', 'submitdate', 'lastpage', 'startlanguage', 'seed', 'startdate',
       'datestamp', 'consent', 'consent2', 'generatecode',
       ...
       'SensationArrayTime', 'TrustArrayTime', 'groupTime177',
       'participationhoursTime', 'groupTime188', 'FeedbackTime', 'agg_family',
       'def_family', 'agg_sports', 'def_sports'],
      dtype='object', length=338)

In [202]:
# Persist the car-type table as CSV, then display its shape.
# NOTE: index=False means the row labels are not written to the file.
car_type_data.to_csv("working/car_type_data.csv", index=False)
car_type_data.shape

(18, 338)

In [203]:
# get data in a different format (unpivot): one long-format frame per
# driving style x car type combination, built by a shared helper.
def _condition_ratings(driving_style, car_type, item_columns):
    """Build the long-format frame for one condition.

    The rating is the row-wise mean of ``item_columns`` in ``car_type_data``;
    the two labels are broadcast to every row. The frame keeps the row index
    of ``car_type_data``.
    """
    mean_rating = car_type_data.loc[:, item_columns].mean(axis=1)
    return pd.DataFrame(
        {
            "driving_style": driving_style,
            "car_type": car_type,
            "rating": mean_rating,
        }
    )


agg_family_data = _condition_ratings("aggressive", "family", agg_family)
def_family_data = _condition_ratings("defensive", "family", def_family)
agg_sports_data = _condition_ratings("aggressive", "sports", agg_sports)
def_sports_data = _condition_ratings("defensive", "sports", def_sports)

In [204]:
# Stack the four per-condition frames into one long table. The former row index
# becomes the "id" column, so each source row keeps the same id in all four
# conditions (this is the row label, not a column taken from the survey data).
condition_frames = [agg_family_data, def_family_data, agg_sports_data, def_sports_data]
car_type_data_unpivot = (
    pd.concat(condition_frames)
    .reset_index()
    .rename(columns={"index": "id"})
)
car_type_data_unpivot

Unnamed: 0,id,driving_style,car_type,rating
0,1,aggressive,family,2.25
1,24,aggressive,family,3.50
2,25,aggressive,family,3.25
3,27,aggressive,family,1.00
4,30,aggressive,family,2.50
...,...,...,...,...
67,43,defensive,sports,3.25
68,44,defensive,sports,3.25
69,45,defensive,sports,1.75
70,46,defensive,sports,3.25


In [205]:
# Save both the wide table and the long (unpivoted) table as pickle files.
car_type_data.to_pickle("working/car_type_data.pkl")
car_type_data_unpivot.to_pickle("working/car_type_data_unpivot.pkl")

## Descriptive statistics

### Demographics

In [213]:
# Reload the wide and long tables from the pickle files in working/.
car_type_data = pd.read_pickle("working/car_type_data.pkl")
car_type_data_unpivot = pd.read_pickle("working/car_type_data_unpivot.pkl")

In [208]:
# Columns selected for the demographic summary (order determines column order
# of the resulting table).
descriptive_columns = [
    "drivingxpcat",     # driving in the last 12 months
    "educationyears",
    "age",
    "gender",
    "drivingxp",        # years of experience driving cars
    "educationlevel",
]

In [209]:
# Restrict the car-type table to the demographic columns; display the shape as a quick check.
descriptive_data = car_type_data[descriptive_columns]
descriptive_data.shape

(18, 6)

In [210]:
# Relative frequency of every category for each nominal demographic variable.
print("Descriptive data of car type condition", descriptive_data.shape, "\n")
nominal_columns = ("drivingxpcat", "gender", "educationlevel")
for column_name in nominal_columns:
    proportions = descriptive_data[column_name].value_counts(normalize=True)
    print(proportions, "\n")

Descriptive data of car type condition (18, 6) 

Once a week.               0.388889
Less than once a month.    0.333333
Not at all.                0.111111
Almost daily.              0.111111
Once a month.              0.055556
Name: drivingxpcat, dtype: float64 

Female    0.555556
Male      0.444444
Name: gender, dtype: float64 

Bachelor degree                         0.611111
Masters/Diploma degree                  0.277778
Vocational training/Berufsausbildung    0.055556
Doctoral degree                         0.055556
Name: educationlevel, dtype: float64 



In [211]:
# Summary statistics (count, mean, std, quartiles) of the demographic columns.
descriptive_data.describe()

Unnamed: 0,educationyears,age,drivingxp
count,18.0,17.0,18.0
mean,17.277778,29.058824,9.055556
std,2.13667,8.437051,9.257571
min,12.0,21.0,1.0
25%,17.0,26.0,4.0
50%,18.0,26.0,7.5
75%,18.0,31.0,11.5
max,20.0,59.0,41.0


In [212]:
# Frequency of each reported age, in ascending age order.
descriptive_data["age"].value_counts().sort_index()

21.0    1
23.0    1
24.0    2
26.0    5
27.0    1
28.0    1
30.0    1
31.0    1
32.0    2
33.0    1
59.0    1
Name: age, dtype: int64

In [218]:
# Pool the ratings by car type (both driving styles together) and print the means.
is_family = car_type_data_unpivot["car_type"] == "family"
is_sports = car_type_data_unpivot["car_type"] == "sports"
family = car_type_data_unpivot.loc[is_family, "rating"]
sports = car_type_data_unpivot.loc[is_sports, "rating"]

print("family:", family.mean(), "sports:", sports.mean())

family: 2.5416666666666665 sports: 2.826388888888889


In [219]:
# Pool the ratings by driving style (both car types together) and print the means.
is_aggressive = car_type_data_unpivot["driving_style"] == "aggressive"
is_defensive = car_type_data_unpivot["driving_style"] == "defensive"
aggressive = car_type_data_unpivot.loc[is_aggressive, "rating"]
defensive = car_type_data_unpivot.loc[is_defensive, "rating"]

print("aggressive:", aggressive.mean(), "defensive:", defensive.mean())

aggressive: 2.6319444444444446 defensive: 2.736111111111111


## Results

### t-test

Comparing ratings of aggressive/defensive driving behaviours.

First, we compare the mean ratings of the aggressive and defensive driving styles, pooled over both car types.

Then, we compare the mean ratings of the two car types (family vs. sports), pooled over both driving styles.

We used independent-samples t-tests for both comparisons.

In [222]:
# t-test on the ratings pooled by driving style (aggressive vs. defensive), both
# taken from car_type_data_unpivot; the alternative passed is "greater".
# NOTE(review): both samples hold ratings from the same participants, while the
# text above describes an independent-samples test — confirm this is intended.
pg.ttest(aggressive, defensive, alternative="greater")

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-0.646816,70,greater,0.740067,"[-0.37, inf]",0.152456,0.582,0.011143


In [223]:
# Raw difference of the two means (aggressive minus defensive).
aggressive.mean() - defensive.mean()

-0.10416666666666652

In [226]:
# t-test on the ratings pooled by car type (family vs. sports), both taken from
# car_type_data_unpivot; the alternative passed is "greater".
# NOTE(review): both samples hold ratings from the same participants, while the
# text above describes an independent-samples test — confirm this is intended.
pg.ttest(family, sports, alternative="greater")

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-1.803178,70,greater,0.962168,"[-0.55, inf]",0.425013,0.516,0.000301


### Repeated measures ANOVA

In [227]:
# Preview the first rows of the long-format table (id, driving_style, car_type, rating).
car_type_data_unpivot.head()

Unnamed: 0,id,driving_style,car_type,rating
0,1,aggressive,family,2.25
1,24,aggressive,family,3.5
2,25,aggressive,family,3.25
3,27,aggressive,family,1.0
4,30,aggressive,family,2.5


In [228]:
# Two-way repeated-measures ANOVA on "rating" with driving_style and car_type
# as within-subject factors and "id" identifying the subject.
rm_anova = AnovaRM(
    data=car_type_data_unpivot,
    depvar="rating",
    subject="id",
    within=["driving_style", "car_type"],
)
rm_anova_results = rm_anova.fit()
print(rm_anova_results)

                       Anova
                       F Value Num DF  Den DF Pr > F
----------------------------------------------------
driving_style           0.3381 1.0000 17.0000 0.5686
car_type                6.2110 1.0000 17.0000 0.0233
driving_style:car_type  1.0363 1.0000 17.0000 0.3230

