In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import statistics as st
import scipy.stats as stats
import math
import itertools
import arviz as az
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=Warning)



In [2]:
%config InlineBackend.figure_format = 'retina'

RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

az.style.use("arviz-grayscale")
plt.rcParams['figure.dpi'] = 300

In [3]:
df = pd.read_csv("../data/video_games.csv")
df.head()

Unnamed: 0,outcome,condition
0,7.145137,Violent game
1,7.48493,Violent game
2,7.29747,Violent game
3,7.760477,Violent game
4,6.497964,Violent game


In [4]:
df.shape

(800, 2)

In [5]:
summary = df.groupby('condition')['outcome'].agg(
    mean='mean',
    standard_deviation='std',
    standard_error=lambda x: x.std() / len(x) ** 0.5
)

print(summary)

                  mean  standard_deviation  standard_error
condition                                                 
Non-violent game  6.65                0.53          0.0265
Violent game      6.81                0.51          0.0255


In [6]:
# Assuming df has at least 100 rows
df_100 = df.sample(n=100)

In [7]:
summary_100 = df_100.groupby('condition')['outcome'].agg(
    mean='mean',
    standard_deviation='std',
    standard_error=lambda x: x.std() / len(x) ** 0.5
)

print(summary_100)

                      mean  standard_deviation  standard_error
condition                                                     
Non-violent game  6.705947            0.495681        0.071545
Violent game      6.827266            0.451882        0.062665


In [8]:
def cohen_d(mean1, mean2, sd1, sd2, n1, n2):
    pooled_sd = math.sqrt(((n1 - 1) * sd1**2 + (n2 - 1) * sd2**2) / (n1 + n2 - 2))
    return (mean2 - mean1) / pooled_sd

In [9]:
mean1 = 6.663322
mean2 = 6.861076
sd1 = 0.566330
sd2 = 0.519873
n1 = n2 = 100

result = cohen_d(mean1, mean2, sd1, sd2, n1, n2)
print("Cohen's d:", result)

Cohen's d: 0.3637871999925845


In [10]:
mean1 = 6.65
mean2 = 6.81
sd1 = 0.53
sd2 = 0.51
n1 = n2 = 800

result = cohen_d(mean1, mean2, sd1, sd2, n1, n2)
print("Cohen's d:", result)

Cohen's d: 0.3076354277014899


In [11]:
%load_ext watermark
%watermark -n -u -v -iv 

Last updated: Sun Sep 17 2023

Python implementation: CPython
Python version       : 3.11.5
IPython version      : 8.15.0

arviz     : 0.16.0
seaborn   : 0.12.2
pandas    : 2.0.3
numpy     : 1.25.2
matplotlib: 3.7.2
scipy     : 1.10.1

