In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns
import statsmodels.stats.power as pwr
import math as ma

In [2]:
# importing and cleaning our data

osrs_data = pd.read_csv("player-stats.csv")
print(osrs_data.shape)

# nix empty rows

osrs_data = osrs_data.dropna(axis = 0, how = "all")

# replace "nan" values with minimum level
# hitpoints starts at level 10
osrs_data["hitpoints"] = osrs_data["hitpoints"].fillna(10)
# other skills start at 1
osrs_data = osrs_data.fillna(1)

# check shape again

print(osrs_data.shape)

# printing summary for reference

print(osrs_data.columns)

# explort to csv

osrs_data.to_csv("cleaned_osrs_data.csv")

(1999625, 26)
(1999625, 26)
Index(['username', 'rank', 'total', 'attack', 'defence', 'strength',
       'hitpoints', 'ranged', 'prayer', 'magic', 'cooking', 'woodcutting',
       'fletching', 'fishing', 'firemaking', 'crafting', 'smithing', 'mining',
       'herblore', 'agility', 'thieving', 'slayer', 'farming', 'runecraft',
       'hunter', 'construction'],
      dtype='object')


In [3]:
# if a user has a 99 in a "skilling" skill, assign them skiller

skiller = osrs_data[((osrs_data["cooking"] == 99) | 
            (osrs_data["woodcutting"] == 99) |
            (osrs_data["fletching"] == 99) |
            (osrs_data["fishing"] == 99) |            
            (osrs_data["firemaking"] == 99) |
            (osrs_data["crafting"] == 99) |
            (osrs_data["smithing"] == 99) |
            (osrs_data["mining"] == 99) |
            (osrs_data["herblore"] == 99) |
            (osrs_data["agility"] == 99) |
            (osrs_data["thieving"] == 99) |
            (osrs_data["farming"] == 99) |
            (osrs_data["runecraft"] == 99) |
            (osrs_data["hunter"] == 99) |
            (osrs_data["construction"] == 99)) &
            ~((osrs_data["attack"] == 99) | 
            (osrs_data["defence"] == 99) | 
            (osrs_data["strength"] == 99) | 
            (osrs_data["hitpoints"] == 99) | 
            (osrs_data["ranged"] == 99) | 
            (osrs_data["magic"] == 99))]
            
# if they have 99s in a combat skill, assign them pvmer

pvmer = osrs_data[(osrs_data["attack"] == 99) | 
        (osrs_data["defence"] == 99) | 
        (osrs_data["strength"] == 99) | 
        (osrs_data["hitpoints"] == 99) | 
        (osrs_data["ranged"] == 99) |
        (osrs_data["magic"] == 99)]

print("Number of skillers: ", len(skiller))
print("Number of pvmers: ", len(pvmer))

skiller.to_csv("cleaned_osrs_skillers.csv")
pvmer.to_csv("cleaned_osrs_PvMers.csv")

Number of skillers:  248703
Number of pvmers:  532595


In [4]:
# calculate Cohen's d
diff = pvmer.total.mean() - skiller.total.mean()
pooledstdev = ma.sqrt((pvmer.total.std()**2 + skiller.total.std()**2)/2)
cohend = diff / pooledstdev
print("Cohen's d: ")
print(cohend)

Cohen's d: 
0.9269706425705225


In [5]:
# code used from Luke Hayden's "Performing Experiments in Python" DataCamp course:
# https://app.datacamp.com/learn/courses/experimental-design-in-python

# perform the t test
t_result = stats.ttest_ind(pvmer.total, skiller.total)
print("T test results:")
print(t_result)

T test results:
Ttest_indResult(statistic=381.96033871700257, pvalue=0.0)
