# Get started

Presents the basic statistical concepts needed to get started.


In [50]:
import pandas as pd

In [51]:
# Toy dataset: two equally long lists of measurements, keyed by column name.
dataset_measures_dict = dict(
    age=[15, 18, 25, 25, 40, 55, 58, 60, 80],
    height=[160, 162, 165, 160, 172, 174, 174, 174, 176],
)

In [52]:
# Build the DataFrame from the dict of columns: each key becomes a column.
df_measures = pd.DataFrame(dataset_measures_dict)

# Display the (small) frame as the cell output.
df_measures

Unnamed: 0,age,height
0,15,160
1,18,162
2,25,165
3,25,160
4,40,172
5,55,174
6,58,174
7,60,174
8,80,176


## Position Measurements

See the position measurements notebook for the underlying concepts.


In [53]:
# Arithmetic mean of the "age" column.
df_measures["age"].mean()

41.77777777777778

In [54]:
# Median of the "age" column.
df_measures["age"].median()

40.0

In [55]:
# Mode of the "age" column; the result is a Series because a column can have several modes.
df_measures["age"].mode()

0    25
Name: age, dtype: int64

## Dispersion Measurements

See the dispersion measurements notebook for the underlying concepts.


In [56]:
# Variance of the "age" column (pandas default: sample variance, ddof=1).
df_measures["age"].var()

509.94444444444446

In [57]:
# Standard deviation of the "age" column (pandas default: sample std, ddof=1).
df_measures["age"].std()

22.58194952709895

In [58]:
# Coefficient of variation of "age", expressed as a percentage: std / mean * 100.
df_measures["age"].std() / df_measures["age"].mean() * 100

54.05253876167302

## Form Measurements

See the form measurements notebook for the underlying concepts.


In [59]:
# Skewness (asymmetry) of the "age" column.
df_measures["age"].skew()

0.368108517895537

In [60]:
# Skewness (asymmetry) of the "height" column.
df_measures["height"].skew()

-0.34412138466019687

In [61]:
# Kurtosis of the "age" column.
df_measures["age"].kurtosis()

-1.1344461075421046

In [62]:
# Kurtosis of the "height" column.
df_measures["height"].kurtosis()

-2.090571159409713

## Measure metrics


In [63]:
# Summary statistics for "age": count, mean, std, min, quartiles and max.
df_measures["age"].describe()

count     9.000000
mean     41.777778
std      22.581950
min      15.000000
25%      25.000000
50%      40.000000
75%      58.000000
max      80.000000
Name: age, dtype: float64

In [64]:
# Summary statistics for "height": count, mean, std, min, quartiles and max.
df_measures["height"].describe()

count      9.000000
mean     168.555556
std        6.691620
min      160.000000
25%      162.000000
50%      172.000000
75%      174.000000
max      176.000000
Name: height, dtype: float64

## Variables correlation

See the correlation notebook for the underlying concepts.


In [65]:
# Pearson correlation matrix over every column of the DataFrame (here: age and height).
df_measures.corr(method="pearson")

Unnamed: 0,age,height
age,1.0,0.931534
height,0.931534,1.0


In [66]:
# Pearson correlation between two explicitly selected columns.
df_measures["age"].corr(df_measures["height"], method="pearson")

0.9315336676674598

In [67]:
# Spearman rank correlation matrix over every column of the DataFrame (here: age and height).
df_measures.corr(method="spearman")

Unnamed: 0,age,height
age,1.0,0.936034
height,0.936034,1.0
