# ___Variable Types___
-----------------

In [16]:
import numpy as np
import pandas as pd
# Seed NumPy's legacy global RNG so the synthetic table is reproducible.
# 2011 is the value of the integer expression 2023 - 4 - 8 (subtraction, not a date).
np.random.seed(2011)

# Let pandas render up to 100 columns before it truncates a displayed frame.
pd.set_option("display.max_columns", 100)

In [17]:
# Let's make a synthetic table

In [21]:
# Build a five-row synthetic table from the global NumPy RNG.
# np.random.randint excludes its upper bound, so the draws cover
# ID in 62161..62201, Race in 1..4 and Age in 14..39.
data = pd.DataFrame(
    {
        "ID": np.random.randint(62161, 62202, size=5),
        "BMI": [23.4, 20.9, 18.99, 26.04, 25.94],
        "Race": np.random.randint(1, 5, size=5),  # upper bound 5 is exclusive: values are 1..4
        "Age": np.random.randint(14, 40, size=5),
    }
)

# Adult flag: 1 when Age >= 18, otherwise 0.
# A vectorized comparison avoids calling a Python lambda once per row.
data["Adult"] = data["Age"].ge(18).astype("int64")

data

Unnamed: 0,ID,BMI,Race,Age,Adult
0,62187,23.4,1,34,1
1,62196,20.9,2,16,0
2,62180,18.99,3,39,1
3,62181,26.04,1,14,0
4,62163,25.94,2,16,0


In [20]:
# Race ->
    # 1 - Mexican American
    # 2 - Other Hispanic
    # 3 - Non-Hispanic White
    # 4 - Non-Hispanic Black
    # 5 - Other
    
# Adult ->
    # 1 - If age >= 18
    # 0 - otherwise

## ___Can we reasonably compute the averages for BMI and Race?___

In [23]:
# BMI is a measured quantity, so taking its arithmetic mean is perfectly reasonable.

print(f"Average BMI is {data['BMI'].mean():.5f}")

Average BMI is 23.05400


In [26]:
# Race, on the other hand, is a categorical variable that merely happens to be encoded with numbers.
# The mean of those codes can be computed, but the result carries no meaning.

print(f"Average of Race? is {data['Race'].mean():.5f}")

Average of Race? is 1.80000


In [27]:
# What does this 1.800 suggest? ____________

In [28]:
# However, there are some analytical methods that do take the averages of categorical variables like Race!

## ___Quantitative variables___
-----------------

In [32]:
# Numerical, measurable quantities, on which arithmetic operations can be sensibly made.
# e.g. the BMI scores.

# There are two types of quantitative variables ->
        # Continuous
        # Discrete

In [33]:
# Continuous quantitative variables -> Could be any value within a range/interval -> too many possibilities
        # e.g. floating point numbers (real numbers) between 0 and 10.
        # BMI scores
        # Height of a person
        # Weight of a person

In [31]:
# Discrete quantitative variables -> Countable values (distinct, separate values that can be listed, often whole numbers).
        # e.g. Number of children in a household
        # Number of wheels in an automobile
        # A person's age in days

## ___Qualitative or categorical variables___
-----------------

In [34]:
# Classifies items into groups
# Categorical variables have two types ->
        # Categorical ordinal
        # Categorical nominal

In [35]:
# Categorical ordinal -> A categorical variable with some sort of ordering / ranking.
        # e.g. Age groups as 0 - 10, 11 - 20, 21 - 30
        # Highest educational status as Middle school, High school diploma, Associate's degree, Bachelor's degree, Master's degree, Doctorate.
        # Seniority in a university as first years, second years, third years & fourth years

In [36]:
# Categorical nominal -> Has no ranking associated with it, composed of mere labels.
        # Race of a group of people
        # Birth place of a group of people
        # Brands of the car a set of people own
        # Marital status
        # Sex

In [39]:
# What type of variable is the age of 1,000 people?
        # Quantitative discrete or quantitative continuous, depending on how it is recorded:
        # age in whole years is discrete, but someone might report their age as 14 and a half years (continuous).

In [40]:
# What type of variable is a person's marital status ?
        # Can be unmarried, married, divorced
        # Just three (finite number) options
        # No ranking is possible among the options
        # Nominal categorical variable