# Getting started with R

## Brief history of R
- Programmers are great at naming
    - The name is a play on the “S” language, which R is a sequel or alternative to
- Interactive (interpreted)
- Focus on statistical packages and data handling
- Released in 1995
- CRAN, the Comprehensive R Archive Network, was created to distribute R packages
    - 19,000+ packages


## R Syntax

In [None]:
# Basic arithmetic: the quoted string is printed as a label, then the
# expression itself is evaluated
print("4 * 9")
4 * 9

# Comparison operators evaluate to TRUE or FALSE
print("2 < 3")
2 < 3

# class() reports the type of a value
class(5)        # "numeric"
class("Hello")  # "character"
class(TRUE)     # "logical"

# Assignment: `<-` is the conventional operator; `=` also works at the
# top level, but `<-` is used throughout for consistency
print("Create a vector")
v1 <- c(1, 2, 3, 4, 5)
v1
print("Second vector")
v2 <- c("a", "b", "c", "d")
v2

# Lists can hold elements of different types and lengths
lst <- list(name = "John", age = 30, scores = c(90, 85, 88))
lst

# Matrices: byrow = TRUE fills 1:9 across rows rather than down columns
mat <- matrix(1:9, nrow = 3, byrow = TRUE)
mat

## Working with Data in R

In [None]:
# Build a small data frame; the NA marks a missing score
df <- data.frame(
  ID = 1:5,
  Score = c(90, 85, NA, 78, 92)
)
head(df)

# Pull out a whole column, then a single cell
df$Score
df[1, 2]  # first row, second column

# Missing values: skip NA when averaging, then drop incomplete rows
mean(df$Score, na.rm = TRUE)
df_no_NA <- na.omit(df)  # keeps only the rows without any NA

# Read a CSV (path is relative to the working directory) into a data frame
bodyfat <- read.csv(file = "body_fat.csv")
head(bodyfat)  # preview the first rows

## Built-in data sets in R

In [None]:
# With no arguments, data() lists the data sets that are available
# (it does not load any of them)
data()

In [None]:
# Load the built-in `pressure` data set into the workspace
data("pressure")

# Preview its first rows
head(pressure)

# Open the documentation page for the data set
help("pressure")

In [None]:
# Built-in data sets can be used directly by name, without calling data();
# [["weight"]] extracts the column just like women$weight
women[["weight"]]

## Basic statistical functions

In [None]:
# Sample values summarised below
data <- c(10, 15, 20, 25, 30, 35, 40)

# Measures of centre
mean(data)    # arithmetic mean
median(data)  # middle value

# Measures of spread
var(data)  # sample variance
sd(data)   # sample standard deviation

## R packages

In [None]:
# Installing a package is only needed once per environment; uncomment
# the lines below if the packages are not installed yet
# install.packages("ggplot2")
# install.packages("dplyr")

# Attach the packages for this session so their functions can be called
library(ggplot2)
library(dplyr)

# Example data for the dplyr verbs below
df <- data.frame(
  Name = c("Alice", "Bob", "Charlie"),
  Age = c(25, 30, 28),
  Score = c(90, 85, 88)
)

# filter() keeps only the rows where the condition holds
df_filtered <- df %>%
  filter(Age > 26)

# mutate() adds a column computed from existing ones
df <- df %>%
  mutate(Passed = Score > 85)

# summarize() collapses the rows into a single summary row
df_summary <- df %>%
  summarize(Average_Score = mean(Score))

# Bar chart of each person's score; geom_col() takes bar heights straight
# from the y values (equivalent to geom_bar(stat = "identity"))
ggplot(df, aes(x = Name, y = Score, fill = Name)) +
  geom_col() +
  theme_minimal()
