In [None]:
# R

# run r script
# Rscript my_file.R

# start R
# sudo -i R

# install package (from CRAN)
# install.packages("stringr")

# list packages installed
# library()

In [26]:
# variable assignment

# assignment
var1 <- "hello"
var1 <<- "hello"
"hello" -> var1
"hello" ->> var1
var1 = "hello"

# print
print(var1)
cat(var1, " there!", sep="")

[1] "hello"
hello there!

In [1]:
# string interpolation

# requires package
if (!require('stringr')) install.packages('stringr')
library('stringr')

# interpolation
name <- 'Itachi Uchiha'
str_interp("Name: ${name}")

Loading required package: stringr


In [2]:
# vector objects (types)
    # logical
    # numeric
    # integer
    # complex
    # character
    # raw

# atomic vectors -- linear vectors of a single primitive type.

v <- TRUE 
print(class(v))

v <- 23.5
print(class(v))

v <- 2L
print(class(v))

v <- 2+5i
print(class(v))

v <- "TRUE"
print(class(v))

v <- charToRaw("Hello")
print(class(v))

[1] "logical"
[1] "numeric"
[1] "integer"
[1] "complex"
[1] "character"
[1] "raw"


In [21]:
# list variables

print(ls()) # print all (non-hidden) variables
print(ls(pattern = "var")) # filter
print(ls(all.name = TRUE)) # include hidden variables

# remove variables
var.2 <- 'Hello!!!'
rm(var.2)

# remove all variables
# rm(list = ls())

[1] "a"            "A"            "apple"        "apple_colors" "BMI"         
[6] "factor_apple" "list1"        "v"            "var1"        
[1] "var1"
 [1] ".pbd_env"     "a"            "A"            "apple"        "apple_colors"
 [6] "BMI"          "factor_apple" "list1"        "v"            "var1"        


In [23]:
# vector arithmetic operators
    # if vectors are of unequal length -- short vector's elements are recycled to create matching length

v <- c( 2, 5.5, 6)
t <- c(8, 3, 4)

print(v+t) # element-wise addition
print(v-t) # element-wise subtraction
print(v*t) # element-wise multiplication
print(v/t) # element-wise division
print(v%%t) # element-wise remainder (like modulus)
print(v%/%t) # element-wise floor division
print(v^t) # element-wise exponent

[1] 10.0  8.5 10.0
[1] -6.0  2.5  2.0
[1] 16.0 16.5 24.0
[1] 0.250000 1.833333 1.500000
[1] 2.0 2.5 2.0
[1] 0 1 1
[1]  256.000  166.375 1296.000


In [24]:
# relational operators 

v <- c(2, 5.5, 6, 9)
t <- c(8, 2.5, 14, 9)

# element-wise comparison
print(v > t)
print(v < t)
print(v == t)
print(v <= t)
print(v >= t)
print(v != t)

[1] FALSE  TRUE FALSE FALSE
[1]  TRUE FALSE  TRUE FALSE
[1] FALSE FALSE FALSE  TRUE
[1]  TRUE FALSE  TRUE  TRUE
[1] FALSE  TRUE FALSE  TRUE
[1]  TRUE  TRUE  TRUE FALSE


In [25]:
# logical operators

v <- c(3,1,TRUE,2+3i)
t <- c(4,1,FALSE,2+3i)

# element-wise comparison
print(v & t)
print(v | t)
print(!v)

# consider only first elements
print(v && t)
print(v || t)


[1]  TRUE  TRUE FALSE  TRUE
[1] TRUE TRUE TRUE TRUE
[1] FALSE FALSE FALSE FALSE
[1] TRUE
[1] TRUE


In [27]:
# other operators

# range/sequence
print(2:8)

# membership testing 
v1 <- 8
v2 <- 12
t <- 1:10
print(v1 %in% t) 
print(v2 %in% t) 

# A @ A_t
M = matrix( c(2,6,5,1,10,4), nrow = 2,ncol = 3,byrow = TRUE)
t = M %*% t(M)
print(t)


[1] 2 3 4 5 6 7 8
[1] TRUE
[1] FALSE
     [,1] [,2]
[1,]   65   82
[2,]   82  117


In [42]:
# control flow

# if
x <- 30L
if(is.integer(x)) {
   print("X is an Integer")
}

# if, else if, else
v1 <- c(1:10)
num1 <- 12
num2 <- 8.3
if(num1 %in% v1) {
   print("num1 is in v1 ")
} else if (num2 %in% v1) {
   print("num2 is in v1")
} else {
   print("no match found")
}

# switch

# repeat loop
cnt <- 0
repeat {
   print(cnt)
   cnt <- cnt+1
   if(cnt > 3) {
      break
   }
}

# while loop
cnt <- 0
while (cnt < 4) {
   print(cnt)
   cnt = cnt + 1
}

# for loop
for (i in 0:3) {
    print(i)
}


# break, next
for (i in 0:8) {
    if(i == 3) next
    print(i)
    if(i == 5) break
}

[1] "X is an Integer"
[1] "no match found"
[1] 0
[1] 1
[1] 2
[1] 3
[1] 0
[1] 1
[1] 2
[1] 3
[1] 0
[1] 1
[1] 2
[1] 3
[1] 0
[1] 1
[1] 2
[1] 4
[1] 5


In [4]:
# functions

# requires package
if (!require('stringr')) install.packages('stringr')
library('stringr')

# no param
func1 <- function() {
    print('hello there!')
}
# with arg
func2 <- function(name) {
    print(str_interp("Hello, ${name}!"))
}
# variable number of args
func3 <- function(...) {
    print(sum(...))
}
# default arguments
func4 <- function(a=3, b=4) {
    print(a + b)
}

# execute
func1()
func2('Kakashi')
func3(c(1,2,3))
func4()

[1] "hello there!"
[1] "Hello, Kakashi!"
[1] 6
[1] 7


In [43]:
# useful built-in functions
print(seq(32,44))
print(mean(32:44))
print(sum(32:44))

 [1] 32 33 34 35 36 37 38 39 40 41 42 43 44
[1] 38
[1] 494


In [5]:
# sequence
print(seq(1,10))
print(seq(1.1, 10.1))
print(seq(1,2, by= .1))

 [1]  1  2  3  4  5  6  7  8  9 10
 [1]  1.1  2.1  3.1  4.1  5.1  6.1  7.1  8.1  9.1 10.1
 [1] 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0


In [6]:
# c
print(c('a','b','c'))
print(c(1, 2, 3, 'a', 'b', 'c')) # coerced to characters

[1] "a" "b" "c"
[1] "1" "2" "3" "a" "b" "c"


In [None]:
# vectors
    # supports single type

# Create a vector.
apple <- c('red','green',"yellow")
print(apple)

# Get the class of the vector.
print(class(apple))

In [32]:
# vectors
    # all elements have same type
    # R uses 1-based indexing!!!

# vector
t <- c("Sun","Mon","Tue","Wed","Thurs","Fri","Sat")

# position
u <- t[c(2,3,6)]

# logical (boolean indexing)
v <- t[c(TRUE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE)]

# negative indexing
x <- t[c(-2,-5)]

# binary indexing
y <- t[c(0,0,0,0,0,0,1)]


In [None]:
# vector element sorting
v <- c(3,8,4,5,0,11, -9, 304)
print(sort(v))
print(sort(v, decreasing=TRUE))

In [None]:
# lists
    # supports multiple types
list1 = list(c('red', 2, TRUE, 2+5i))
print(list1)

In [31]:
# lists
    # supports multiple different types

# create list
l = list('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday')
# name list elements
names(l) = c('Sun', 'Mon', 'Tues', 'Wed', 'Thur', 'Fri', 'Sat')
# access by index
element = l[1]
# access by name
element = l['Sun']
# add element
    # l[8] = 'MadeUpDay'
l = append(l,'MadeUpDay') 
# update element
l[8] = 'IShouldNotExistDay'
# remove last element
l[8] = NULL

# merge list
merged_list = c(l,l)
# list to vector
v = unlist(l)




In [None]:
# matrices
    # supports multiple types (methods may not)
A = matrix( c(1, 2, 3, 4, 5, 6, 7, 8, 9), nrow = 3, ncol = 3, byrow=TRUE)
print(A)

In [64]:
# matrices

# dimension names
indices = c("row1", "row2", "row3", "row4")
cols = c("col1", "col2", "col3")

A = matrix(
    c(1:12), 
    nrow = 4, 
    ncol = 3, 
    byrow = TRUE,
    dimnames = list(indices, cols)
)

# indexing
element = A[1,3] # first row, third column
row = A[2,] # second row
col = A[,3]

# math (element-wise)
sum = A + A
dif = A - A
product = A * A
quotient = A / A

# matrix multiplication (A @ At)
matmul = A %*% t(A)


In [102]:
# arrays
    # supports multiple dimensions (matrix is limited to 2)
a <- array(c(1:27),dim = c(3,3,3))
print(a)

, , 1

     [,1] [,2] [,3]
[1,]    1    4    7
[2,]    2    5    8
[3,]    3    6    9

, , 2

     [,1] [,2] [,3]
[1,]   10   13   16
[2,]   11   14   17
[3,]   12   15   18

, , 3

     [,1] [,2] [,3]
[1,]   19   22   25
[2,]   20   23   26
[3,]   21   24   27



In [72]:
# array

# create content
v = c(1:9)
indices = c("row1", "row2", "row3")
cols = c("col1", "col2", "col3")
pages = c('page1', 'page2')
# create array
ndarray = array(
    v,
    dim = c(3,3,2),
    dimnames = list(indices, cols, pages)
)
print(ndarray)
# indexing
element = ndarray[1,1,1]
row = ndarray[1,,]
col = ndarray[,1,]
page = ndarray[,,1]


, , page1

     col1 col2 col3
row1    1    4    7
row2    2    5    8
row3    3    6    9

, , page2

     col1 col2 col3
row1    1    4    7
row2    2    5    8
row3    3    6    9



In [103]:
# factors

# Create a vector.
apple_colors <- c('green','green','yellow','red','red','red','green')
# Create a factor object.
factor_apple <- factor(apple_colors)
# Print the factor.
print(factor_apple) # factor
print(nlevels(factor_apple)) # number of distinct values

[1] green  green  yellow red    red    red    green 
Levels: green red yellow
[1] 3


In [79]:
# factors (kinda like sets?)

# vector
v <- c("East","West","East","North","North","East","West","West","West","East","North")
# create factor
f <- factor(v)
# print factor
print(f)
# print number of levels (number of unique values)
print(nlevels(f))
# specify level order
f <- factor(v, levels = c('East', 'West', 'North'))

# generate
    # n -- number of levels, k -- replications, labels -- vector of labels
f <- gl(n = 3, k = 4, labels = c('red', 'green', 'blue'))

 [1] East  West  East  North North East  West  West  West  East  North
Levels: East North West
[1] 3


In [None]:
# csv

# read csv
df1 = read.csv('data.csv')
# get info about df
print(is.data.frame(df1))
print(ncol(df1))
print(nrow(df1))
# write csv
write.csv(df1, 'data.csv')

In [None]:
# xlsx

# import
if (!require('xlsx')) install.packages('xlsx')
library('xlsx')

# read
df1 <- read.xlsx("data.xlsx", sheetIndex = 1)
# write
write.xlsx(df1, "data.xlsx", sheetName="Sheet1", col.names=TRUE, row.names=TRUE)


In [None]:
# json

# NOT TESTED!!!

# import
if (!require('rjson')) install.packages('rjson')
library('rjson')

if (!require('jsonlite')) install.packages('jsonlite')
library('jsonlite')

# read
data <- fromJSON(file = "input.json")
df1 = as.data.frame(data)
# write (columns)
json_data = toJSON(df1, pretty = TRUE)
write(json_data, 'data.json')
# write (records)
json_data <- toJSON(unname(split(res1, 1:nrow(df1))))
write(json_data, 'data.json')


In [None]:
# mysql
    # https://www.rdocumentation.org/packages/RMySQL/versions/0.10.13

# import
if (!require('RMySQL')) install.packages('RMySQL')
library('RMySQL')

# read
conn = dbConnect(MySQL(), user = 'root', password = '', dbname = 'sakila', host = 'localhost')
alltables = dbListTables(conn)
res <- dbSendQuery(conn, "SELECT * FROM table_name")
df1 = dbFetch(res)

# write

# disconnect
dbDisconnect(conn)

In [None]:
# sqlite
    # https://cran.r-project.org/web/packages/RSQLite/vignettes/RSQLite.html

# import
if (!require('RSQLite')) install.packages('RSQLite')
library('RSQLite')

# read
conn = dbConnect(drv="SQLite", dbname="data.sqlite")
alltables = dbListTables(conn) # list tables
df1 = dbGetQuery(conn,'select * from table_name' )

# write
conn <- dbConnect(RSQLite::SQLite(), "data.sqlite")
dbWriteTable(conn, "table_name", df1)


In [None]:
# postgres

# import
if (!require('RPostgreSQL')) install.packages('RPostgreSQL')
library('RPostgreSQL')

# read
pg = dbDriver("PostgreSQL")
conn = dbConnect(pg, user="postgres", password="postgres", host="localhost", port=5432, dbname="postgres")
res = dbGetQuery(conn, "select * from table_name")
df1 <- as.data.frame(res)
# write


# disconnect
dbDisconnect(con)

In [104]:
# dataframes

# create dataframe
df1 <- data.frame(
    gender = c("Male", "Male","Female"), 
    height = c(152, 171.5, 165), 
    weight = c(81,93, 78),
    Age = c(42,38,26)
)

# create dataframe using cbind (on vectors)
df2 <- cbind(
    gender = c("Male", "Male","Female"), 
    height = c(152, 171.5, 165), 
    weight = c(81,93, 78),
    Age = c(42,38,26)
)
print(df1 == df2)

     gender height weight  Age
[1,]   TRUE   TRUE   TRUE TRUE
[2,]   TRUE   TRUE   TRUE TRUE
[3,]   TRUE   TRUE   TRUE TRUE


In [101]:
# dataframes
df1 = data.frame(
    id = c(1:5),
    name = c('Kakashi', 'Hiruzen', 'Konohamaru', 'Iruka', 'Yamato'),
    age = c(50L, 70L, 30L, 50L, 50L),
    jonin = c(TRUE, TRUE, TRUE, FALSE, TRUE),
    stringsAsFactors = FALSE 
)
summary = summary(df1) # summary of df
# selection
row_selection = df1[1:2,] # first 2 rows
col_selection = df1[,1:2] # first 2 cols
both_selection = df1[c(1,3), c(2,4)] # rows 1 & 3, cols 2 & 4
# new column
df1$awesome = TRUE # new column, same value
df1$alive = c(TRUE, FALSE, TRUE, TRUE, TRUE) # new column from vector of values
# new row
itachi = data.frame(
    id = 6L,
    name = 'Itachi',
    age = '50',
    jonin = TRUE,
    awesome = TRUE,
    alive = FALSE
)
df1 = rbind(df1, itachi)
# display
df1

id,name,age,jonin,awesome,alive
<int>,<chr>,<chr>,<lgl>,<lgl>,<lgl>
1,Kakashi,50,True,True,True
2,Hiruzen,70,True,True,False
3,Konohamaru,30,True,True,True
4,Iruka,50,False,True,True
5,Yamato,50,True,True,True
6,Itachi,50,True,True,False


In [None]:
# working with dataframes
col1 = df1$col1 # get column
max1 = max(df1$col1) # get max value
record1 = subset(df1, col1 == max(col1)) # subset of df
subset1 = subset(df1, col1 >= 5) # subset of df
subset1 = subset(df1, col3 == 'Male' & col4 > 40) # multiple conditions
subset1 = subset(df1, as.Date(start_date) > as.Date("2014-01-01")) # compare dates

In [110]:
# os

# cwd
    # setwd()
print(getwd())



ERROR: Error in setwd("2*"): cannot change working directory


In [2]:
# pie chart

# Create data for the graph.
v <- c(21, 62, 10, 53)
labels <- c("London", "New York", "Singapore", "Mumbai")
# Give the chart file a name.
png(file = "city.jpg")
# Plot the chart.
pie(v,labels)
# Save the file.
dev.off()

In [9]:
# bar chart

# Create the data for the chart
H <- c(7,12,28,3,41)
M <- c("Mar","Apr","May","Jun","Jul")
# Give the chart file a name
png(file = "barchart_months_revenue.png")
# Plot the bar chart 
barplot(H,names.arg=M,xlab="Month",ylab="Revenue",col="lavender",
main="Revenue chart",border="black")
# Save the file
dev.off()

In [10]:
# box plot

# Give the chart file a name.
png(file = "boxplot.png")
# Plot the chart.
boxplot(mpg ~ cyl, data = mtcars, xlab = "Number of Cylinders",
   ylab = "Miles Per Gallon", main = "Mileage Data")
# Save the file.
dev.off()

In [14]:
# histograms

# Create data for the graph.
v <- c(9,13,21,8,36,22,12,41,31,33,19)
# Give the chart file a name.
png(file = "histogram_lim_breaks.png")
# Create the histogram.
hist(v,xlab = "Weight",col = "lavender",border = "black", xlim = c(0,50), ylim = c(0,5),
   breaks = 5)
# Save the file.
dev.off()


In [16]:
# line graph

# Create the data for the chart.
v <- c(7,12,28,3,41)
t <- c(14,7,6,19,3)
# Give the chart file a name.
png(file = "line_chart_2_lines.jpg")
# Plot the bar chart.
plot(v,type = "o",col = "purple", xlab = "Month", ylab = "Rain fall", 
   main = "Rain fall chart")
lines(t, type = "o", col = "blue")
# Save the file.
dev.off()

In [17]:
# scatter plot

# Get the input values.
input <- mtcars[,c('wt','mpg')]
# Give the chart file a name.
png(file = "scatterplot.png")
# Plot the chart for cars with weight between 2.5 to 5 and mileage between 15 and 30.
plot(x = input$wt,y = input$mpg,
   xlab = "Weight",
   ylab = "Milage",
   xlim = c(2.5,5),
   ylim = c(15,30),		 
   main = "Weight vs Milage"
)
	 
# Save the file.
dev.off()

# -------------------------

# pairplot (scatterplot matrices)

# Give the chart file a name.
png(file = "scatterplot_matrices.png")
# Plot the matrices between 4 variables giving 12 plots.
# One variable with 3 others and total 4 variables.
pairs(~wt+mpg+disp+cyl,data = mtcars,
   main = "Scatterplot Matrix")

# Save the file.
dev.off()

In [27]:
# stats
v = c(1,2,3,4,5,6,7,8,8)
v_mean = mean(v)
v_mean = mean(v, trim = .2, na.rm = TRUE) # trim and remove na
v_median = median(v, na.rm = FALSE)

