In [None]:
# Load the comma-separated student file from the working directory
data_csv <- read.csv(file = "student.csv")

# Preview the top of the imported data frame
head(x = data_csv)


In [None]:
# Same import, for a file whose fields are delimited by ";" instead of ","
data_csv <- read.csv(sep = ";", file = "student.csv")


In [None]:
# Install readxl only when it is missing, so re-running this cell does not
# download and reinstall the package every time
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}

# Load the library
library(readxl)

# Read Excel file (first sheet by default)
data_excel <- read_excel("student.xlsx")

# Read a specific sheet, selected by its name
data_excel2 <- read_excel("student.xlsx", sheet = "Sheet2")

head(data_excel)


In [None]:
# Import a tab-delimited text file.
#   header = TRUE : the first row holds the column names
#   sep = "\t"    : fields are separated by tab characters
data_txt <- read.table(file = "student.txt", sep = "\t", header = TRUE)

head(x = data_txt)

# If the values are space-separated instead, use sep = " ".


In [None]:
# Space-separated variant of the text import (first row is the header)
data_txt <- read.table(file = "student.txt", sep = " ", header = TRUE)


In [None]:
# read.csv() is given a URL here in place of a local file path.
# NOTE(review): this address looks like a Kaggle dataset page rather than a raw
# file link, and "stdent.csv" may be a typo; confirm it actually serves CSV.
data_csv <- read.csv(
  file = "https://www.kaggle.com/datasets/zeeshier/student-information-dataset/stdent.csv"
)

head(x = data_csv)


In [None]:
library(readxl)

# Download the remote file to a local path, then read that same path.
# NOTE(review): the URL ends in ".csv", while read_excel() needs a real Excel
# workbook; confirm the link points at an .xlsx file.
url <- "https://www.kaggle.com/datasets/zeeshier/student-information-dataset/stdent.csv"
destfile <- "temp.xlsx"

# mode = "wb" writes the download in binary mode
download.file(url, destfile = destfile, mode = "wb")

# Read the file that was just downloaded, not an unrelated local workbook
data_excel <- read_excel(destfile)
head(data_excel)


In [None]:
# Read comma-delimited text from a URL with read.table()
# (sep = "," splits on commas; header = TRUE takes column names from row one)
data_txt <- read.table(
  file = "https://www.kaggle.com/datasets/zeeshier/student-information-dataset/stdent.csv",
  header = TRUE,
  sep = ","
)

head(x = data_txt)


In [None]:
# Install openxlsx only when it is missing, then load it
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
library(openxlsx)

# Read Excel directly from URL (the scheme must be exactly "https://").
# NOTE(review): the URL ends in ".csv"; read.xlsx() expects an .xlsx workbook,
# so confirm the link before relying on this.
url <- "https://www.kaggle.com/datasets/zeeshier/student-information-dataset/stdent.csv"
data_excel <- openxlsx::read.xlsx(url, sheet = 1)

head(data_excel)


In [None]:
# Install curl only when it is missing, then load the packages used below
if (!requireNamespace("curl", quietly = TRUE)) {
  install.packages("curl")
}
library(curl)
library(readxl)

# Download the workbook to a temporary file, read it, then delete the file.
# NOTE(review): the URL ends in ".csv"; confirm it serves an Excel workbook.
url <- "https://www.kaggle.com/datasets/zeeshier/student-information-dataset/stdent.csv"
tmp_xlsx <- tempfile(fileext = ".xlsx")
curl::curl_download(url, tmp_xlsx)
data_excel <- read_excel(tmp_xlsx)

# Remove the temporary copy so no file is left behind after reading
unlink(tmp_xlsx)

head(data_excel)

# openxlsx::read.xlsx() → can read Excel directly from URL.
# curl + readxl → downloads to a temporary file on disk, reads it, then the
# temporary file is deleted with unlink().

In [None]:
# Save the built-in mtcars data frame as CSV (row names are not written).
# The target is "car.csv" so the export does not overwrite the "student.csv"
# input file, and so the file uploaded to Dropbox later actually exists.
write.csv(mtcars, "car.csv", row.names = FALSE)


In [None]:
# Install writexl only when it is missing, then load it
if (!requireNamespace("writexl", quietly = TRUE)) {
  install.packages("writexl")
}
library(writexl)

# Write dataframe to Excel
write_xlsx(mtcars, "car.xlsx")


In [None]:
# Install openxlsx only when it is missing, then load it
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
library(openxlsx)

# Write the data frame to a sheet named "Cars".
# overwrite = TRUE replaces an existing "car.xlsx" instead of stopping with an
# error.
write.xlsx(mtcars, "car.xlsx", sheetName = "Cars", overwrite = TRUE)


In [None]:
# Export mtcars as tab-delimited text, without row names or quoted strings
write.table(
  x = mtcars, file = "car.txt",
  sep = "\t", quote = FALSE, row.names = FALSE
)

# Export mtcars as space-delimited text.
# NOTE: this targets the same "car.txt", so it replaces the tab-delimited file
# written by the call above.
write.table(
  x = mtcars, file = "car.txt",
  sep = " ", quote = FALSE, row.names = FALSE
)


In [None]:
# Install rdrop2 only when it is missing, then load it
if (!requireNamespace("rdrop2", quietly = TRUE)) {
  install.packages("rdrop2")
}
library(rdrop2)

# Authenticate Dropbox.
# NOTE(review): `token` is not passed to drop_upload() below; presumably
# rdrop2 picks up the cached credentials on its own. Confirm.
token <- drop_auth()

# Upload the local file "car.csv" into the Dropbox folder "Apps/my_folder/"
drop_upload("car.csv", path = "Apps/my_folder/")


In [None]:
# Subsetting
# Look at first rows
head(student)

# Select first row
student[1, ]

# Select first column
student[, 1]

# Select row 1, column 2
student[1, 2]

# Select multiple rows and specific columns
student[1:5, c("Name", "Score")]

# Using $ to access a column
student$Score

# Subsetting with condition
#
# which() converts the logical mask to the positions that are TRUE. With a
# bare logical index, every NA comparison (a missing Score or Class) would add
# a row of all NAs to the result; which() drops those, matching what subset()
# does.

# Students with score > 80
high_score <- student[which(student$Score > 80), ]

# Students in Class 10
class10 <- student[which(student$Class == 10), ]

# Students with Score > 80 AND Class = 10
top_class10 <- student[which(student$Score > 80 & student$Class == 10), ]


In [None]:
# Missing values in student

# Check missing values in dataset (logical matrix, TRUE where a cell is NA)
is.na(student)

# Count total missing values
sum(is.na(student))

# Check missing values column-wise
colSums(is.na(student))

# Remove rows with missing values
student_clean <- na.omit(student)

# Option A: replace missing Score with the mean Score.
# This is computed first, while the NAs are still present, and kept in its own
# vector. Running it after the zero-fill below would find no NAs left to
# replace, and the zeros would also pull the mean down.
score_mean_filled <- replace(
  student$Score,
  is.na(student$Score),
  mean(student$Score, na.rm = TRUE)
)

# Option B: replace missing Score with 0 (modifies student in place)
student$Score[is.na(student$Score)] <- 0

In [None]:
# Sorting with order()
# order() returns a row permutation; indexing the data frame with it
# rearranges the rows.

# Ascending by Score
student_sorted <- student[order(student$Score, decreasing = FALSE), ]

# Descending by Score (negating the key reverses the ordering)
student_sorted_desc <- student[order(-student$Score), ]

# Class ascending first, then Score descending within each Class
student_sorted_multi <- student[order(student$Class, -student$Score), ]


In [None]:
# Filtering with subset()
# Column names can be used bare inside the condition.

# Rows where Score exceeds 80
high_score <- subset(x = student, subset = Score > 80)

# Rows belonging to Class 10
class10 <- subset(x = student, subset = Class == 10)

# Rows in Class 10 whose Score exceeds 80
top_class10 <- subset(x = student, subset = Class == 10 & Score > 80)

# Same filter, keeping only the Name and Score columns
top_class10_small <- subset(
  x = student,
  subset = Class == 10 & Score > 80,
  select = c(Name, Score)
)


In [None]:
# Applying Functions
#
# apply() runs a function over the rows (MARGIN = 1) or the columns
# (MARGIN = 2) of a matrix or data frame.

# Per-student average across the three subject columns (one value per row)
row_means <- apply(
  X = student[c("Math", "English", "Science")],
  MARGIN = 1,
  FUN = mean
)

# Per-subject average across all students (one value per column)
col_means <- apply(
  X = student[c("Math", "English", "Science")],
  MARGIN = 2,
  FUN = mean
)


In [None]:
# lapply() (list apply → always returns a list)
# Mean of each subject column, returned as a list with one element per column
lapply(X = student[c("Math", "English", "Science")], FUN = mean)

# Same computation, flattened from a list into a named vector
unlist(lapply(X = student[c("Math", "English", "Science")], FUN = mean))


In [None]:
# tapply() (apply by group) and sapply() (simplified apply → returns vector)

# Get class-wise mean of Score
tapply(student$Score, student$Class, mean)

# Quick summary statistics of Score.
# sapply() iterates over the statistics, not over the scores. Looping over
# student$Score itself would pass one value at a time, so mean() would return
# that value unchanged and sd() of a single value is NA.
# Result: a named numeric vector with elements Mean and SD.
sapply(list(Mean = mean, SD = sd), function(stat) stat(student$Score))



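order() → sort a dataset by one or more columns

subset() → filter rows by condition (and optionally select columns)

apply() → apply a function over the rows or columns of a matrix/data frame

sapply() → apply a function to each element and simplify the result to a vector or matrix

lapply() → apply a function to each element and always return a list

tapply() → apply a function to groups of values defined by a factor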

In [None]:
# Factor Variables (Categorical Data)
# In R, factors represent categorical data (like Class, Gender, Grade).

# Convert the Class and Gender columns to factors in one step
student[c("Class", "Gender")] <- lapply(
  student[c("Class", "Gender")],
  as.factor
)

# Distinct categories (levels) of each factor
levels(x = student$Class)
levels(x = student$Gender)

# Summary of each factor
summary(object = student$Class)
summary(object = student$Gender)


In [None]:
# Using factors in analysis

# Frequency table: number of students in each Class
table(student$Class)

# Frequency table: number of students of each Gender
table(student$Gender)

# Move level "10" to the front so Class 10 becomes the reference (baseline)
student$Class <- relevel(x = student$Class, ref = "10")


In [None]:
# Strings (stringr package basics)

# The stringr package makes string manipulation easier.

# Install stringr only when it is missing, then load it
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}
library(stringr)

# Example dataset: student$Name
head(student$Name)

# 1. Detect pattern
str_detect(student$Name, "Ali")   # TRUE if "Ali" appears

# 2. Extract pattern
str_extract(student$Name, "Ali")

# 3. Replace text
# str_replace() changes only the first match in each string and matches "Ali"
# anywhere, including inside longer names.
# NOTE: this overwrites student$Name, and running it again turns "Alif" into
# "Aliff" because "Alif" still contains "Ali".
student$Name <- str_replace(student$Name, "Ali", "Alif")

# 4. Split string
str_split(student$Name, " ")      # Split by space

# 5. String length
str_length(student$Name)

# 6. Convert case
str_to_upper(student$Name)
str_to_lower(student$Name)
str_to_title(student$Name)