In [None]:
# Data Engineering Tasks Notebook

# Load necessary packages
library(dplyr)
library(ggplot2)
library(RPostgreSQL)
library(data.table)


In [None]:

# Connect to the PostgreSQL database.
# Connection settings are read from the standard libpq environment variables
# (PGDATABASE, PGHOST, PGPORT, PGUSER, PGPASSWORD) so real credentials never
# have to be saved in the notebook. The fallbacks are the original placeholder
# values and are only used when the corresponding variable is unset.
db <- dbConnect(
  PostgreSQL(),
  dbname = Sys.getenv("PGDATABASE", unset = "my_db_name"),
  host = Sys.getenv("PGHOST", unset = "localhost"),
  # Sys.getenv() always returns a string, so convert the port explicitly
  port = as.integer(Sys.getenv("PGPORT", unset = "5432")),
  user = Sys.getenv("PGUSER", unset = "my_user_name"),
  password = Sys.getenv("PGPASSWORD", unset = "my_password")
)


In [None]:

# Pull every row and column of my_table into a local data frame
data <- dbGetQuery(conn = db, statement = "SELECT * FROM my_table")


In [None]:

# Clean the extracted data in two steps:
#   1. keep only the rows where variable_1 is not missing;
#   2. replace negative variable_2 values with 0 (an NA in variable_2 stays
#      NA, because ifelse() returns NA wherever its test is NA).
cleaned_data <- mutate(
  filter(data, !is.na(variable_1)),
  variable_2 = ifelse(variable_2 < 0, 0, variable_2)
)


In [None]:

# Summarise the cleaned data per variable_3 group: the mean of variable_4 and
# the maximum of variable_5.
# Note: mean() and max() are called without na.rm, so any group that contains
# an NA in the summarised column gets NA for that statistic.
aggregated_data <- summarize(
  group_by(cleaned_data, variable_3),
  avg_variable_4 = mean(variable_4),
  max_variable_5 = max(variable_5)
)


In [None]:

# Export the aggregated data to a CSV file in the working directory.
# row.names = FALSE stops write.csv() from adding an unnamed first column that
# holds only the row index (1, 2, 3, ...), which is not part of the data.
write.csv(aggregated_data, file = "aggregated_data.csv", row.names = FALSE)


In [None]:

# Read new_data.csv from the working directory with data.table's fast reader
new_data <- fread(input = "new_data.csv")


In [None]:

# Combine the database extract with the file data on variable_3. A full join
# keeps the rows of both inputs, including those without a match on the other
# side.
# NOTE(review): this joins the raw `data`, not `cleaned_data` - confirm that
# is intended.
merged_data <- data %>%
  full_join(new_data, by = "variable_3")


In [None]:

# Load the merged data into the "merged_data" database table.
# overwrite = TRUE replaces the table if it already exists.
# row.names = FALSE keeps the data frame's row index from being stored as an
# extra "row.names" column (RPostgreSQL writes row names by default).
dbWriteTable(
  db,
  "merged_data",
  merged_data,
  overwrite = TRUE,
  row.names = FALSE
)


In [None]:

# Close the PostgreSQL connection now that all database work is done
dbDisconnect(conn = db)


In [None]:

# Scatter plot of variable_2 against variable_1 from the database extract
# (uses the in-memory `data`, so no open connection is required)
ggplot(data = data, mapping = aes(x = variable_1, y = variable_2)) +
  geom_point() +
  xlab("Variable 1") +
  ylab("Variable 2")
