# Load necessary libraries


In [None]:
library(ggplot2)
library(dplyr)

# Load datasets


In [None]:
# Read the two input tables from CSV files located in the working directory.
# Both calls rely on read.csv() defaults (header row, comma separator).
lung_cancer_df <- read.csv(file = "lung_cancer_prediction_dataset.csv")
air_pollution_df <- read.csv(file = "global_air_pollution_dataset.csv")


# 1. Boxplot of Lung Cancer Deaths Distribution

In [None]:
# One boxplot summarising the spread of Annual_Lung_Cancer_Deaths across all
# rows of lung_cancer_df (no grouping variable on the x axis).
ggplot(
  data = lung_cancer_df,
  mapping = aes(y = Annual_Lung_Cancer_Deaths)
) +
  geom_boxplot(color = "black", fill = "lightcoral") +
  ggtitle("Boxplot of Lung Cancer Deaths Distribution") +
  ylab("Annual Lung Cancer Deaths") +
  theme_minimal()


# 2. Histogram of PM2.5 AQI Values

In [None]:
# Histogram of the PM2.5 AQI column using 30 equal-width bins.
# NOTE(review): assumes the CSV header is literally `PM2.5_AQI_Value`.
# read.csv() rewrites headers that contain spaces (e.g. "PM2.5 AQI Value"
# would become `PM2.5.AQI.Value`) -- confirm against the actual file.
ggplot(
  data = air_pollution_df,
  mapping = aes(x = PM2.5_AQI_Value)
) +
  geom_histogram(
    bins = 30,
    alpha = 0.7,
    color = "black",
    fill = "darkblue"
  ) +
  ggtitle("Histogram of PM2.5 AQI Values") +
  xlab("PM2.5 AQI Value") +
  ylab("Frequency") +
  theme_minimal()


# 3. Density Plot of the Lung Cancer Mortality Rate
# Convert to numeric and keep only strictly positive values (zeros, negatives, and NAs are dropped) to avoid KDE issues

In [None]:
# Coerce Mortality_Rate to numeric before plotting.
# as.numeric() on a factor returns the internal integer level codes, not the
# values shown in the data, so a factor column (e.g. when the CSV was read
# with stringsAsFactors = TRUE) is converted to character first.
if (is.factor(lung_cancer_df$Mortality_Rate)) {
  lung_cancer_df$Mortality_Rate <- as.character(lung_cancer_df$Mortality_Rate)
}
lung_cancer_df$Mortality_Rate <- as.numeric(lung_cancer_df$Mortality_Rate)

# Keep only strictly positive rates. filter() also drops rows where the
# comparison is NA, i.e. values that could not be parsed as numbers.
lung_cancer_filtered <- lung_cancer_df %>%
  filter(Mortality_Rate > 0)

# Kernel density estimate of the remaining mortality rates.
ggplot(lung_cancer_filtered, aes(x = Mortality_Rate)) +
  geom_density(fill = "purple", alpha = 0.6) +
  labs(
    title = "Density Plot of Lung Cancer Mortality Rate",
    x = "Mortality Rate",
    y = "Density"
  ) +
  theme_minimal()

# 4. Scatter Plot of Normal and Logistic Distributions

In [None]:
# Draw two independent random samples of 100 values each -- one standard
# normal, one standard logistic -- and plot them against each other.
set.seed(42)  # Fixed seed so the same samples are drawn on every run
normal_values <- rnorm(100, mean = 0, sd = 1)
logistic_values <- rlogis(100, location = 0, scale = 1)
scatter_data <- data.frame(Normal = normal_values, Logistic = logistic_values)

# theme_solarized() is provided by the ggthemes package, not ggplot2. It is
# namespace-qualified here because ggthemes is never attached with library(),
# so the bare call fails with "could not find function".
ggplot(scatter_data, aes(x = Normal, y = Logistic)) +
  geom_point(color = "brown", alpha = 0.7) +
  labs(
    title = "Scatter Plot of Normal vs. Logistic Distribution",
    x = "Normal Distribution Values",
    y = "Logistic Distribution Values"
  ) +
  ggthemes::theme_solarized(light = FALSE)