<a href="https://colab.research.google.com/github/Vin-Okumu/Climate-Change-Analysis/blob/main/Climate-Change-Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!apt-get install r-base
!R -e "install.packages('IRkernel'); IRkernel::installspec(user = TRUE)"

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
r-base is already the newest version (4.4.3-1.2204.0).
0 upgraded, 0 newly installed, 0 to remove and 29 not upgraded.

R version 4.4.3 (2025-02-28) -- "Trophy Case"
Copyright (C) 2025 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> install.packages('IRkernel'); IRkernel::installspec(user = TRUE)
Installing package into ‘/usr/local/lib

In [5]:
url <- "https://github.com/Vin-Okumu/Climate-Change-Indicator-Analysis/blob/main/Climate_Change.zip"
download.file(url, destfile = "Climate_Change.zip")
unzip("Climate_Change.zip")
df <- read.csv("data.csv")

“cannot open URL 'https://github.com/Vin-Okumu/Climate-Change-Indicator-Analysis/blob/main/Climate_Change.zip': HTTP status was '404 Not Found'”


ERROR: Error in download.file(url, destfile = "Climate_Change.zip"): cannot open URL 'https://github.com/Vin-Okumu/Climate-Change-Indicator-Analysis/blob/main/Climate_Change.zip'


In [None]:
anyNA(df) #Checks for missing values, helps ensure there are no missing values within data frame
head(df) #Returns a glimpse of the data frame

In [None]:
library(dplyr)
df = df%>%arrange(Year) #Rearranges the data in the data frame by year
head(df)
tail(df)

In [None]:
df = df %>%
  group_by(Year) %>%
  summarise(across(everything(),\(x)mean(x, na.rm = TRUE))) #calculates the mean readings for each year respectively
head(df)
tail(df)

In [None]:
# Check normality using Shapiro-Wilk test
for (col in c("Global_Avg_Temp", "CO2_Conc", "Rise_in_Sea_Lvl", "Arctic_Ice_Area")) {
  cat("\nShapiro-Wilk test for", col, "\n")
  print(shapiro.test(df[[col]]))
}

In [None]:
library(ggplot2)
# Convert data to long format
df_long <- pivot_longer(df, cols = c(Global_Avg_Temp, CO2_Conc, Rise_in_Sea_Lvl, Arctic_Ice_Area),
                        names_to = "Variable", values_to = "Value")

# Plot histograms using facets
ggplot(df_long, aes(x = Value)) +
  geom_histogram(aes(y = after_stat(density)), bins = 10, fill = "goldenrod4", alpha = 0.5) +
  geom_density(color = "grey0", linewidth = 0.5) +  # Density curve
  facet_wrap(~ Variable, scales = "free") +  # Separate histograms for each variable
  theme_minimal() +
  labs(title = "Histograms of Readings", x = "Value", y = "Density") +
  theme(
      strip.text = element_text(size = 14, face = "bold", color = "black"),
      panel.spacing = unit(1, "cm"),  #increase space between facets
      panel.grid.major = element_blank(), #removes major  gridlines
      panel.grid.minor = element_blank(),
      axis.title = element_text(color = "black", size = 18), #colors the axis titles bisque
      plot.title = element_text(color = "black", face = "bold", hjust = 0.5, size = 20)) #removes minor gridlines
      options(repr.plot.width = 12, repr.plot.height = 8) #sets panel width and height


In [None]:
ggplot(df,aes(x = CO2_Conc, y = Global_Avg_Temp)) +
  geom_point(pch = 15, color = "firebrick3", size = 1.5) +
  ggtitle("CO2 Concentration vs Global Average\nTemperature") +
  scale_x_continuous(expand = c(0.01,0.01)) +
  geom_smooth(method = "lm", se = FALSE, color = "darkblue", linewidth = 0.4) +
  theme_minimal(base_size = 15) +
  theme(
    plot.background = element_rect(fill = "grey85", color = NA), #greys out the background
    panel.background = element_rect(fill = "gray99", color = NA), #colors the panel white
    panel.grid.major = element_blank(), #removes major  gridlines
    panel.grid.minor = element_blank(), #removes minor gridlines
    axis.text = element_text(color = "black"), #colors the axis labels lavender
    axis.title = element_text(color = "gray25", face = "bold"), #colors the axis titles bisque
    plot.title = element_text(color = "grey10", face = "bold", hjust = 0.5, size = 20) #Colors the title gold, bold it and centers it across the canvas
  )

In [None]:
#Performing pearson correlation test
Corr_1 = cor.test(df$Global_Avg_Temp,df$CO2_Conc, method = "pearson")
print(Corr_1)

#Extracting P-value
p_value = Corr_1$p.value

#Hypothesis
if (p_value < 0.05) {
  print("There is a significant correlation between Global average temperature and CO2 concentration.")
} else {
  print("There is no significant correlation between Global average temperature and CO2 concentration.")
}

In [None]:
ggplot(df,aes(x = Global_Avg_Temp, y = Rise_in_Sea_Lvl)) +
  geom_point(pch = 8, color = "azure", size = 2) +
  ggtitle("Global Average Temperature vs Rise in Sea\nLevel") +
  scale_x_continuous(expand = c(0.001,0.001)) +
  geom_smooth(method = "lm", se = FALSE, color = "ivory", linewidth = 0.4) +
  theme_minimal(base_size = 15) +
  theme(
    plot.background = element_rect(fill = "grey5", color = NA), #greys out the background
    panel.background = element_rect(fill = "gray8", color = NA), #colors the panel white
    panel.grid.major = element_blank(), #removes major  gridlines
    panel.grid.minor = element_blank(), #removes minor gridlines
    axis.text = element_text(color = "lavender"), #colors the axis labels lavender
    axis.title = element_text(color = "bisque", face = "bold"), #colors the axis titles bisque
    plot.title = element_text(color = "ivory", face = "bold", hjust = 0.5, size = 20) #Colors the title gold, bold it and centers it across the canvas
  )

In [None]:
Corr_2 = cor.test(df$Global_Avg_Temp,df$Rise_in_Sea_Lvl, method = "pearson")
print(Corr_2)

#Extracting P-value
p_value = Corr_2$p.value

#Hypothesis

if (p_value < 0.05) {
  print("There is a significant correlation between Global average temperature and Rise in sea level.")
} else {
  print("There is no significant correlation between Global average temperature and Rise in sea level.")
}

In [None]:
ggplot(df,aes(x = Global_Avg_Temp, y = Arctic_Ice_Area)) +
  geom_point(pch = 8, color = "darkblue", size = 2) +
  ggtitle("Global Average Temperature vs Arctic Ice Area") +
  scale_x_continuous(expand = c(0.001,0.001)) +
  geom_smooth(method = "lm", se = FALSE, color = "firebrick4", linewidth = 0.4) +
  theme_minimal(base_size = 15) +
  theme(
    plot.background = element_rect(fill = "grey45", color = NA), #greys out the background
    panel.background = element_rect(fill = "grey95", color = NA), #colors the panel white
    panel.grid.major = element_blank(), #removes major  gridlines
    panel.grid.minor = element_blank(), #removes minor gridlines
    axis.text = element_text(color = "lavender"), #colors the axis labels lavender
    axis.title = element_text(color = "bisque", face = "bold"), #colors the axis titles bisque
    plot.title = element_text(color = "darkorange2", face = "bold", hjust = 0.5, size = 20) #Colors the title gold, bold it and centers it across the canvas
  )

In [None]:
Corr_3 = cor.test(df$Global_Avg_Temp,df$Arctic_Ice_Area, method = "pearson")
print(Corr_3)

#Extracting P-value
p_value = Corr_3$p.value

#Hypothesis

if (p_value < 0.05) {
  print("There is a significant correlation between Global average temperature and Arctic ice area.")
} else {
  print("There is no significant correlation between Global average temperature and Arctic ice area.")
}


In [None]:
ggplot(df,aes(x = CO2_Conc, y = Arctic_Ice_Area)) +
  geom_point(pch = 17, color = "black", size = 2) +
  ggtitle("CO2 Concentration vs Arctic Ice Area") +
  scale_x_continuous(expand = c(0.001,0.001)) +
  geom_smooth(method = "lm", se = FALSE, color = "firebrick4", linewidth = 0.4) +
  theme_minimal(base_size = 15) +
  theme(
    plot.background = element_rect(fill = "grey4", color = NA), #greys out the background
    panel.background = element_rect(fill = "aliceblue", color = NA), #colors the panel white
    panel.grid.major = element_blank(), #removes major  gridlines
    panel.grid.minor = element_blank(), #removes minor gridlines
    axis.text = element_text(color = "lavender"), #colors the axis labels lavender
    axis.title = element_text(color = "bisque", face = "bold"), #colors the axis titles bisque
    plot.title = element_text(color = "ivory", face = "bold", hjust = 0.5, size = 20) #Colors the title gold, bold it and centers it across the canvas
  )

In [None]:
Corr_4 = cor.test(df$CO2_Conc,df$Arctic_Ice_Area, method = "pearson")
print(Corr_4)

#Extracting P-value
p_value = Corr_4$p.value

#Hypothesis

if (p_value < 0.05) {
  print("There is a significant correlation between CO2 concentration and Arctic ice area.")
} else {
  print("There is no significant correlation between CO2 concentration and Arctic ice area.")
}

In [None]:
ggplot(df,aes(x = CO2_Conc, y = Rise_in_Sea_Lvl)) +
  geom_point(pch = 8, color = "darkgreen", size = 2) +
  ggtitle("CO2 Concentration vs Rise in Sea Level") +
  scale_x_continuous(expand = c(0,0)) +
  geom_smooth(method = "lm", se = FALSE, color = "firebrick4", linewidth = 0.4) +
  theme_minimal(base_size = 15) +
  theme(
    plot.background = element_rect(fill = "white", color = NA), #greys out the background
    panel.background = element_rect(fill = "grey98", color = NA), #colors the panel white
    panel.grid.major = element_blank(), #removes major  gridlines
    panel.grid.minor = element_blank(), #removes minor gridlines
    axis.text = element_text(color = "darkgreen"), #colors the axis labels lavender
    axis.title = element_text(color = "darkgreen", face = "bold"), #colors the axis titles bisque
    plot.title = element_text(color = "limegreen", face = "bold", hjust = 0.5, size = 20) #Colors the title gold, bold it and centers it across the canvas
  )

In [None]:
Corr_5 = cor.test(df$CO2_Conc,df$Rise_in_Sea_Lvl, method = "pearson")
print(Corr_5)

#Extracting P-value
p_value = Corr_5$p.value

#Hypothesis

if (p_value < 0.05) {
  print("There is a significant correlation between CO2 concentration and Rise in sea level.")
} else {
  print("There is no significant correlation between CO2 concentration and Rise in sea level.")
}

In [None]:
ggplot(df,aes(x = Arctic_Ice_Area, y = Rise_in_Sea_Lvl)) +
  geom_point(pch = 18, color = "grey1", size = 3.5) +
  ggtitle("Arctic Ice Area vs Rise in Sea Level") +
  scale_x_continuous(expand = c(0.001,0.001)) +
  geom_smooth(method = "lm", se = FALSE, color = "firebrick4", linewidth = 0.4) +
  theme_minimal(base_size = 15) +
  theme(
    plot.background = element_rect(fill = "grey0", color = NA), #greys out the background
    panel.background = element_rect(fill = "grey98", color = NA), #colors the panel white
    panel.grid.major = element_blank(), #removes major  gridlines
    panel.grid.minor = element_blank(), #removes minor gridlines
    axis.text = element_text(color = "lavender"), #colors the axis labels lavender
    axis.title = element_text(color = "bisque", face = "bold"), #colors the axis titles bisque
    plot.title = element_text(color = "cornsilk", face = "bold", hjust = 0.5, size = 20) #Colors the title gold, bold it and centers it across the canvas
  )

In [None]:
Corr_6 = cor.test(df$Arctic_Ice_Area,df$Rise_in_Sea_Lvl, method = "pearson")
print(Corr_6)

#Extracting P-value
p_value = Corr_6$p.value

#Hypothesis

if (p_value < 0.05) {
  print("There is a significant correlation between Arctic ice area and Rise in sea level.")
} else {
  print("There is no significant correlation between Arctic ice area and Rise in sea level.")
}