In [1]:
# Fetch the raw survey CSV and store it in the working directory
download.file(
  url = "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Fss06hid.csv",
  destfile = "ss06hid.csv"
)

# Load the downloaded file into a data frame
data <- read.csv(file = "ss06hid.csv")

# Flag households that satisfy both conditions:
#   - lot size:            ACR == 3 (the ">10 acres" category)
#   - agriculture sales:   AGS == 6 (the ">$10,000" category)
# Rows with a missing code never evaluate to TRUE, so which() skips them.
agricultureLogical <- with(data, ACR == 3 & AGS == 6)

# Row numbers of the households meeting both criteria
which(agricultureLogical)


In [4]:
library(jpeg)

# Load the picture from the working directory, asking for the native
# raster representation rather than the default one
img <- readJPEG(source = "getdata_jeff.jpg", native = TRUE)

# 30th and 80th percentiles of the values stored in the raster
quantile(x = img, probs = c(0.3, 0.8))


In [5]:
# Fetch the GDP ranking table
download.file(
  url = "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FGDP.csv",
  destfile = "GDP.csv"
)

# Fetch the per-country educational statistics
download.file(
  url = "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FEDSTATS_Country.csv",
  destfile = "EDSTATS_Country.csv"
)

# Load both files; for GDP, skip the 4 leading lines and read 190 data rows
gdp_data <- read.csv(file = "GDP.csv", skip = 4, nrows = 190)
edu_data <- read.csv(file = "EDSTATS_Country.csv")

# Keep columns 1, 2, 4 and 5 of the GDP table under descriptive names, then
# drop the comma separators so the GDP figures can be parsed as numbers
gdp_clean <- setNames(
  gdp_data[, c(1, 2, 4, 5)],
  c("CountryCode", "Rank", "Country", "GDP")
)
gdp_clean$GDP <- as.numeric(gsub(",", "", gdp_clean$GDP))

# Join the two tables on their shared country code
merged_data <- merge(x = gdp_clean, y = edu_data, by = "CountryCode")

# Report how many country codes are present in both tables
print(paste("Number of matching IDs:", nrow(merged_data)))

# Reorder rows from the largest rank number down to the smallest.
# The negated key is kept on purpose: it determines how tied ranks are ordered.
merged_data <- merged_data[order(-merged_data$Rank), ]

# Report the country sitting in 13th position after the sort
print(paste("13th country:", merged_data$Country[13]))


[1] "Number of matching IDs: 189"
[1] "13th country: St. Kitts and Nevis"


In [6]:
# Mean GDP rank within each of the two high-income groups
income_groups <- c("High income: OECD", "High income: nonOECD")
is_high_income <- merged_data$Income.Group %in% income_groups
avg_rankings <- aggregate(
  Rank ~ Income.Group,
  data = merged_data[is_high_income, ],
  FUN = mean
)

# Show the per-group averages
print(avg_rankings)


          Income.Group     Rank
1 High income: nonOECD 91.91304
2    High income: OECD 32.96667


In [7]:
# Cut the GDP ranks at their 0/20/40/60/80/100% quantiles and label the five
# resulting bins 1-5; bin 1 holds the smallest rank numbers (highest GDP)
rank_breaks <- quantile(merged_data$Rank, probs = seq(0, 1, by = 0.2))
merged_data$GDPQuantile <- cut(
  merged_data$Rank,
  breaks = rank_breaks,
  labels = 1:5,
  include.lowest = TRUE
)

# Cross-tabulate income group (rows) against GDP quantile (columns)
gdp_income_table <- table(merged_data$Income.Group, merged_data$GDPQuantile)
print("Distribution of countries by income group and GDP quantile:")
print(gdp_income_table)

# Count the "Lower middle income" countries that fall in quantile group 1
is_lower_middle <- merged_data$Income.Group == "Lower middle income"
in_top_group <- merged_data$GDPQuantile == 1
lower_middle_high_gdp <- sum(is_lower_middle & in_top_group)
print(paste(
  "Number of Lower middle income countries in highest GDP quantile:",
  lower_middle_high_gdp
))


[1] "Distribution of countries by income group and GDP quantile:"
                      
                        1  2  3  4  5
  High income: nonOECD  4  5  8  4  2
  High income: OECD    18 10  1  1  0
  Low income            0  1  9 16 11
  Lower middle income   5 13 11  9 16
  Upper middle income  11  9  8  8  9
[1] "Number of Lower middle income countries in highest GDP quantile: 5"
