In [None]:
# install.packages("ggplot2")
library(ggplot2)

# Set the working directory as needed.
setwd("D:/R/Model_Year_Tracking_Data")

# Load the data set.
# The data includes the model year of the vehicle and the average pollution
# for HC, NOX and CO for each model year during the given testing year.
# Notes on what the data set covers:
#   - Years before 2010 were not included because not all testing years
#     required vehicles with model years in the late 2000s to test.
#   - Model years from pre-2000 also did not have substantial quantities of
#     records for all testing years and were thus not included.
#   - Model years past 2011 were not included, as vehicles past this cutoff
#     were not yet required to be tested at the time of this analysis.
Model_Year_Data <- read.csv(
  file = "./Model_Year_Pollution_Tracking.csv",
  header = TRUE
)



In [None]:
# Three line charts follow, one each for CO, HC and NOX emissions.
# Each chart plots average emissions against vehicle model year, with one
# line per emissions testing year (lines are grouped and coloured by
# Testing_Year).

# The model-year column is looked up by its suffix instead of being spelled
# out. NOTE(review): the "ï.." prefix this script used to hard-code looks
# like a UTF-8 byte-order mark mangled by read.csv(); its exact spelling
# would then depend on platform and locale - confirm against the CSV.
vehicle_year_col <- grep("Vehicle_Year$", names(Model_Year_Data), value = TRUE)
if (length(vehicle_year_col) != 1L) {
  stop(
    "Expected exactly one column ending in 'Vehicle_Year', found ",
    length(vehicle_year_col),
    call. = FALSE
  )
}

# Builds one emissions-by-model-year line chart from Model_Year_Data.
#   emissions_col: name of the Model_Year_Data column plotted on the y axis
#   pollutant:     pollutant name used in the y-axis label and the title
#   y_breaks:      tick positions for the y axis
# Returns the ggplot object; calling it at top level displays the chart.
plot_emissions_by_model_year <- function(emissions_col, pollutant, y_breaks) {
  # Copy the needed columns under fixed names so aes() can refer to them
  # without depending on how the CSV header was read.
  plot_data <- data.frame(
    Vehicle_Year = Model_Year_Data[[vehicle_year_col]],
    Emissions = Model_Year_Data[[emissions_col]],
    Testing_Year = Model_Year_Data$Testing_Year,
    stringsAsFactors = FALSE
  )

  ggplot(
    plot_data,
    aes(Vehicle_Year, Emissions, color = Testing_Year, group = Testing_Year)
  ) +
    geom_line() +
    xlab("Vehicle Year") +
    ylab(paste0(pollutant, " Emissions (grams/mile)")) +
    ggtitle(paste0(pollutant, " Emissions by Model Year and Testing Year")) +
    scale_x_continuous(breaks = c(1982, 1990, 2000, 2010)) +
    scale_y_continuous(breaks = y_breaks)
}

plot_emissions_by_model_year(
  "CO_Emissions_grams_per_mile", "CO", seq(0, 50, 10)
)

plot_emissions_by_model_year(
  "HC_Emissions_grams_per_mile", "HC", seq(0, 3.5, 0.5)
)

plot_emissions_by_model_year(
  "NOX_Emissions_grams_per_mile", "NOX", seq(0, 3, 0.5)
)
# Disabled manual colour palette, kept from the original NOX chart:
#   scale_colour_manual(name = "",
#                       values = c("green3", "orange", "blue", "red", "grey", "purple", "yellow"))


###################################################################



In [None]:
# Track how the average emission value of each model-year fleet changed over
# time. For every model year from 1982 to 2011, the change is the average in
# the 2016 testing year minus the average in the 2010 testing year, computed
# separately for CO, HC and NOX.
model_years <- 1982:2011

# The model-year column is looked up by its suffix instead of being spelled
# out. NOTE(review): the "ï.." prefix this script used to hard-code looks
# like a UTF-8 byte-order mark mangled by read.csv(); its exact spelling
# would then depend on platform and locale - confirm against the CSV.
vehicle_year_col <- grep("Vehicle_Year$", names(Model_Year_Data), value = TRUE)
if (length(vehicle_year_col) != 1L) {
  stop(
    "Expected exactly one column ending in 'Vehicle_Year', found ",
    length(vehicle_year_col),
    call. = FALSE
  )
}

# Rows of each testing year, filtered once instead of once per model year.
tests_2010 <- Model_Year_Data[which(Model_Year_Data$Testing_Year == 2010), ]
tests_2016 <- Model_Year_Data[which(Model_Year_Data$Testing_Year == 2016), ]

# Position of every model year within each subset. match() yields NA for a
# model year that has no row, so each result below always has exactly one
# entry per model year and stays aligned with `years`.
rows_2010 <- match(model_years, tests_2010[[vehicle_year_col]])
rows_2016 <- match(model_years, tests_2016[[vehicle_year_col]])

# Returns, for each model year, the 2016 value minus the 2010 value of the
# given emissions column (NA where either testing year has no record).
change_2010_to_2016 <- function(emissions_col) {
  tests_2016[[emissions_col]][rows_2016] -
    tests_2010[[emissions_col]][rows_2010]
}

# One single-column data frame per pollutant; each column carries the same
# name as its variable, which the plotting code further down refers to.
HC_change_vector <- data.frame(
  HC_change_vector = change_2010_to_2016("HC_Emissions_grams_per_mile")
)
NOX_change_vector <- data.frame(
  NOX_change_vector = change_2010_to_2016("NOX_Emissions_grams_per_mile")
)
CO_change_vector <- data.frame(
  CO_change_vector = change_2010_to_2016("CO_Emissions_grams_per_mile")
)

# Model-year column for the tables built later. as.data.frame() names the
# column `c(1982:2011)` after the expression; the later plots use that name.
years <- as.data.frame(c(1982:2011))



In [None]:
# Combine the model years and the per-pollutant emissions changes computed
# above into one table. cbind() on data frames already returns a data frame.
Pollution_Change_df <- cbind(
  years, CO_change_vector, HC_change_vector, NOX_change_vector
)

# Plot the emissions change against model year.
# Because of scale, CO is graphed separately from NOX and HC.

# NOX and HC share one chart; colour is mapped through a manual scale so the
# chart gets a legend telling the red (NOX) and blue (HC) lines apart.
ggplot(Pollution_Change_df, aes(x = `c(1982:2011)`)) +
  geom_line(aes(y = NOX_change_vector, color = "NOX")) +
  geom_line(aes(y = HC_change_vector, color = "HC")) +
  scale_color_manual(
    name = "Pollutant",
    values = c(NOX = "red", HC = "blue")
  ) +
  xlab("Vehicle Year") +
  ylab("Change in Emissions, 2010 to 2016, g/mile") +
  ggtitle("NOX&HC Emissions Change by Model Year, 2010 to 2016") +
  scale_x_continuous(breaks = c(1982, 1990, 2000, 2010))

# CO on its own chart.
ggplot(Pollution_Change_df, aes(x = `c(1982:2011)`, y = CO_change_vector)) +
  geom_line(color = "green") +
  xlab("Vehicle Year") +
  ylab("Change in Emissions, 2010 to 2016, g/mile") +
  ggtitle("CO Emissions Change by Model Year, 2010 to 2016") +
  scale_x_continuous(breaks = c(1982, 1990, 2000, 2010))

# Looking at the charts, the value in each graph started relatively high and
# then decreased.


In [None]:
#################################################################################################

# The calculations below mirror those of the section above, but show the
# percent change relative to the 2010 testing year rather than the raw
# g/mile change: 100 * (2016 average - 2010 average) / (2010 average).
model_years <- 1982:2011

# The model-year column is looked up by its suffix instead of being spelled
# out. NOTE(review): the "ï.." prefix this script used to hard-code looks
# like a UTF-8 byte-order mark mangled by read.csv(); its exact spelling
# would then depend on platform and locale - confirm against the CSV.
vehicle_year_col <- grep("Vehicle_Year$", names(Model_Year_Data), value = TRUE)
if (length(vehicle_year_col) != 1L) {
  stop(
    "Expected exactly one column ending in 'Vehicle_Year', found ",
    length(vehicle_year_col),
    call. = FALSE
  )
}

# Rows of each testing year, filtered once instead of once per model year.
tests_2010 <- Model_Year_Data[which(Model_Year_Data$Testing_Year == 2010), ]
tests_2016 <- Model_Year_Data[which(Model_Year_Data$Testing_Year == 2016), ]

# Position of every model year within each subset. match() yields NA for a
# model year that has no row, so each result below always has exactly one
# entry per model year and stays aligned with `years`.
rows_2010 <- match(model_years, tests_2010[[vehicle_year_col]])
rows_2016 <- match(model_years, tests_2016[[vehicle_year_col]])

# Returns, for each model year, the percent change of the given emissions
# column from the 2010 to the 2016 testing year (NA where either testing
# year has no record).
percent_change_2010_to_2016 <- function(emissions_col) {
  baseline <- tests_2010[[emissions_col]][rows_2010]
  latest <- tests_2016[[emissions_col]][rows_2016]
  100 * (latest - baseline) / baseline
}

HC_percent_change_vector <-
  percent_change_2010_to_2016("HC_Emissions_grams_per_mile")
NOX_percent_change_vector <-
  percent_change_2010_to_2016("NOX_Emissions_grams_per_mile")
CO_percent_change_vector <-
  percent_change_2010_to_2016("CO_Emissions_grams_per_mile")

# `years` is the model-year data frame created in the previous section; the
# three vectors become columns named after their variables.
Pollution_Percent_Change_df <- cbind(
  years,
  CO_percent_change_vector,
  HC_percent_change_vector,
  NOX_percent_change_vector
)

# NOX and HC share one chart; colour is mapped through a manual scale so the
# chart gets a legend telling the red (NOX) and blue (HC) lines apart.
ggplot(Pollution_Percent_Change_df, aes(x = `c(1982:2011)`)) +
  geom_line(aes(y = NOX_percent_change_vector, color = "NOX")) +
  geom_line(aes(y = HC_percent_change_vector, color = "HC")) +
  scale_color_manual(
    name = "Pollutant",
    values = c(NOX = "red", HC = "blue")
  ) +
  xlab("Vehicle Year") +
  ylab("Percent Change in Emissions, 2010 to 2016") +
  ggtitle("NOX&HC Emissions Percent Change by Model Year, 2010 to 2016") +
  scale_x_continuous(breaks = c(1982, 1990, 2000, 2010))

# CO on its own chart. The y-axis label no longer says "g/mile", because the
# plotted value is a percentage.
ggplot(
  Pollution_Percent_Change_df,
  aes(x = `c(1982:2011)`, y = CO_percent_change_vector)
) +
  geom_line(color = "green") +
  xlab("Vehicle Year") +
  ylab("Percent Change in Emissions, 2010 to 2016") +
  ggtitle("CO Emissions Percent Change by Model Year, 2010 to 2016") +
  scale_x_continuous(breaks = c(1982, 1990, 2000, 2010))

# Looking at the charts, we see a highly fluctuating line.
# This is likely because newer model year vehicles emitted far less than
# older vehicles, so minor changes/degradation to the pollution controls of
# newer vehicles would still lead to high % change values.
# Because of this sensitivity, the charts from the section above are likely
# to be more useful for presentation.