Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 121 lines (90 sloc) 3.777 kB
1294def @emhart First commit
authored
1 #' Search google trends (http://trends.google.com)
2 #' using a python API.
3 #'
4 #'
5 #' @param keywords A vector of search strings; each term will be a separate search. Required.
6 #' @param date A vector of two dates, each coded as a string in the form YYYY-MM, with the first element being the starting date and the second element the end date. To return the entire range, leave blank. To search from a starting date to the last available point, set the second element to "all"; to search from the beginning until a specified end date, set the first element to "all".
7 #' @return a data frame with weekly search output and search volume within the specified date range
8 #' @import rJython rJava stringr
9 #' @author Edmund Hart \email{edmund.m.hart@@gmail.com}
10 #' @export
87538c4 @sckott roxygen fixes
sckott authored
11 #' @examples \dontrun{
82f5e91 @emhart Set to automatically find python source
authored
12 #' my_search <- rGtrends("Bieber")
1294def @emhart First commit
authored
13 #' plot(my_search[,2],my_search[,1],type='l')
14 #' }
15 #'
16
d11cdfe @emhart Finished documentation, added some extra error handling
authored
rGtrends <- function(keywords, date = c("all", "all"), src_path = NA) {
  # Query Google Trends through the bundled Python scripts (run via rJython)
  # and return the parsed report as a data frame.
  #
  # Arguments:
  #   keywords  Character vector of 1 to 5 search terms.
  #   date      Length-2 character vector c(start, end). Each element is either
  #             "all" or a string that is matched (via grep) against the dates
  #             of the returned report, e.g. "2010-03".
  #   src_path  Directory containing pyGTrends.py and pyGparse.py. When NA
  #             (the default) or NULL, the "src" directory of the installed
  #             rGtrends package is used.
  #
  # Returns:
  #   A data frame whose first column is "Date" and whose remaining columns
  #   are named after the keywords (with spaces removed), restricted to the
  #   requested date range.

  ## Argument validation
  if (!is.character(keywords)) stop("Keywords must be strings")
  if (length(keywords) < 1) stop("At least one keyword must be supplied")
  if (length(keywords) > 5) stop("Only 5 keywords can be used in one search")

  if (length(date) != 2) stop("Date must be a vector of length 2, see documentation")

  ## Locate the Python sources shipped with the package if no path was given.
  ## find.package() raises an informative error when rGtrends is not
  ## installed, instead of silently building a path that starts with "NA".
  if (is.null(src_path) || (length(src_path) == 1 && is.na(src_path))) {
    src_path <- file.path(find.package("rGtrends"), "src")
  }

  ## Python statements that load the two helper modules from src_path
  pg_path <- paste0("'", src_path, "/pyGTrends.py'")
  pyg_src <- paste0("pg = imp.load_source('pyGTrends',", pg_path, ")")

  pgp_path <- paste0("'", src_path, "/pyGparse.py'")
  pygp_src <- paste0("gp = imp.load_source('pyGparse',", pgp_path, ")")

  # Named `jython` so the interpreter handle does not shadow the rJython()
  # constructor it is created from.
  jython <- rJython()
  jython$exec("import imp")

  jython$exec(pyg_src)
  jython$exec(pygp_src)

  # Name and password for rGtrends. People may not want to send
  # their e-mail credentials unencrypted so I made a dummy email account
  # but I may have to change this at some point. Please don't send e-mails
  # from it :)
  # NOTE(review): these credentials are hard-coded in the source; consider
  # letting callers supply their own.
  jython$exec("con = pg.pyGTrends('rgtrendsapi','ropensci')")

  ## Request the report for all keywords in a single call
  terms <- paste0("(", paste(add_char(keywords), collapse = ","), ")")
  call <- paste0("con.download_report(", terms, ")")
  jython$exec(call)

  #### Leaving this code in for when the API is fixed
  #if (language){
  #  data_call <- paste("data=gp.pyGparse(con.csv(section=",add_char("Language"),"))")
  #}

  #if (city){
  #  data_call <- paste("data=gp.pyGparse(con.csv(section=",add_char("Cities"),"))")
  #}

  #if (region){
  #  data_call <- paste("data=gp.pyGparse(con.csv(section=",add_char("Region"),"))")
  #}

  #if(sum(c(region,city,language)) == 0){
  data_call <- "data=gp.pyGparse(con.csv())"
  #}

  ### Extract data into a data frame

  # Element 0 of the parsed report is read as the date column; its first
  # entry is skipped as a header.
  jython$exec(paste0(data_call, "[0]"))
  dat <- jython$get("data")
  dat <- .jstrVal(dat)
  dat <- strip_char(dat)
  output <- matrix(0, ncol = length(keywords), nrow = length(dat) - 1)

  my_dates <- format_py_date(dat[-1])

  # Elements 1..k of the parsed report hold one series per keyword; again the
  # first entry of each is a header and is dropped.
  for (i in seq_along(keywords)) {
    jython$exec(paste0(data_call, "[", i, "]"))
    dat <- jython$get("data")
    dat <- .jstrVal(dat)
    dat <- strip_char(dat)
    output[, i] <- as.numeric(dat[-1])
  }

  output <- data.frame(output)
  output <- cbind(my_dates, output)
  colnames(output) <- c("Date", gsub(" ", "", keywords))

  ## Restrict to the requested date range
  if (date[1] == "all" && date[2] == "all") {
    return(output)
  }

  # Rows whose date matches `pattern`; fails loudly when nothing matches
  # rather than letting min()/max() of an empty vector produce +/-Inf.
  match_rows <- function(pattern) {
    hits <- grep(pattern, my_dates)
    if (length(hits) == 0) {
      stop("Date '", pattern, "' was not found in the returned data")
    }
    hits
  }

  first_row <- if (date[1] == "all") 1 else min(match_rows(date[1]))
  last_row <- if (date[2] == "all") nrow(output) else max(match_rows(date[2]))

  output[first_row:last_row, ]
}
120
Something went wrong with that request. Please try again.