Skip to content

Commit e284444

Browse files
committed
init
0 parents  commit e284444

File tree

1,090 files changed

+24416
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,090 files changed

+24416
-0
lines changed

.Rhistory

Lines changed: 512 additions & 0 deletions
Large diffs are not rendered by default.

local.Rdata

1.91 MB
Binary file not shown.

main.R

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
# Mirror the images hosted on cos.name that are referenced by the pages
# listed in the site's search index. Files are stored under the current
# working directory using the same relative path they have on the server
# (e.g. ./wp-content/uploads/2011/04/foo.png).
library(rvest)

# Only images whose URL *starts with* this prefix are mirrored.
site_prefix <- "https://cos.name/"

# NOTE: the variable keeps its original name `list` (although it masks
# base::list as a variable) because it is saved under that name in
# local.Rdata at the end of the script.
list <- jsonlite::fromJSON(
  "https://raw.githubusercontent.com/Lchiffon/cosx.org/aloglia/index.json"
)
# URL = sub("cosx.org","cos.name",list$url)
URL <- list$url

# Fetch one page and return the `src` attribute of every <img> element that
# mentions cos.name. Returns character(0), with a warning, when the page
# cannot be downloaded or parsed, so one bad page no longer pollutes the
# results or aborts the run.
extract_img_src <- function(page_url) {
  print(page_url)
  tryCatch(
    {
      content <- readLines(page_url, encoding = "UTF-8")
      # tmp.html is a scratch file in the working directory; the page is
      # written with useBytes = TRUE and re-read as UTF-8 by read_html().
      writeLines(content, "tmp.html", useBytes = TRUE)
      src <- read_html("tmp.html", encoding = "UTF-8") %>%
        html_nodes("img") %>%
        html_attr("src")
      # grep() with value = TRUE also drops NA (images without a src).
      grep("cos.name", src, value = TRUE, fixed = TRUE)
    },
    error = function(e) {
      warning(
        "Skipping ", page_url, ": ", conditionMessage(e),
        call. = FALSE
      )
      character(0)
    }
  )
}

outputList <- lapply(URL, extract_img_src)

# as.character() turns the NULL produced by an empty list into character(0).
all_src <- unique(as.character(unlist(outputList, use.names = FALSE)))

# Keep only URLs that begin with the site prefix. The previous unanchored
# grep("https://cos.name/") also accepted Wayback Machine links such as
#   https://web.archive.org/web/20120602095612/https://cos.name/wp-content/...
# whose derived local path still started with "https://web.archive.org/...".
# That produced, for every such link:
#   - dir.create(): cannot create dir 'https:\', reason 'Invalid argument'
#   - curl exit status 23, reported by download.file() as a non-zero
#     download exit status
downloadURL <- all_src[!is.na(all_src) & startsWith(all_src, site_prefix)]

# Local destination: the URL with the site prefix replaced by "./".
fileNames <- sub(site_prefix, "./", downloadURL, fixed = TRUE)
dirNames <- dirname(fileNames)

# setwd("D:/git/uploads/")

for (i in seq_along(downloadURL)) {
  print(downloadURL[i])
  if (!dir.exists(dirNames[i])) {
    dir.create(dirNames[i], recursive = TRUE)
  }
  download.file(downloadURL[i], fileNames[i], method = "curl")
}

save(list, downloadURL, file = "local.Rdata")

tmp.html

Lines changed: 984 additions & 0 deletions
Large diffs are not rendered by default.

wp-content/uploads/2008/11/1.jpg

17.6 KB

wp-content/uploads/2008/11/2.jpg

30.6 KB
15 KB
5.4 KB

wp-content/uploads/2008/11/counts.png

13.3 KB
16.5 KB

0 commit comments

Comments
 (0)