Skip to content

Latest commit

 

History

History
1475 lines (1132 loc) · 40.8 KB

history.md

File metadata and controls

1475 lines (1132 loc) · 40.8 KB

每次修改时的变更记录。

20170721

  • 修复Search的Showcase图标显示Markdown文件配置

20161227

  • 修改服务端端口号为9101,减少与本机其他应用程序冲突。同时修改测试代码执行端口号

20161205

  • 修复搜索页面中测试,每个action一定要包括一个return。且必须要有内容

20161201

  • 修复专题数据获取方式,使用async模块

  • 删除sleep模块的引用

  • 实现部分searchPage BDD测试

20161130

  • 完成首页BDD测试
Feature: HomePage feature
  As a user
  I open homePage
  So that I can see infoData, trendData and articleData.

  Scenario: Show infoData
    Given I am on "HomePage"
    Then I should see "2016" in "infoData"

  Scenario: Show trendData
    Given I am on "HomePage"
    Then I should see "2016" in "trendData"

  Scenario: Show articleData
    Given I am on "HomePage"
    Then I should see "2016" in "articleData"
module.exports = function () {

    this.Given(/^I am on the Cucumber.js GitHub repository$/, function() {
        return this.driver.get('https://github.com/cucumber/cucumber-js/tree/master');
    });

    this.When(/^I click on "([^"]*)"$/, function (text) {
        return this.driver.findElement({linkText: text}).then(function(element) {
            return element.click();
        });
    });

    this.Then(/^I should see "([^"]*)"$/, function (text) {
        var xpath = "//*[contains(text(),'" + text + "')]";
        var condition = webdriver.until.elementLocated({xpath: xpath});
        return this.driver.wait(condition, 5000);
    });

    this.Given(/^I am on "([^"]*)"$/, function(pageURl) {
        if (pageURl == 'HomePage'){
            return this.driver.get('http://localhost:4001/');
        }else if (pageURl == "articlePage"){
            return this.driver.get('http://localhost:4001/' + "article");
        }else if (pageURl == "collectionsPage"){
            return this.driver.get('http://localhost:4001/' + "collections");
        }else if (pageURl == "userPage") {
            return this.driver.get('http://localhost:4001/' + "user");
        }else {
            return this.driver.get('http://localhost:4001/' + "search");
        }

    });

    this.Then(/^I should see "([^"]*)" in "([^"]*)"$/, function (content, area) {
        if (area == "infoData"){
            var xpath = "//*[@id=\"content\"]/td[1][contains(text(),'" + content + "')]";
            var condition = webdriver.until.elementLocated({xpath: xpath});
            return this.driver.wait(condition, 10000);
        } else if (area == "trendData"){
            var css = ".highcharts-xaxis-labels";
            var condition = webdriver.until.elementLocated({css: css});
            return this.driver.wait(condition, 10000);
        } else{
            var xpath = "//*[@id=\"myArticle\"]/div[1]/div[2]/div[2]/table/tbody/tr[1]/td[2][contains(text(),'" + content + "')]";
            var condition = webdriver.until.elementLocated({xpath: xpath});
            return this.driver.wait(condition, 10000);
        }

    });

};
  • 重构首页中BDD测试,使用Examples
# features/homePage.feature
Feature: HomePage feature
  As a user
  I open homePage
  So that I can see infoData, trendData and articleData.

  Scenario Outline: Show infoData
    When I am on <Page>
    Then I should see <Data> in <Area>

    Examples:
      | Page     | Area     | Data |
      | "HomePage" | "infoData" | "2016" |
      | "HomePage" | "trendData" | "2016" |
      | "HomePage" | "articleData" | "2016" |

20161129

  • 添加Cucumber和WebDriver,深度使用BDD的方式进行测试

    • 执行测试./node_modules/cucumber/bin/cucumber.js

20161123

  • 修复表头排序时,仅按首数字排序: 去掉td中的span元素

  • 首页中,显示个人文章列表中热门文章,按阅读量排序

20161121

  • 修复首页中个人数据表格中的数据重复,将数据的处理缩进统一调整
tbody
    each result in info
        tr#content
            td #{result.date}
            td #{result.favorite}
            td #{result.follower}
  • 提取测试代码 - 提取公共的支持脚本support/config.js - 添加pageObject文件夹,存放PO对象

20161116

  • 修复首页中个人数据部分表头与数据内容无法对齐

    • 固定个人数据与趋势图的列宽
  • 修复个人数据测试失败

20161108

  • 修复个人信息获取专题数据获取时的数据更新,使用findOneAndUpdate方法,替换update

20161103

a(href="/", class='glyphicon glyphicon-home') 首页
  • 添加自己文章列表的和作者列表中的表格排序功能
table(data-toggle="table", data-sort-name="date", data-sort-order="desc")
    thead
        tr
            th(data-field="title", data-sortable="false") 文章标题
            th(data-field="date", data-sortable="true") 发布时间
            th(data-field="reading", data-sortable="true") 阅读量
            th(data-field="comment", data-sortable="true") 评论
            th(data-field="favorite", data-sortable="true") 喜欢

20161102

  • 添加专题页面显示

    • 用户关注数倒序排列

    • 默认仅展示前20条专题内容

    • 添加专题页面测试

  • 添加bootstrap bootstrap-table的样式处理

    • 替换Header,表格样式

20161101

  • 获取整个网站的专题数据

    • 每一页的专题数据为http://www.jianshu.com/collections
    • 第二页请求为http://www.jianshu.com/collections?page=2&_=1477976894514
    • 后面的所有请求格式均为http://www.jianshu.com/collections?order_by=score&page=3&_=1477976894515

    尝试使用Postman修改统一的请求格式/collections?order_by=score&page=3&_=1477976894515,结果为所有请求均可使用此方式请求。

  • 时间戳

    • 使用moment的格式化参数.format('x')
  • 尝试查看整个网站有效的专题页码一共34页,使用笨办法: for循环50次,来获取所有的专题数据

    • 添加sleep模块,减少在查找时,数据不同步。添加等待时间
/**
 * Crawl the paginated "/collections" listing and store every collection found.
 *
 * Issues 50 page requests through getURL.getPageContent. For each collection
 * entry in a successful response it builds a record (id, title, articleCount,
 * follower, description) and hands it to
 * collectionsProxy.saveAndUpdateCollections. Takes no arguments and returns
 * nothing; request errors are only logged.
 */
function getCollections() {
    // Timestamp string sent as the "_" query parameter of every request.
    var now = moment().format('x');

    // NOTE(review): the page number starts at 0 — confirm the site treats page=0 as a valid first page.
    for(var count = 0; count < 50; count++){
        // Wait before each request; presumably this blocks for one second — confirm against the sleep module.
        sleep.sleep(1);
        getURL.getPageContent("/collections?page=" + count + "&_=" + now, function (err, res) {
            if (err) {
                console.log(err);
            } else {
                var $ = cheerio.load(res.text);
                // The site answers with this heading text when the requested page does not exist.
                if ($('div').find('h1').text() == "您要找的页面不存在") {
                    console.log('页面不存在');
                    // Presumably meant to stop the loop once a missing page is reached.
                    // NOTE(review): `count` is the shared `var` loop counter; if this callback
                    // runs after the loop has already finished, the assignment has no effect — confirm.
                    count = 50;
                } else {
                    $('#all-collections li .collections-info').each(function (idx, collectionEle) {
                        var href = $(collectionEle).find('h5 a').attr('href');
                        var articleCount = $(collectionEle).find('.blue-link').text();
                        var follower = getCollectionFollower($(collectionEle).find('p').last().text());
                        // Single-element array; only its first (and only) entry is saved below.
                        var collection = [];
                        collection.push({
                            // Last path segment of the collection link.
                            id: href.split('/')[href.split('/').length - 1].toString(),
                            title: $(collectionEle).find('h5 a').text(),
                            // Text in front of the "篇" counter suffix.
                            articleCount: articleCount.split('篇')[0],
                            follower: follower,
                            description: $(collectionEle).find('.description').text()
                        });
                        collectionsProxy.saveAndUpdateCollections(collection[0]);
                    });
                }
            }
        })
    }

}

20161031

  • 分离数据爬取:文章内容/自己信息和专题
/**
 * Register the recurring job that syncs articles.
 *
 * Creates a schedule.RecurrenceRule with `second` set to 10 and schedules a
 * job that logs a message and calls articleInfo() whenever the rule fires.
 */
function syncArticle() {
    var articleRule = new schedule.RecurrenceRule();
    articleRule.second = 10;

    var runArticleSync = function () {
        console.log('Sync article...');
        articleInfo();
    };

    schedule.scheduleJob(articleRule, runArticleSync);
}

/**
 * Register the recurring job that syncs the personal info and the collections.
 *
 * Creates a schedule.RecurrenceRule with `minute` set to 15 and schedules a
 * job that logs a message, then calls myInfo() followed by getCollections().
 */
function syncMyInfoAndCollections() {
    var profileRule = new schedule.RecurrenceRule();
    profileRule.minute = 15;

    var runProfileSync = function () {
        console.log('Sync myInfo and collections...');
        myInfo();
        getCollections();
    };

    schedule.scheduleJob(profileRule, runProfileSync);
}

/**
 * Start both recurring sync jobs: articles first, then the personal info and
 * collections.
 */
function syncData() {
    var startJobs = [syncArticle, syncMyInfoAndCollections];

    startJobs.forEach(function (startJob) {
        startJob();
    });
}

20161027

  • 修复在获取网页数据时,报503错误后,APP崩溃问题

    • 添加如果报错,则仅显示日志信息,不callback

20161025

  • 添加获取并存储专题数据功能

20161020

  • 调整个人信息中, 关注变更为获取喜欢数据

  • 调整作者页面中,关注获取喜欢数据

20161019

  • 完善搜索结果,在没有找到结果时,提示信息,并隐藏数据表格显示

  • 重构用户数据展示模板,添加userList.jade

  • 优化搜索后,返回搜索类型搜索内容

select(name='searchType')
    option(value='article', selected= searchType == 'article') 文章
    option(value='author', selected= searchType == 'author') 作者
  • 添加用户搜索的测试

  • 修复chart图表中,日期显示与实际数据不匹配

    • 在给highchart插入数据时,先将从mongo中获取的数据进行颠倒,再返回给前端进行渲染
    followerList: followerList.reverse(),
    followingList: followingList.reverse(),
    dateList: dateList.reverse()
    

20161018

  • 设置在未输入搜索内容时,搜索框内容为空

    • 设置搜索输入框的placeholder
if result == null
    input(name='searchContent', value='', placeholder='请输入搜索内容')
else
    input(name='searchContent', value='#{searchContent}')
  • 添加搜索测试内容

    • 默认页面
    if ($("input").toArray().length < 1) throw new Error('There is not input_text!');
    if (!$("button")) throw new Error('There is not search_button!');
    
    • 搜索结果
    request.post('/search')
        .send({'searchContent': '1'})
        .expect(200)
        .expect(function (res) {
            var $ = cheerio.load(res.text);
            if ($("#article tbody tr td").eq(0).text().length < 1) {
                throw new Error("article title must exist.");
            }
        })
        .end(done);
    
  • 提取文章内容展示公共模板文件articleList.jade

  • 添加搜索类型的下拉列表选项

select(name='searchType')
    option(value='article') 文章
    option(value='author') 作者

20161014

  • 添加搜索文章功能,支持搜索结束后,将搜索内容回显给输入框

    • 使用form表单提交搜索内容,search.jade
    form(action='/search', method='post')
        input(name='searchContent')
        button(type='submit') 搜索
    
    • 模糊查询标题名,proxy/article.js
    exports.findByTitle = function (articleTitle, callback) {
        var findContent = { 'title': { $regex:  articleTitle}};
        article.find(findContent, callback);
    };
    
    

20161011

  • 修改个人数据获取时,若数据已存在,则更新数据
exports.updateInfo = function (today, following, follower, callback) {
    myInfo.update({date: today}, {following: following, follower: follower}, callback);
};
  • 提取页面头部header为公共的jade文件,使用include调用

  • 发布Release: V0.0.3

20161010

  • 添加作者页面: 默认展示粉丝量最高的前20位作者

    • 添加测试代码test/user-spec.js

    • 作者页面中,作者名支持可点击至简书官网作者的个人信息页面

  • 作者数据已存在,则更新作者关注量粉丝量

    • proxy/user.js
    exports.updateUser = function (article, following, follower, callback) {
        user.update({id: article.authorHref}, {following: following, follower: follower},callback)
    };
    
    • util/syncData.js
    userProxy.getUserById(article.authorHref,function (err, findAuthor) {
        if (findAuthor.length == 0) {
            userProxy.saveUser(article, following, follower, function (err) {
                if (err) return next(err);
            });
        } else {
            userProxy.updateUser(article, following, follower,function (err) {
                if (err) return next(err);
            })
        }
    });
    

20161009

  • 变更记录提取为单独的文件history.md

  • 将测试内容更加精细化: 数据内容表格内容数量

  • 重构获取个人信息个人文章内容

  • 提取数据库公共链接配置model/config.js

module.exports = {
    dbUrl: "localhost/jianshu"
};

20161008

"highcharts": "^5.0.0"
- 配制假数据,前端可使用`highcharts`控件来显示数据
    - `layout.jade`中添加`highcharts`引用

    ```
      head
        title 简书爬虫
        link(rel='stylesheet', href='/stylesheets/style.css')
        script(type='text/javascript', src="http://code.jquery.com/jquery-1.9.1.min.js")
        script(type='text/javascript', src="http://code.highcharts.com/highcharts.js")
        script(type='text/javascript', src="http://code.highcharts.com/modules/exporting.js")
    ```

    - `index.jade`中添加静态数据展示

    ```
    div#container(style="min-width: 500px; height: 500px; margin: 0 auto")
      script.
        $(function () {
          $('#container').highcharts({
            title: {
              text: '个人信息时势图',
              x: -20 //center
            },
            subtitle: {
              text: '数据来源: jianshu.com',
              x: -20
            },
            xAxis: {
              categories: ['2016-09-22', '2016-09-26', '2016-09-27', '2016-09-28', '2016-09-29', '2016-09-30', '2016-10-08']
            },
            yAxis: {
              title: {
                text: '人数'
              },
              plotLines: [{
                value: 0,
                width: 1,
                color: '#808080'
              }]
            },
            tooltip: {
              valueSuffix: '人'
            },
            legend: {
              layout: 'vertical',
              align: 'right',
              verticalAlign: 'middle',
              borderWidth: 0
            },
            series: [{
              name: '关注',
              data: [28, 28, 28, 29, 29, 29, 28]
            }, {
              name: '粉丝',
              data: [69, 73, 78, 92, 95, 95, 97]
            }]
          });
        });
    ```
  • 使用真数据替换

    • 将获取的三组数据进行打包, 通过render传递给jade模块, 此时需要使用sort方法, 否则会被默认重新排序
    res.render('index', {info: myInfo, myArticle: myArticle, followerList: followerList.sort(), followingList: followingList.sort(), dateList: dateList.sort()});
    
    • 由于时间格式原来采用2016-09-30,在node获取时,会自动进行计算。因此将获取过来的数据进行格式转化: 20160930
    dateList.push(info.date.replace(/-/g,''));
    
    • 完整前端index.jade代码
    div#container(style="min-width: 500px; height: 500px; margin: 0 auto")
      script.
        var arrToMultiArr = function (arr) {
          var result = new Array()
          for (var i = 0; i < arr.length; i++) {
            var date = arr[i]
            result.push([date])
          }
          return result
        };
        var followerList = arrToMultiArr([#{followerList}]);
        var followingList = arrToMultiArr([#{followingList}]);
        var dateList = arrToMultiArr([#{dateList}]);
    
        $(function () {
          $('#container').highcharts({
            title: {
              text: '个人信息时势图',
              x: -20 //center
            },
            subtitle: {
              text: '数据来源: jianshu.com',
              x: -20
            },
            xAxis: {
              categories: dateList
              //categories: ['2016-09-22', '2016-09-26', '2016-09-27', '2016-09-28', '2016-09-29', '2016-09-30', '2016-10-08']
            },
            yAxis: {
              title: {
                text: '人数'
              },
              plotLines: [{
                value: 0,
                width: 1,
                color: '#808080'
              }]
            },
            tooltip: {
              valueSuffix: '人'
            },
            legend: {
              layout: 'vertical',
              align: 'right',
              verticalAlign: 'middle',
              borderWidth: 0
            },
            series: [{
              name: '关注',
              data: followingList
            }, {
              name: '粉丝',
              data: followerList
            }]
          });
        });
    
  • 样式调整

    • 将个人数据及图表放在同一行,文章列表放在下一行

    • 所有表格数据内容均采用居中对齐,文件标题采用左对齐

  • 格式化所有jade代码: 缩进调整

20160930

  • 发布V0.0.2版本

  • 完善测试,添加内容验证: 首页/文章列表

if ($("#myInfo tbody tr td").eq(0).text().indexOf("2016")) throw new Error("MyInfo date is lost, because date is not 201*Year");
if ($("#article tbody tr td").toArray().length == 7) throw new Error("There is not 7 information.");

if ($("#myArticle tbody tr td").eq(1).text().indexOf("2016")) throw new Error("MyArticle is lost, because publish-date is not 201*Year");

20160928

  • 修复: 获取文章数据时,会同时插入一条空数据

    var articleScheme = new Schema({
        title: {type:String, required:true},
        articleHref: {type:String, required:true},
        author: {type:String, required:true},
        authorHref: {type:String, required:true}
    });
    
  • 调整myInfo authormodel添加字段约束

  • 完善测试首页的验证内容

    • 添加对个人信息中首个数据的年份验证

    • 添加对自己文章中首个数据的发布时间年份验证

if ($("#myInfo tbody tr td").eq(0).text().indexOf("2016")) throw new Error("MyInfo date is lost, because date is not 201*Year");
if ($("#myArticle tbody tr td").eq(1).text().indexOf("2016")) throw new Error("MyArticle is lost, because publish-date is not 201*Year");

20160927

  • 个人信息中的排序修改为倒序

    • 修改myInfoSchema.find({},cb)方法为myInfoSchema.find({}).sort({date:-1}).exec(cb)
  • 修改个人信息中数据仅显示前7

    • 调整myInfoSchema.find({}).sort({date:-1}).exec(cb)添加limit(7)限制
  • 修改文章列表仅显示最新的20条

    • 修改getAllArticles方法,排序按_id号排序
    exports.getAllArticles = function (callback) {
        article.find({}).limit(20).sort({ _id: -1}).exec(callback);
    };
    

20160921

  • 时间处理方式统一

    • 调整时间处理格式使用moment模块处理

    • 修复旧数据中时间格式统一将原数据格式调整为: db.myinfos.update({'date':'Fri Aug 05 2016'},{$set:{'date':'2016-08-05'}})

  • 调整首页中获取简书文章时的时间处理: 显示格式YYYY-MM-DD,将获取的时间进行切割

$article.find('.time').attr('data-shared-at').split('T')[0]
  • 调整首页中个人文章列表中数据获取,提取公共模块util/convertString.js处理获取的数据: 评论数/喜欢数/阅读量
/**
 * Return the part of a string that follows its last space character.
 *
 * A string without any space is returned unchanged; a string that ends with
 * a space yields the empty string.
 *
 * @param {string} string - Text to split on single spaces.
 * @returns {string} The last space-separated piece.
 */
function getLatestNumberWithSpace(string) {
    return string.split(' ').pop();
}

  • 添加自动同步功能: 个人信息/新文章,使用模块node-schedule
/**
 * Store today's following/follower counts unless a record for `today`
 * already exists.
 *
 * Looks up records whose `date` equals `today`; when none is found it
 * fetches the profile page at 'http://www.jianshu.com' + myPageHref, reads
 * the first two `<b>` texts below `.clearfix` and creates a new record from
 * them. `today`, `myPageHref`, `myInfoSchema`, `request`, `cheerio` and
 * `next` are not declared in this block and come from the surrounding file.
 */
function myInfo(){
    // Parse the fetched profile page and persist the two counters.
    function saveTodaysCounts(err, res) {
        var $ = cheerio.load(res.text);
        var countAt = function (index) {
            return $('.clearfix').find('b').eq(index).text();
        };
        var following = countAt(0);
        var follower = countAt(1);

        var snapshot = {
            userHref: myPageHref,
            date: today,
            following: following,
            follower: follower
        };
        myInfoSchema.create(snapshot, function (err, result) {
            if (err) return next(err);
        });
    }

    myInfoSchema.find({'date': today}, function (err, result) {
        // A record for today exists already: nothing to do.
        if (result.length != 0) return;

        request.get('http://www.jianshu.com' + myPageHref).end(saveTodaysCounts);
    });
}

/**
 * Register the recurring job that refreshes the personal info.
 *
 * Creates a schedule.RecurrenceRule with `second` set to 30 and schedules a
 * job that calls myInfo() whenever the rule fires.
 */
function syncData() {
    var infoRule = new schedule.RecurrenceRule();
    infoRule.second = 30;

    var refreshInfo = function () {
        myInfo();
    };

    schedule.scheduleJob(infoRule, refreshInfo);
}
  • 重构获取文章列表方法

    • 提取proxy/article.js文档操作方法,针对Mongoose中的Scheme进行操作
    exports.getAllArticles = function (callback) {
      article.find({},callback)
    };
    
    
    • 调整获取文章列表数据routes/jianshu.js
    article.getAllArticles(function (err, articles) {
    
        if (err) return next(err);  
        res.render('jianshu', {articles: articles});
    
        });
    
    

20160919

  • 完善首页中的个人文章的链接拼接

  • 添加首页个人文章内容测试代码

20160817

  • 删除文章列表页面中已注释的代码

  • 添加获取自己文件的数据,及返回至前端-未完成链接获取

    result.forEach(function (info) {
      // console.log('Date:' + info.date);
      myInfo.push({
        date: info.date,
        following: info.following,
        follower: info.follower
      });
    });
    request.get('http://www.jianshu.com' + myPageHref)
        .end(function (err, resT) {
          var $ = cheerio.load(resT.text);
          $('.article-list li').each(function (idx, article) {
            var $article = $(article);
            myArticle.push({
                article: $article.find('.title a').text(),
                publishDate: $article.find('.time').attr('data-shared-at'),
                articleHref: $article.find('.title a').attr('href'),
                reading: $article.find('.list-footer a').eq(0).text(),
                comment: $article.find('.list-footer a').eq(1).text(),
                favorite: $article.find('.list-footer a').eq(2).text()
            })
          });
          res.render('index', {info: myInfo,myArticle: myArticle});
        });

20160816

  • 添加文章列表、同步文章测试:存在同步最新文章、文章标题、作者,两个页面内容一样,测试验证项也一样。
// Checks that the /jianshu page answers 200 and contains the sync link and
// the two table headers.
it('Verify page content',function (done) {
    request.get('/jianshu')
        .expect(200)
        .expect(function (res) {
            // indexOf() returns -1 when the text is absent and 0 when the page
            // starts with it, so the result has to be compared with -1;
            // negating it would only ever fail for a match at position 0.
            if (res.text.indexOf("同步最新文章") === -1) throw new Error("missing sync latest article link");
            if (res.text.indexOf("文章标题") === -1) throw new Error("missing article content about title");
            if (res.text.indexOf("作者") === -1) throw new Error("missing article content about author");
        })
        .end(done);
});
  • 添加首页测试:存在文章列表、日期、关注、粉丝,及2016数据,测试不太严谨,但可测试功能
// Checks that the home page answers 200 and contains the article-list link,
// the personal-info table headers and at least one "2016" value.
it('Exist:go to articles content',function (done) {
    request.get('')
        .expect(200)
        .expect(function (res) {
            // indexOf() returns -1 when the text is absent and 0 when the page
            // starts with it, so the result has to be compared with -1;
            // negating it would only ever fail for a match at position 0.
            if (res.text.indexOf("文章列表") === -1) throw new Error("missing go to article content");
            if (res.text.indexOf("日期") === -1) throw new Error("missing myinfo content about date");
            if (res.text.indexOf("关注") === -1) throw new Error("missing myinfo content about following");
            if (res.text.indexOf("粉丝") === -1) throw new Error("missing myinfo content about follower");
            if (res.text.indexOf("2016") === -1) throw new Error("missing myinfo data");
        })
        .end(done);
});
  • 调整测试超时时间为10S,同步使用文章时,使用时间较长, gulp-mocha
gulp.src(['test/**.js'], { read: false})
    .pipe(mocha({
        reporter: 'spec',
        globals: {
            should: require('should')
        },
        timeout: 10000
    }));

20160815

  • 重构测试代码: 抽取URL

    • 添加服务器自动构建
      • 添加gulp-nodemon、browser-sync到devDependencies
      • 调整gulpfile.js:添加自动监听client和server端的代码变化,并及时重新构建 http://localhost:4000:为Browser-sync同步监听客户端,并自动刷新前端页面 http://localhost:3000:为无browser-sync效果

20160811

  • 删除重复的myinfo数据
db.myinfos.remove({"_id" : ObjectId("57a810b87a33b27050c8529f")})
  • 添加测试框架supertest,构建工具gulp
// Run every spec matched by 'test/**.js' through mocha with the "spec" reporter.
// The stream is returned so that gulp can tell when the run has finished (and
// see its failure) instead of treating the task as complete immediately.
gulp.task('test', function() {
    return gulp.src(['test/**.js'], { read: false})
        .pipe(mocha({
            reporter: 'spec',
            globals: {
                should: require('should')
            }
        }));
});

20160808

  • 重构:提取获取文章列表的公共方法 -- Block

20160804

  • 完成同步文章列表的功能
    • 在获取新文章后,再使用render到文章列表模板
async.mapLimit(articleTitles,5,function (article, callback) {
    fetchUrl(article,callback);
},function (err, result) {
    console.log('获取数据结束');
    var resultsAllArticles = [];
    articleSchema.find({},function (err, result) {
        if (err) return next(err);
        result.forEach(function (article) {
            resultsAllArticles.push({
                articleTitle: article.title,
                articleHref: article.articleHref,
                author: article.author,
                authorHref: article.authorHref
            });
        });
        res.render('jianshu', {results: resultsAllArticles});
    });
});

20160803

  • index页面显示自己的数据following/follower
  div
    table
      thead
        tr
          td 日期
          td 关注
          td 粉丝
      tbody
        each result in info
          tr
            td #{result.date}
            td #{result.following}
            td #{result.follower}

  • 每次刷新页面时,会验证是否已经有今天的数据,如果没有则插入
  myInfoSchema.find({'userHref': '/users/552f687b314b'},function (err, result) {
    var myInfo = [];
    result.forEach(function (info) {
      console.log('Date:' + info.date);
      myInfo.push({
        date: info.date,
        following: info.following,
        follower: info.follower
      });
    });
    res.render('index', { title: 'Express' ,info: myInfo});
  });

20160802

  • 在首页中添加每天自己的粉丝收获喜欢数量,并存入数据库中
    • myInfo模型
var myInfoScheme = new Schema({
    userHref: String,
    date: String,
    following: Number,
    follower: Number
});

- 插入数据库
request.get('http://www.jianshu.com' + myPageHref).end(function (err, res) {
    var $ = cheerio.load(res.text);
    var following = $('.clearfix').find('b').eq(0).text();
    var follower = $('.clearfix').find('b').eq(1).text();
    myInfoSchema.create({
      userHref: myPageHref,
      date: new Date().toDateString(),
      following: following,
      follower: follower
    },function (err, result) {
      if (err) return next(err);
    });
});
- 日期存入格式为: `new Date().toDateString()`,使用字符串进行判断是否已经存入
  myInfoSchema.find({'date': new Date().toDateString()},function (err, result) {
    if (result.length == 0){
      request.get('http://www.jianshu.com' + myPageHref).end(function (err, res) {
        var $ = cheerio.load(res.text);
        var following = $('.clearfix').find('b').eq(0).text();
        var follower = $('.clearfix').find('b').eq(1).text();
        myInfoSchema.create({
          userHref: myPageHref,
          date: new Date().toDateString(),
          following: following,
          follower: follower
        },function (err, result) {
          if (err) return next(err);
        });
      });
    }
  });
  • 更新mongo数据库中myInfo数据
db.myinfos.update({'userHref':'/users/552f687b314b'},{$set:{'date':'Tue Aug 01 2016'}})

  • 存入文章时,一并存入作者数据。并需要对文章及作者数据进行去重
articleSchema.find({articleHref:article.articleHref},function (err, findArticle) {
    console.log(findArticle);
    if (findArticle.length == 0) {
        articleSchema.create({
            title: article.articleTitle,
            articleHref: article.articleHref,
            author: article.author,
            authorHref: article.authorHref
        },function(err, result) {
            if (err) return next(err);
        });
    }
});
authorSchema.find({id:article.authorHref},function (err, findAuthor) {
    if (findAuthor.length == 0) {
        authorSchema.create({
            id: article.authorHref,
            author: article.author,
            following: following,
            follower: follower
        },function(err, result) {
            if (err) return next(err);
        });
    }
});

20160727

  • mongoose中createConnection与connection的用法区别

    • createConnection用法
var mongoose = require('mongoose');
var db = mongoose.createConnection('mongodb://localhost/jianshu');

var Schema = mongoose.Schema;

var articleScheme = new Schema({
    title: String,
    articleHref: String,
    author: String,
    authorHref: String
});

module.exports = db.model('article', articleScheme);

- `connection`用法
// Default-connection variant: the model is registered with mongoose.model(),
// which works on mongoose's default connection, so that connection has to be
// opened with mongoose.connect(). (mongoose.createConnection() returns a
// separate connection object whose models must be created via its own
// .model() method; calling it here and discarding the result would leave the
// default connection unopened.)
var mongoose = require('mongoose');
mongoose.connect('mongodb://localhost/jianshu');

var Schema = mongoose.Schema;

// One document per article: its title and link plus the author's name and link.
var articleScheme = new Schema({
    title: String,
    articleHref: String,
    author: String,
    authorHref: String
});

module.exports = mongoose.model('article', articleScheme);

  • 作者数据存入author中,存入数据:authorHref/author/following/follower

    • model:author.js
var mongoose = require('mongoose');
var db = mongoose.createConnection('mongodb://localhost/jianshu');

var Schema = mongoose.Schema;

var authorScheme = new Schema({
    id: String,
    author: String,
    following: Number,
    follower: Number
});

module.exports = db.model('author', authorScheme);

- 插入数据`routes/jianshu.js`
authorSchema.find({id:article.authorHref},function (err, findAuthor) {
    console.log(findAuthor);
    if (findAuthor.length == 0) {
        authorSchema.create({
            id: article.authorHref,
            author: article.author,
            following: following,
            follower: follower
        },function(err, result) {
            if (err) return next(err);
        });
    }
});

20160726

articleSchema.find({articleHref:article.articleHref},function (err, findArticle) {
    console.log(findArticle);
    if (findArticle.length == 0) {
        articleSchema.create({
            title: article.articleTitle,
            articleHref: article.articleHref,
            author: article.author,
            authorHref: article.authorHref
        },function(err, result) {
            if (err) return next(err);
        });
    }
});

20160720

  • 添加数据库中存储数据作者作者信息链接

    • 添加scheme配置
var articleScheme = new Schema({
    title: String,
    articleHref: String,
    author: String,
    authorHref: String
});
- 将获取的数据一并保存至数据库中
articleScheme.create({
    title: article.articleTitle,
    articleHref: article.articleHref,
    author: article.author,
    authorHref: article.authorHref
},function(err, result) {
    if (err) return next(err);
});
- 在mongo中查询插入的数据。使用mongo链接查询时,建议使用`pretty()`方法来将返回的数据展示的更易读
> db.articles.find({'_id': ObjectId('578f07f5bf9d4937b7c9f0a9')}).pretty()
{
        "_id" : ObjectId("578f07f5bf9d4937b7c9f0a9"),
        "title" : "就Excel而言,掌握这些就足以应付大部分工作了",
        "articleHref" : "http://www.jianshu.com/p/aab3f09f015b",
        "author" : "北大小笨",
        "authorHref" : "http://www.jianshu.com/users/2528bd080aa8",
        "__v" : 0
}

20160719

  • 使用mongodb数据库存储获取的数据:首页文章、文章链接

    • 使用mongoose来操作和维护mongodb

    • 使用articlecollection来存储数据

var mongoose = require('mongoose');
mongoose.connect('mongodb://localhost/jianshu');

var Schema = mongoose.Schema;

var articleScheme = new Schema({
    title: String,
    href: String
});

module.exports = mongoose.model('article', articleScheme);

- 向数据库中插入每次查询的结果
articleScheme.create({
    title: article.articleTitle,
    href: article.href
},function(err, result) {
    if (err) return next(err);
});
  • 使用async获取数据的并发数,确保每次均可完整获取数据

    • async.mapLimit异步的执行最大的执行数,从1开始

    • 将所有的返回数据全部存放至results中,便于在前端展示

// Number of fetchUrl calls currently counted as running: incremented on entry
// and decremented when the call's timer fires.
var conCurrencyCount = 0;
/**
 * Fetch one author's page and append the article together with the author's
 * name and following/follower counts to `results`.
 *
 * @param {Object} article - Reads `authorLink`, `articleTitle` and `href`.
 * @param {Function} callback - Called as callback(null, <string>) after a
 *     random delay.
 *
 * NOTE(review): `callback` is invoked from the timer below, not from the
 * request's `.end` handler, so it can fire before the page has been fetched
 * and before `results.push` has run — confirm this is intended.
 * `results` and `next` are not declared in this block; presumably they come
 * from the enclosing route handler.
 */
var fetchUrl = function (article, callback) {
    // Pseudo-random integer delay in the range 0..1999 (milliseconds, used by setTimeout below).
    var delay = parseInt((Math.random() * 10000000) % 2000,10);
    conCurrencyCount++;
    console.log('并发数:' + conCurrencyCount + ',访问的页面是:' + article.authorLink + ',控制的延迟:' + delay);
    request.get(article.authorLink)
        .end(function (err, res) {
            if (err){
                return next(err);
            }
            var $ = cheerio.load(res.text);
            var author = $('.basic-info').find('h3').text();
            // The first two <b> elements below .clearfix are read as following and follower.
            var following = $('.clearfix').find('b').eq(0).text();
            var follower = $('.clearfix').find('b').eq(1).text();
            results.push({
                articleTitle: article.articleTitle,
                articleUrl: article.href,
                author: author,
                authorUrl: article.authorLink,
                following: following,
                follower: follower
            })
        });
    setTimeout(function () {
        conCurrencyCount--;
        // NOTE(review): `article` is an object here, so this concatenation presumably
        // yields "[object Object] html content" — confirm whether the value is used at all.
        callback(null,article + ' html content');
    },delay);
};

async.mapLimit(articleTitles,5,function (article, callback) {
    fetchUrl(article,callback);
},function (err, result) {
    console.log('获取数据结束');
    res.render('jianshu', { title: '简书', results: results});
});

20160718

  • 获取每位作者的粉丝关注

    • 有时会出现获取不到粉丝关注,应该是由于简书官网做了并发访问限制

    • 使用eventproxy获取首页信息后,再获取每位作者的信息链接页面数据

var ep = new eventProxy();
ep.after('authorInfo_html',articleTitle.length,function (userInfors) {
    articleInfors = userInfors.map(function (userInfo) {
        var articleTitle = userInfo[0];
        var articleUrl = userInfo[1];
        var authorHtml = userInfo[2];
        var authorUrl = userInfo[3];
        var $ = cheerio.load(authorHtml);
        var author = $('.basic-info').find('h3').text();
        var following = $('.clearfix').find('b').eq(0).text();
        var follower = $('.clearfix').find('b').eq(1).text();
        return ({
            articleTitle: articleTitle,
            articleUrl: articleUrl,
            author: author,
            authorUrl: authorUrl,
            following: following,
            follower: follower
        });
    });

    res.render('jianshu', { title: '简书',articleTitle: articleTitle,showPage:0,articleInfors: articleInfors });
});

articleTitle.forEach(function (article) {
    console.log('获取:' + article.authorLink + '中');
    request.get(article.authorLink)
        .end(function (err, res) {
            console.log('获取:' + article.authorLink + '完成');
            var $ = cheerio.load(res.text);
            console.log('Init following is :' + $('.clearfix li b').eq(0).text());
            ep.emit('authorInfo_html',[article.articleTitle,article.href,res.text,article.authorLink]);
        });
});
});
- 调整前端展示及添加作者信息页面的链接功能
  div
    table
      thead
        tr
          td 文章标题
          td 作者
          td 关注
          td 粉丝
      tbody
        each info in articleInfors
          tr
            td
              a(href='#{info.articleUrl}') #{info.articleTitle}
            td
              a(href='#{info.authorUrl}') #{info.author}
            td #{info.following}
            td #{info.follower}

  • 添加文章列表中作者的信息链接

    • 获取作者的信息链接
articleTitle.push({
    articleTitle: $article.find('.title a').text(),
    author: $article.find('.author-name').text(),
    authorLink: 'http://www.jianshu.com' + $article.find('.author-name').attr('href'),
    href: 'http://www.jianshu.com' + $article.find('.title a').attr('href')
})
- 添加到前端展示的信息中
  tr
    td #{article.articleTitle}
    td
      a(href='#{article.authorLink}') #{article.author}
    td
      a(href='#{article.href}') #{article.href}

20160714

  • 获取首页文章的作者名及文章链接

    • 遍历每个文章块.article-list li,再从每个块中获取对应的文章标题、作者、链接地址
$('.article-list li').each(function (idx, article) {
                var $article = $(article);
                articleTitle.push({
                    articleTitle: $article.find('.title a').text(),
                    author: $article.find('.author-name').text(),
                    href: 'http://www.jianshu.com'+$article.find('a').attr('href')
                })
            });
- 在`views`中再对获取的内容进行展示,同时修改`thead`与`a`的css样式
    table
      thead
        tr
          td 文章标题
          td 作者
          td 链接
      tbody
        each article in articleTitle
          tr
            td #{article.articleTitle}
            td #{article.author}
            td
              a(href='#{article.href}') #{article.href}

- 尝试针对不同数据进行不同样式展示
tbody
        case showPage
          when 0
            each article in articleTitle
              tr
                td #{article.articleTitle}
                td #{article.author}
                td
                  a(href='#{article.href}') #{article.href}
          when 1
            tr
              td 1
              td 2
              td 3
          default
            tr
              td 0
              td 0
              td 0

20160713

    var articleTitle = [];
    request.get('http://www.jianshu.com/')
        .end(function (err,gres) {
            if (err){
                return next(err);
            }
            var $ = cheerio.load(gres.text);
            $('li .title').each(function (idx, title) {
                var $title = $(title);
                articleTitle.push({articleTitle:$title.text()})
            });
            res.render('jianshu', { title: '简书',articleTitle: articleTitle });
        });

- 获取文章标题,以列表的形式展示

- `cheerio`:用来转换返回的res数据,并支持jQuery处理

- `superagent`:模拟网络请求

- `nodemon`:用来实时检测`Node`文件是否有变更,有变更则自动重新部署。便于开发调试。启动命令:`./node_modules/nodemon/bin/nodemon.js jianshu`

- 添加`jianshu`的route和view模板,并将结果遍历出来
  div
    table
      thead
        tr
          td 文章标题
      tbody
        each article in articleTitle
          tr
            td #{article.articleTitle}

- 添加`thead`的css样式

thead {
  color: #00B7FF;
  font: 20px "Lucida Grande", Helvetica, Arial, sans-serif;
}