Skip to content

Commit

Permalink
更新包
Browse files Browse the repository at this point in the history
  • Loading branch information
zlzforever committed Feb 14, 2023
1 parent 1b9f029 commit 2d9d220
Show file tree
Hide file tree
Showing 18 changed files with 86 additions and 126 deletions.
8 changes: 4 additions & 4 deletions package.props
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
<PackageLicenseFile>LICENSE.txt</PackageLicenseFile>
<Version>5.1.0</Version>
<FileVersion>5.1.0</FileVersion>
<AssemblyVersion>5.1.0</AssemblyVersion>
<Version>5.1.1</Version>
<FileVersion>5.1.1</FileVersion>
<AssemblyVersion>5.1.1</AssemblyVersion>
<Authors>zlzforever@163.com;</Authors>
<Copyright>Copyright 2018 Lewis Zou</Copyright>
<Copyright>Copyright 2023 Lewis Zou</Copyright>
<Description>DotnetSpider, a .NET Standard web crawling library. It is lightweight, efficient and fast high-level web crawling &amp; scraping framework</Description>
<PackageTags>DotnetSpider;crawler;dotnet core</PackageTags>
<PackageProjectUrl>https://github.com/dotnetcore/DotnetSpider</PackageProjectUrl>
Expand Down
12 changes: 6 additions & 6 deletions publish_package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ rm -rf src/DotnetSpider.PostgreSql/bin/Release
rm -rf src/DotnetSpider.RabbitMQ/bin/Release
dotnet build -c Release
dotnet pack -c Release
dotnet nuget push src/DotnetSpider/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
dotnet nuget push src/DotnetSpider.HBase/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
dotnet nuget push src/DotnetSpider.Mongo/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
dotnet nuget push src/DotnetSpider.MySql/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
dotnet nuget push src/DotnetSpider.PostgreSql/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
dotnet nuget push src/DotnetSpider.RabbitMQ/bin/Release/*.nupkg --source $NUGET_SERVER --api-key $NUGET_KEY
dotnet nuget push src/DotnetSpider/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
dotnet nuget push src/DotnetSpider.HBase/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
dotnet nuget push src/DotnetSpider.Mongo/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
dotnet nuget push src/DotnetSpider.MySql/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
dotnet nuget push src/DotnetSpider.PostgreSql/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
dotnet nuget push src/DotnetSpider.RabbitMQ/bin/Release/*.nupkg -s $NUGET_SERVER -k $NUGET_KEY --skip-duplicate
2 changes: 1 addition & 1 deletion src/DotnetSpider.Agent/DotnetSpider.Agent.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="Serilog.AspNetCore" Version="6.0.1" />
<PackageReference Include="Serilog.AspNetCore" Version="6.1.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
<PackageReference Include="Serilog.Sinks.RollingFile" Version="3.3.0" />
<PackageReference Include="Serilog.Sinks.PeriodicBatching" Version="3.1.0" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Hosting" Version="7.0.0" />
<PackageReference Include="Serilog.AspNetCore" Version="6.0.1" />
<PackageReference Include="Serilog.AspNetCore" Version="6.1.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
<PackageReference Include="Serilog.Sinks.RollingFile" Version="3.3.0" />
<PackageReference Include="Serilog.Sinks.PeriodicBatching" Version="3.1.0" />
Expand Down
2 changes: 1 addition & 1 deletion src/DotnetSpider.Mongo/DotnetSpider.Mongo.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="MongoDB.Driver" Version="2.18.0" />
<PackageReference Include="MongoDB.Driver" Version="2.19.0" />
</ItemGroup>

<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion src/DotnetSpider.MySql/DotnetSpider.MySql.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="MySqlConnector" Version="2.2.0" />
<PackageReference Include="MySqlConnector" Version="2.2.5" />
</ItemGroup>
</Project>
2 changes: 1 addition & 1 deletion src/DotnetSpider.PostgreSql/DotnetSpider.PostgreSql.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Npgsql" Version="7.0.0" />
<PackageReference Include="Npgsql" Version="7.0.1" />
</ItemGroup>

<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion src/DotnetSpider.Sample/DotnetSpider.Sample.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="Serilog.AspNetCore" Version="6.0.1" />
<PackageReference Include="Serilog.AspNetCore" Version="6.1.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
<PackageReference Include="Serilog.Sinks.RollingFile" Version="3.3.0" />
<PackageReference Include="Serilog.Sinks.PeriodicBatching" Version="3.1.0" />
Expand Down
2 changes: 1 addition & 1 deletion src/DotnetSpider.Sample/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static async Task Main(string[] args)
// // await DistributedSpider.RunAsync();
// await ProxySpider.RunAsync();
// await EntitySpider.RunMySqlQueueAsync();
await ImageSpider.RunAsync();
await CnblogsSpider.RunAsync();

Console.WriteLine("Bye!");
}
Expand Down
60 changes: 19 additions & 41 deletions src/DotnetSpider.Sample/samples/CnblogsSpider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,54 +40,26 @@ protected override async Task InitializeAsync(CancellationToken stoppingToken =
{
AddDataFlow(new ListNewsParser());
AddDataFlow(new NewsParser());
AddDataFlow(new MyConsoleStorage());
await AddRequestsAsync(new Request("https://news.cnblogs.com/n/page/1/"));
var request = new Request("https://news.cnblogs.com/n/page/1")
{
};
request.Headers.UserAgent = "";
await AddRequestsAsync(request);
}

/// <summary>
/// Creates the identifier for this spider: a freshly generated object id rendered as a
/// string, paired with the fixed label "博客园".
/// </summary>
/// <returns>A new <c>SpiderId</c> built from those two values.</returns>
protected override SpiderId GenerateSpiderId()
    => new SpiderId(ObjectId.CreateId().ToString(), "博客园");

/// <summary>
/// Data flow step that writes the <c>News</c> item carried by the context to the console.
/// </summary>
protected class MyConsoleStorage : DataFlowBase
{
    /// <summary>
    /// Nothing to prepare for this step; completes immediately.
    /// </summary>
    public override Task InitializeAsync() => Task.CompletedTask;

    /// <summary>
    /// Prints the URL, title and view count of the <c>News</c> item stored in the context
    /// under the full type name of <c>News</c>. Logs a warning instead when the context is
    /// reported as empty, and does nothing when the stored data is not a <c>News</c>.
    /// </summary>
    /// <param name="context">The data flow context for the current request.</param>
    /// <returns>An already completed task.</returns>
    public override Task HandleAsync(DataFlowContext context)
    {
        if (IsNullOrEmpty(context))
        {
            Logger.LogWarning("数据流上下文不包含解析结果");
        }
        else if (context.GetData(typeof(News).FullName) is News item)
        {
            Console.WriteLine($"URL: {item.Url}, TITLE: {item.Title}, VIEWS: {item.Views}");
        }

        return Task.CompletedTask;
    }
}

protected class ListNewsParser : DataParser
{
/// <summary>
/// Registers the request validators and the follow-request querier used by this parser.
/// </summary>
/// <remarks>
/// NOTE(review): this span comes from a rendered diff, so lines from both the old and the
/// new revision appear together (the lambda validator and the pattern validator, the
/// commented-out and the live follow querier). Confirm against the repository which of
/// them the current file keeps.
/// </remarks>
/// <returns>An already completed task.</returns>
public override Task InitializeAsync()
{
// AddRequiredValidator("news\\.cnblogs\\.com/n/page");
// Validator that accepts a request when its full URI string matches "<host>/$".
// NOTE(review): the host is concatenated into the pattern without Regex.Escape, so the
// dots in it act as wildcards — confirm that is acceptable.
AddRequiredValidator((request =>
{
var host = request.RequestUri.Host;
var regex = host + "/$";
return Regex.IsMatch(request.RequestUri.ToString(), regex);
}));
// Validator given as a pattern string; presumably a regex tested against the request
// URL — verify against AddRequiredValidator.
AddRequiredValidator("news\\.cnblogs\\.com/n/page");
// Follow the pager links if you want to collect every page.
// AddFollowRequestQuerier(Selectors.XPath(".//div[@class='pager']"));
AddFollowRequestQuerier(Selectors.XPath(".//div[@class='pager']"));
return Task.CompletedTask;
}

Expand Down Expand Up @@ -128,16 +100,22 @@ public override Task InitializeAsync()
/// <summary>
/// Builds a <c>News</c> item for the current request and stores it in the context under
/// the full type name of <c>News</c>.
/// </summary>
/// <remarks>
/// NOTE(review): this span comes from a rendered diff. The object initializer below shows
/// both the pre-change inline expressions and the post-change assignments from local
/// variables; confirm against the repository which set the current file keeps.
/// </remarks>
/// <param name="context">The data flow context carrying the request and the selectable page content.</param>
/// <returns>An already completed task.</returns>
protected override Task ParseAsync(DataFlowContext context)
{
var typeName = typeof(News).FullName;
var url = context.Request.RequestUri.ToString();
// Title, summary and views are read from the request's Properties, not from the page.
var title = context.Request.Properties["title"]?.ToString()?.Trim();
var summary = context.Request.Properties["summary"]?.ToString()?.Trim();
// Falls back to "0" when the "views" value is null; int.Parse still throws on non-numeric text.
var views = int.Parse(context.Request.Properties["views"]?.ToString()?.Trim() ?? "0");
// The article body is taken from the element with id 'news_body'.
var content = context.Selectable.Select(Selectors.XPath(".//div[@id='news_body']"))?.Value
?.Trim();
context.AddData(typeName,
new News
{
Url = context.Request.RequestUri.ToString(),
Title = context.Request.Properties["title"]?.ToString()?.Trim(),
Summary = context.Request.Properties["summary"]?.ToString()?.Trim(),
Views = int.Parse(context.Request.Properties["views"]?.ToString()?.Trim() ?? "0"),
Content = context.Selectable.Select(Selectors.XPath(".//div[@id='news_body']")).Value
?.Trim()
Url = url,
Title = title,
Summary = summary,
Views = views,
Content = content
});

return Task.CompletedTask;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/DotnetSpider.Spiders/DotnetSpider.Spiders.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="Serilog.AspNetCore" Version="6.0.1" />
<PackageReference Include="Serilog.AspNetCore" Version="6.1.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
<PackageReference Include="Serilog.Sinks.RollingFile" Version="3.3.0" />
<PackageReference Include="Serilog.Sinks.PeriodicBatching" Version="3.1.0" />
Expand Down
4 changes: 2 additions & 2 deletions src/DotnetSpider.Tests/DotnetSpider.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.4.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.4.1" />
<PackageReference Include="xunit" Version="2.4.2" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.5">
<PrivateAssets>all</PrivateAssets>
Expand All @@ -16,7 +16,7 @@
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="Moq" Version="4.18.2" />
<PackageReference Include="Moq" Version="4.18.4" />
</ItemGroup>

<ItemGroup>
Expand Down
23 changes: 11 additions & 12 deletions src/DotnetSpider/Agent/AgentOptions.cs
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
// Options type carrying the identifier and the name of an agent node.
// NOTE(review): this span comes from a rendered diff of a whitespace-only change, so the
// class header and both properties appear twice — confirm against the repository file.
namespace DotnetSpider.Agent
{
public class AgentOptions
{
public class AgentOptions
{
/// <summary>
/// Node identifier
/// </summary>
public string AgentId { get; set; }

/// <summary>
/// Node identifier
/// </summary>
public string AgentId { get; set; }

/// <summary>
/// Node name
/// </summary>
public string AgentName { get; set; }
}
/// <summary>
/// Node name
/// </summary>
public string AgentName { get; set; }
}
}
37 changes: 17 additions & 20 deletions src/DotnetSpider/Agent/AgentService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public class AgentService : BackgroundService

protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
_logger.LogInformation(
_logger.LogDebug(
_messageQueue.IsDistributed
? $"Agent {_options.AgentId}, {_options.AgentName} is starting"
: "Agent is starting");
Expand All @@ -66,26 +66,23 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
});
}

// 同类型下载器注册于相同的 topic,用于负载均衡
// 同类型下载器注册于相同的 topic, 用于负载均衡
await RegisterAgentAsync(_downloader.Name, stoppingToken);

if (_messageQueue.IsDistributed)
{
// 注册 agent_{id} 用于固定节点下载
await RegisterAgentAsync(string.Format(Topics.Spider, _options.AgentId), stoppingToken);
}

// 分布式才需要发送心跳
if (_messageQueue.IsDistributed)
{
await Task.Factory.StartNew(async () =>
// 分布式才需要发送心跳
Task.Factory.StartNew(async () =>
{
while (!stoppingToken.IsCancellationRequested)
{
await HeartbeatAsync();
await Task.Delay(5000, stoppingToken);
}
}, stoppingToken);
}, stoppingToken).ConfigureAwait(true).GetAwaiter();
}

_logger.LogInformation(_messageQueue.IsDistributed
Expand Down Expand Up @@ -121,14 +118,14 @@ private async Task HandleMessageAsync(byte[] bytes)
switch (message)
{
case Messages.Agent.Exit exit:
{
if (exit.AgentId == _options.AgentId)
{
_applicationLifetime.StopApplication();
}
if (exit.AgentId == _options.AgentId)
{
_applicationLifetime.StopApplication();
}

break;
}
break;
}
case Request request:
Task.Factory.StartNew(async () =>
{
Expand All @@ -150,11 +147,11 @@ private async Task HandleMessageAsync(byte[] bytes)
}).ConfigureAwait(false).GetAwaiter();
break;
default:
{
var msg = JsonSerializer.Serialize(message);
_logger.LogWarning($"Message not supported: {msg}");
break;
}
{
var msg = JsonSerializer.Serialize(message);
_logger.LogWarning($"Message not supported: {msg}");
break;
}
}
}

Expand All @@ -167,7 +164,7 @@ private async Task HeartbeatAsync()
{
AgentId = _options.AgentId,
AgentName = _options.AgentName,
AvailableMemory = MachineInfo.Current.AvailableMemory,
AvailableMemory = MachineInfo.Current.AvailableMemory,
CpuLoad = 0
});
}
Expand Down
6 changes: 3 additions & 3 deletions src/DotnetSpider/DotnetSpider.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@

<ItemGroup>
<PackageReference Include="Bert.RateLimiters" Version="1.0.15" />
<PackageReference Include="MessagePack" Version="2.4.35" />
<PackageReference Include="MessagePack" Version="2.4.59" />
<PackageReference Include="Microsoft.CSharp" Version="4.7.0" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="7.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="7.0.0" />
<PackageReference Include="Microsoft.VisualBasic" Version="10.3.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.1" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.2" />
<PackageReference Include="Dapper" Version="2.0.123" />
<PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
<PackageReference Include="System.Collections.Immutable" Version="7.0.0" />
<PackageReference Include="System.ComponentModel.Annotations" Version="5.0.0" />
<PackageReference Include="System.Data.SqlClient" Version="4.8.5" />
<PackageReference Include="System.Text.Json" Version="7.0.0" />
<PackageReference Include="System.Text.Json" Version="7.0.1" />
<PackageReference Include="System.Threading.Channels" Version="7.0.0" />
<PackageReference Include="System.Threading.Tasks.Dataflow" Version="7.0.0" />
<PackageReference Include="ZCJ.HashedWheelTimer" Version="0.10.2" />
Expand Down

0 comments on commit 2d9d220

Please sign in to comment.