Permalink
Browse files

(GH-234) Fault tolerance for transient exceptions

Added transient exception handling for the SqlServer database. This
provides better support for Azure SQL Database, where transient faults
are expected and happen periodically.

Support for transient fault handling is added in a base DefaultDatabase
class, but the retry strategy implementation is specified in a
concrete database provider (mssql in this case).

Transient faults are handled in the following places:
* Transient faults when opening a connection
* Transient faults when running an update script
* Transient faults when dealing with RoundhousE internal tables (via
NHibernate).

The Microsoft TransientFaultHandling library was referenced for retry
building blocks. This might look like a dependency that could be
avoided (by just re-implementing the necessary bits). It was mainly
added in order to use the ReliableSqlConnection class.

As transient faults are much more common for Azure SQL Database than
for MS SQL Server, it might make sense to put the retry logic into a
new dedicated Azure-specific provider. But having it in a single MSSQL
provider is much more convenient from the user's perspective (fewer
options to think about) and it should not worsen the experience for
existing MSSQL users.
  • Loading branch information...
vansha committed Nov 18, 2015
1 parent 64ca03f commit de1e8d4130c944ff8aa06faec4812e9c502631b2
@@ -33,6 +33,8 @@
<include name="Npgsql.dll" />
<include name="Mono.Security.dll" />
<include name="System.Data.SQLite.dll" />
<include name="Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.dll" />
<include name="Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.Data.dll" />
</fileset>
</copy>
<echo level="Warning" message="Copying database dlls to '${dirs.build}${path.separator}${folder.app.drop}${path.separator}_PublishedApplications${path.separator}roundhouse.console'."/>
@@ -43,6 +45,8 @@
<include name="Npgsql.dll" />
<include name="Mono.Security.dll" />
<include name="System.Data.SQLite.dll" />
<include name="Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.dll" />
<include name="Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.Data.dll" />
</fileset>
</copy>
<echo level="Warning" message="Copying database dlls to '${dirs.build}${path.separator}${folder.app.drop}${path.separator}_PublishedApplications${path.separator}roundhouse.tasks'."/>
@@ -53,6 +57,8 @@
<include name="Npgsql.dll" />
<include name="Mono.Security.dll" />
<include name="System.Data.SQLite.dll" />
<include name="Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.dll" />
<include name="Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.Data.dll" />
</fileset>
</copy>
</target>
@@ -62,6 +62,8 @@
<include name="${dirs.merge.from}\MySql.Data.dll" />
<include name="${dirs.merge.from}\Npgsql.dll" />
<include name="${dirs.merge.from}\Mono.Security.dll" />
<include name="${dirs.merge.from}\Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.dll" />
<include name="${dirs.merge.from}\Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.Data.dll" />
</items>
</in>
<do>
@@ -106,6 +108,8 @@
<include name="Npgsql.dll" />
<include name="Mono.Security.dll" />
<include name="System.Data.SQLite.dll" />
<include name="Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.dll" />
<include name="Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.Data.dll" />
</fileset>
</delete>
</target>
@@ -1,17 +1,32 @@
using System.Data.SqlClient;
namespace roundhouse.databases.sqlserver
{
using System;
using System.Data;
using System.Data.SqlClient;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling;
using infrastructure.app;
using connections;
using infrastructure.extensions;
using infrastructure.logging;
public class SqlServerDatabase : AdoNetDatabase
{
// Configures the command retry policy for SQL Server: up to 5 retries with
// exponential backoff, after which the failure is allowed to surface.
public SqlServerDatabase()
{
    const int max_retry_count = 5;
    TimeSpan shortest_backoff = TimeSpan.FromSeconds(5);
    TimeSpan longest_backoff = TimeSpan.FromMinutes(2);
    TimeSpan backoff_delta = TimeSpan.FromSeconds(5);

    retry_policy = new RetryPolicy(
        new TransientErrorDetectionStrategy(),
        max_retry_count,
        minBackoff: shortest_backoff,
        maxBackoff: longest_backoff,
        deltaBackoff: backoff_delta);

    // Emit a warning every time the policy decides to retry a command.
    retry_policy.Retrying += (source, retry_info) => log_command_retrying(retry_info);
}
private string connect_options = "Integrated Security=SSPI;";
public override string sql_statement_separator_regex_pattern
@@ -60,9 +75,25 @@ private static string build_connection_string(string server_name, string databas
return string.Format("data source={0};initial catalog={1};{2}", server_name, database_name, connection_options);
}
// Creates the connection wrapper for SQL Server on top of a ReliableSqlConnection,
// with a retry policy (retry count 5, both intervals 5 seconds) that logs each retry
// via log_connection_retrying.
protected override AdoNetConnection GetAdoNetConnection(string conn_string)
{
    var open_policy = new RetryPolicy<TransientErrorDetectionStrategy>(
        5,
        TimeSpan.FromSeconds(5),
        TimeSpan.FromSeconds(5));
    open_policy.Retrying += (source, retry_info) => log_connection_retrying(retry_info);

    // The command retry policy only matters when the ReliableSqlConnection.ExecuteCommand
    // helper methods are invoked explicitly. Those methods are not used here, so command
    // retries are disabled on the connection itself.
    var reliable_connection = new ReliableSqlConnection(conn_string, open_policy, RetryPolicy.NoRetry);
    connection_specific_setup(reliable_connection);

    return new AdoNetConnection(reliable_connection);
}
// Forwards informational messages raised by SQL Server on this connection
// to the debug log, prefixed with " [SQL PRINT]: ".
//
// The connection may be either a ReliableSqlConnection (as created by
// GetAdoNetConnection) or a plain SqlConnection. A ReliableSqlConnection is not a
// SqlConnection, so casting it directly would throw InvalidCastException; its
// underlying SqlConnection is reached through Current instead. The handler is
// attached exactly once so each message is logged a single time.
protected override void connection_specific_setup(IDbConnection connection)
{
    var reliable_connection = connection as ReliableSqlConnection;
    SqlConnection sql_connection = reliable_connection != null
        ? reliable_connection.Current
        : connection as SqlConnection;

    // Nothing to hook up when the connection is not backed by a SqlConnection.
    if (sql_connection == null)
    {
        return;
    }

    sql_connection.InfoMessage += (sender, e) => Log.bound_to(this).log_a_debug_event_containing(" [SQL PRINT]: {0}{1}", Environment.NewLine, e.Message);
}
public override void run_database_specific_tasks()
@@ -219,6 +250,23 @@ private DataTable execute_datatable(string sql_to_run)
return result.Tables.Count == 0 ? null : result.Tables[0];
}
// Logs a warning that opening the connection failed and is about to be retried,
// including the current retry count and the last exception.
private void log_connection_retrying(RetryingEventArgs args)
{
    var logger = Log.bound_to(this);
    var retry_count = args.CurrentRetryCount;
    var line_break = Environment.NewLine;
    var failure_details = args.LastException.to_string();

    logger.log_a_warning_event_containing(
        "Failure opening connection, trying again (current retry count:{0}){1}{2}",
        retry_count,
        line_break,
        failure_details);
}
// Logs a warning that executing a command failed and is about to be retried,
// including the current retry count and the last exception.
private void log_command_retrying(RetryingEventArgs args)
{
    var logger = Log.bound_to(this);
    var retry_count = args.CurrentRetryCount;
    var line_break = Environment.NewLine;
    var failure_details = args.LastException.to_string();

    logger.log_a_warning_event_containing(
        "Failure executing command, trying again (current retry count:{0}){1}{2}",
        retry_count,
        line_break,
        failure_details);
}
}
}
@@ -0,0 +1,36 @@
namespace roundhouse.databases.sqlserver
{
    using System;
    using Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling;
    using infrastructure.logging;

    // Transient error detection that delegates to SqlDatabaseTransientErrorDetectionStrategy,
    // but also inspects the chain of inner exceptions so that a transient error wrapped
    // by another exception (e.g. NHibernate's GenericAdoException) is still recognized.
    public class TransientErrorDetectionStrategy : ITransientErrorDetectionStrategy
    {
        private readonly SqlDatabaseTransientErrorDetectionStrategy inner_strategy =
            new SqlDatabaseTransientErrorDetectionStrategy();

        // Returns true when the exception, or any exception in its InnerException chain,
        // is considered transient by the wrapped strategy. A null exception is never
        // transient and is not logged; every other check is written to the debug log.
        public bool IsTransient(Exception ex)
        {
            if (ex == null)
            {
                return false;
            }

            bool transient = IsTransientException(ex);
            Log.bound_to(this).log_a_debug_event_containing("Checking whether the '{0}: {1}' error is transient - {2} ", ex.GetType(), ex.Message, transient);
            return transient;
        }

        // Walks from the outermost exception inwards and stops at the first one
        // the wrapped strategy reports as transient.
        private bool IsTransientException(Exception ex)
        {
            for (Exception current = ex; current != null; current = current.InnerException)
            {
                if (inner_strategy.IsTransient(current))
                {
                    return true;
                }
            }

            return false;
        }
    }
}
@@ -1,5 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="EnterpriseLibrary.TransientFaultHandling" version="6.0.1304.0" targetFramework="net45" />
<package id="EnterpriseLibrary.TransientFaultHandling.Data" version="6.0.1304.1" targetFramework="net45" />
<package id="FluentNHibernate" version="1.3.0.733" targetFramework="net35" />
<package id="Iesi.Collections" version="3.3.2.4000" targetFramework="net35" />
<package id="NHibernate" version="3.3.2.4000" targetFramework="net35" />
@@ -75,6 +75,14 @@
<Reference Include="Iesi.Collections, Version=1.0.1.0, Culture=neutral, PublicKeyToken=aa95f207798dfdb4, processorArchitecture=MSIL">
<HintPath>..\..\packages\Iesi.Collections.3.3.2.4000\lib\Net35\Iesi.Collections.dll</HintPath>
</Reference>
<Reference Include="Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling, Version=6.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
<HintPath>..\..\packages\EnterpriseLibrary.TransientFaultHandling.6.0.1304.0\lib\portable-net45+win+wp8\Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.Data, Version=6.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
<HintPath>..\..\packages\EnterpriseLibrary.TransientFaultHandling.Data.6.0.1304.1\lib\NET45\Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.Data.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="NHibernate, Version=3.3.1.4000, Culture=neutral, PublicKeyToken=aa95f207798dfdb4, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\..\packages\NHibernate.3.3.2.4000\lib\Net35\NHibernate.dll</HintPath>
@@ -90,6 +98,7 @@
<Compile Include="..\..\SolutionVersion.cs">
<Link>Properties\SolutionVersion.cs</Link>
</Compile>
<Compile Include="TransientErrorDetectionStrategy.cs" />
<Compile Include="orm\ScriptsRunErrorMapping.cs" />
<Compile Include="orm\ScriptsRunMapping.cs" />
<Compile Include="orm\VersionMapping.cs" />
@@ -1,19 +1,15 @@
using System;
using roundhouse.infrastructure.app;
using roundhouse.infrastructure.logging;
namespace roundhouse.databases
namespace roundhouse.databases
{
using System.Collections.Generic;
using System.Data;
using System.Data.Common;
using System.Data.SqlClient;
using connections;
using infrastructure.app;
using infrastructure.logging;
using parameters;
public abstract class AdoNetDatabase : DefaultDatabase<IDbConnection>
{
private const int sql_connection_exception_number = 233;
private bool split_batches_in_ado = true;
public override bool split_batch_statements
@@ -26,7 +22,7 @@ public override bool split_batch_statements
private DbProviderFactory provider_factory;
private AdoNetConnection GetAdoNetConnection(string conn_string)
protected virtual AdoNetConnection GetAdoNetConnection(string conn_string)
{
provider_factory = DbProviderFactories.GetFactory(provider);
IDbConnection connection = provider_factory.CreateConnection();
@@ -57,7 +53,6 @@ public override void close_admin_connection()
admin_connection.Dispose();
admin_connection = null;
}
}
public override void open_connection(bool with_transaction)
@@ -122,40 +117,13 @@ protected override void run_sql(string sql_to_run, ConnectionType connection_typ
{
if (string.IsNullOrEmpty(sql_to_run)) return;
//really naive retry logic. Consider Lokad retry policy
//this is due to sql server holding onto a connection http://social.msdn.microsoft.com/Forums/en-US/adodotnetdataproviders/thread/99963999-a59b-4614-a1b9-869c6dff921e
try
{
run_command_with(sql_to_run, connection_type, parameters);
}
catch (SqlException ex)
if (transaction == null)
{
// If we are not running inside a transaction, then we can continue to the next command.
if (transaction == null)
{
// But only if it's a connection failure AND connection failure is the only error reported.
if (ex.Errors.Count == 1 && ex.Number == sql_connection_exception_number)
{
Log.bound_to(this).log_a_debug_event_containing("Failure executing command, trying again. {0}{1}", Environment.NewLine, ex.ToString());
run_command_with(sql_to_run, connection_type, parameters);
}
else
{
//Re-throw the original exception.
throw;
}
}
else
{
// Re-throw the exception, which will delegate handling of the rollback to DatabaseMigrator calling class,
// e.g. DefaultDatabaseMigrator.run_sql(...) method catches exceptions from run_sql and rolls back the transaction.
throw;
}
retry_policy.ExecuteAction(() => run_command_with(sql_to_run, connection_type, parameters));
}
catch (Exception ex)
else
{
// If the Exception is not due to a SqlException, which is the case for any non-SqlServer database, then also delegate handling of the rollback to DatabaseMigrator calling class.
throw;
run_command_with(sql_to_run, connection_type, parameters);
}
}
@@ -164,7 +132,6 @@ private void run_command_with(string sql_to_run, ConnectionType connection_type,
using (IDbCommand command = setup_database_command(sql_to_run, connection_type, parameters))
{
command.ExecuteNonQuery();
command.Dispose();
}
}
@@ -175,8 +142,14 @@ protected override object run_sql_scalar(string sql_to_run, ConnectionType conne
using (IDbCommand command = setup_database_command(sql_to_run, connection_type, null))
{
return_value = command.ExecuteScalar();
command.Dispose();
if (transaction == null)
{
return_value = retry_policy.ExecuteAction(() => command.ExecuteScalar());
}
else
{
return_value = command.ExecuteScalar();
}
}
return return_value;
Oops, something went wrong.

0 comments on commit de1e8d4

Please sign in to comment.