Skip to content

Commit

Permalink
Merge pull request #898 from globocom/autoupgrade-fix-errors-take-snapshot
Browse files Browse the repository at this point in the history

AutoUpgrade - Fix take_snapshot error handling
  • Loading branch information
nimbfire committed May 31, 2023
2 parents 6001b9c + 01f46a7 commit a60888e
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 49 deletions.
95 changes: 48 additions & 47 deletions dbaas/backup/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,70 +230,71 @@ def make_instance_gcp_snapshot_backup(

snapshot_final_status = Snapshot.SUCCESS

locked = None
client = None
driver = infra.get_driver()
client = driver.get_client(instance)
try:
client = driver.get_client(instance)
locked = lock_instance(driver, instance, client)
if not locked:
snapshot_final_status = Snapshot.WARNING

if 'MySQL' in type(driver).__name__:
mysql_binlog_save(client, instance)

has_snapshot = Snapshot.objects.filter(
has_snapshot_with_status_warning = Snapshot.objects.filter(
status=Snapshot.WARNING, instance=instance, end_at__year=datetime.now().year,
end_at__month=datetime.now().month, end_at__day=datetime.now().day
)
backup_hour_list = Configuration.get_by_name_as_list('make_database_backup_hour')
if not snapshot_final_status == Snapshot.WARNING and not has_snapshot:
cont = 0
for _ in range(backup_retry_attempts):
cont += 1
try:
code = 201
response, data = provider.new_take_snapshot(persist=persist)

if response.status_code < 400:
break
if has_snapshot_with_status_warning:
raise Exception("Backup with WARNING already created today.")

if cont >= 3:
raise IndexError
if snapshot_final_status == Snapshot.WARNING:
raise Exception("Snapshot has status WARNING, check the logs.")

except IndexError as e:
response, content = e
if response.status_code == 503:
errormsg = "{} - 503 error creating snapshot for instance: {}. It will try again in 30 seconds. ".format(
strftime("%d/%m/%Y %H:%M:%S"), instance
)
LOG.error(errormsg)
if task:
task.add_detail(errormsg)
sleep(30)
else:
raise e
cont = 0
for _ in range(backup_retry_attempts):
cont += 1
try:
code = 201
response, data = provider.new_take_snapshot(persist=persist)

if response.status_code < 400:
while code != 200:
sleep(20)
snap_response, snap_status = provider.take_snapshot_status(data['identifier'])
if snap_response.status_code in [200, 202]:
unlock_instance(driver, instance, client)
if snap_response.status_code == 200:
break
if snap_response.status_code >= 400:
raise error
code = snap_response.status_code

snapshot.done(snap_status)
snapshot.save()
else:
errormsg = response['message']
set_backup_error(infra, snapshot, errormsg)
if response.status_code < 400:
break

if cont >= 3:
raise IndexError

except IndexError as e:
response, content = e
if response.status_code == 503:
errormsg = "{} - 503 error creating snapshot for instance: {}. It will try again in 30 seconds. ".format(
strftime("%d/%m/%Y %H:%M:%S"), instance
)
LOG.error(errormsg)
if task:
task.add_detail(errormsg)
sleep(30)
else:
raise e

if response.status_code < 400:
while code != 200:
sleep(20)
snap_response, snap_status = provider.take_snapshot_status(data['identifier'])
if snap_response.status_code in [200, 202]:
unlock_instance(driver, instance, client)
if snap_response.status_code == 200:
break
if snap_response.status_code >= 400:
raise error
code = snap_response.status_code

snapshot.done(snap_status)
snapshot.save()
else:
if str(current_hour) in backup_hour_list:
raise Exception("Backup with WARNING already created today.")
errormsg = response['message']
set_backup_error(infra, snapshot, errormsg)
raise Exception(errormsg)

except Exception as e:
errormsg = "Error creating snapshot: {}".format(e)
Expand Down
9 changes: 7 additions & 2 deletions dbaas/maintenance/task_auto_upgrade_vm_offering.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def task_auto_upgrade_vm_offering(database, task, retry_from=None, resize_target
instances = infra.get_driver().get_database_instances() # nao traz a instance do arbitro (mongodb)

LOG.debug("Instances : %s", instances)
create_temporary_instance = False

temporary_instance = None
for instance in instances:
Expand All @@ -62,11 +63,15 @@ def task_auto_upgrade_vm_offering(database, task, retry_from=None, resize_target
if temporary_instance is None: # traz instances temporarias se n estiver como "database"
temporary_instances = infra.get_driver().get_temporary_instances()
LOG.debug("Temporary Instances: %s", temporary_instances)
instances.extend(infra.get_driver().get_temporary_instances())
if len(temporary_instances) != 0: # se encontrar temporary instances, adiciona elas a execucao
instances.extend(infra.get_driver().get_temporary_instances())
else: # se nao, pede pra criar uma nova
create_temporary_instance = True

last_vm_created = number_of_instances_before_task

if not retry_from:
if create_temporary_instance: # se precisar criar nova temporary instance, cria
LOG.info("Creating temporary instance")
for i in range(number_of_instances):
instance = None
last_vm_created += 1
Expand Down

0 comments on commit a60888e

Please sign in to comment.