Comparing changes

base fork: django/django ... head fork: akaariai/django
compare: ticket_17788
  • 2 commits
  • 8 files changed
  • 0 commit comments
  • 1 contributor
Commits on Jun 04, 2012
akaariai Fixed #17788 -- Added a batch_size argument for qs.bulk_create()
The qs.bulk_create() method did not work with large batches on SQLite3.
This commit adds a way to split the bulk into smaller batches. The
default batch size is unlimited, except for SQLite3, where batches are
sized so that no more than 999 SQL parameters are used per query.

Thanks to everybody who participated in the discussion at Trac.
Special thanks to alpar for the report.
d4eacac
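To illustrate what the commit enables (a sketch only; LogEntry is a hypothetical model, not part of this patch): before the change, a bulk insert that needed more than 999 bound parameters failed on SQLite with "django.db.utils.DatabaseError: too many SQL variables"; now the same call is split into several INSERT statements automatically, and batch_size can cap it explicitly.

# Sketch only: LogEntry is a hypothetical model with two integer fields.
entries = [LogEntry(level=i % 5, value=i) for i in range(2000)]

# Before this commit, on SQLite this single call could exceed the
# 999-variable limit and raise "DatabaseError: too many SQL variables".
# Now it is split into backend-sized batches automatically.
LogEntry.objects.bulk_create(entries)

# The new argument also allows capping the batch size explicitly.
LogEntry.objects.bulk_create(entries, batch_size=500)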
akaariai Fixed review issues noted by claudep. Refs #17788.
The issues fixed were some comment wording and the use of the '//'
operator instead of the '/' operator. In addition, added some defensive
programming enhancements against division by zero and zero-sized batches.
332fb93
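A rough illustration of why those review fixes matter (plain Python, not taken from the patch): the batch size must come out as a whole number of objects, and the division has to be guarded when no fields are being inserted.

FIELDS_PER_OBJECT = 8   # e.g. a model inserting 8 columns per row
SQLITE_LIMIT = 999      # SQLite's default variable limit per statement

# '/' would give 124.875 under true division (Python 3, or with
# "from __future__ import division"); '//' gives the usable whole number.
print(SQLITE_LIMIT // FIELDS_PER_OBJECT)   # 124 objects per batch

# Guards in the same spirit as the fix: never divide by zero and never
# return a zero-sized batch.
def safe_batch_size(num_fields, num_objs):
    if num_fields == 0:
        return num_objs
    return max(SQLITE_LIMIT // num_fields, 1)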
30 django/db/backends/__init__.py
@@ -474,6 +474,14 @@ def autoinc_sql(self, table, column):
        This SQL is executed when a table is created.
        """
        return None
+
+    def bulk_batch_size(self, fields, objs):
+        """
+        Returns the maximum allowed batch size for the backend. The fields
+        are the fields going to be inserted in the batch; objs contains
+        all the objects to be inserted.
+        """
+        return len(objs)

    def date_extract_sql(self, lookup_type, field_name):
        """
@@ -511,6 +519,17 @@ def deferrable_sql(self):
        during a CREATE TABLE statement.
        """
        return ''
+
+    def distinct_sql(self, fields):
+        """
+        Returns an SQL DISTINCT clause which removes duplicate rows from the
+        result set. If any fields are given, only the given fields are being
+        checked for duplicates.
+        """
+        if fields:
+            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
+        else:
+            return 'DISTINCT'

    def drop_foreignkey_sql(self):
        """
@@ -567,17 +586,6 @@ def fulltext_search_sql(self, field_name):
"""
raise NotImplementedError('Full-text search is not implemented for this database backend')
- def distinct_sql(self, fields):
- """
- Returns an SQL DISTINCT clause which removes duplicate rows from the
- result set. If any fields are given, only the given fields are being
- checked for duplicates.
- """
- if fields:
- raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
- else:
- return 'DISTINCT'
-
def last_executed_query(self, cursor, sql, params):
"""
Returns a string of the query last executed by the given cursor, with
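The new bulk_batch_size() hook is what a backend overrides when its database caps the number of bound parameters per statement. A minimal sketch of such an override (hypothetical backend and limit, not part of this patch):

from django.db.backends import BaseDatabaseOperations

class MyDatabaseOperations(BaseDatabaseOperations):
    # Assumed limit for an imaginary database: at most 2000 bound
    # parameters per statement.
    MAX_QUERY_PARAMS = 2000

    def bulk_batch_size(self, fields, objs):
        # Each inserted object consumes one parameter per field, so cap
        # the batch so that a single INSERT stays within the limit.
        if not fields:
            return len(objs)
        return max(self.MAX_QUERY_PARAMS // len(fields), 1)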
9 django/db/backends/sqlite3/base.py
@@ -83,7 +83,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
    supports_1000_query_parameters = False
    supports_mixed_date_datetime_comparisons = False
    has_bulk_insert = True
-    can_combine_inserts_with_and_without_auto_increment_pk = True
+    can_combine_inserts_with_and_without_auto_increment_pk = False

    def _supports_stddev(self):
        """Confirm support for STDDEV and related stats functions
@@ -104,6 +104,13 @@ def _supports_stddev(self):
        return has_support

class DatabaseOperations(BaseDatabaseOperations):
+    def bulk_batch_size(self, fields, objs):
+        """
+        SQLite has a compile-time default (SQLITE_LIMIT_VARIABLE_NUMBER) of
+        999 variables per query.
+        """
+        return (999 // len(fields)) if len(fields) > 0 else len(objs)
+
    def date_extract_sql(self, lookup_type, field_name):
        # sqlite doesn't support extract, so we fake it with the user-defined
        # function django_extract that's registered in connect(). Note that
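To make the limit concrete (illustrative arithmetic, not from the patch): a model whose insert touches 8 columns gets batches of 999 // 8 = 124 objects, so a 1,000-object bulk on SQLite becomes a handful of queries instead of one failing statement.

def sqlite_batch_size(num_fields, limit=999):
    # Whole objects per INSERT, mirroring the hook above.
    return limit // num_fields

def queries_needed(num_objs, num_fields):
    batch_size = sqlite_batch_size(num_fields)
    return -(-num_objs // batch_size)   # ceiling division

print(sqlite_batch_size(8))      # 124 objects per batch
print(queries_needed(1000, 8))   # 9 queries for 1,000 objects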
29 django/db/models/query.py
@@ -388,7 +388,7 @@ def create(self, **kwargs):
        obj.save(force_insert=True, using=self.db)
        return obj

-    def bulk_create(self, objs):
+    def bulk_create(self, objs, batch_size=None):
        """
        Inserts each of the instances into the database. This does *not* call
        save() on each of the instances, does not send any pre/post save
@@ -401,8 +401,10 @@ def bulk_create(self, objs):
        # this could be implemented if you didn't have an autoincrement pk,
        # and 2) you could do it by doing O(n) normal inserts into the parent
        # tables to get the primary keys back, and then doing a single bulk
-        # insert into the childmost table. We're punting on these for now
-        # because they are relatively rare cases.
+        # insert into the childmost table. Some databases might allow doing
+        # this by using a RETURNING clause for the insert query. We're
+        # punting on these for now because they are relatively rare cases.
+        assert batch_size is None or batch_size > 0
        if self.model._meta.parents:
            raise ValueError("Can't bulk create an inherited model")
        if not objs:
@@ -418,13 +420,14 @@ def bulk_create(self, objs):
        try:
            if (connection.features.can_combine_inserts_with_and_without_auto_increment_pk
                and self.model._meta.has_auto_field):
-                self.model._base_manager._insert(objs, fields=fields, using=self.db)
+                self._batched_insert(objs, fields, batch_size)
            else:
                objs_with_pk, objs_without_pk = partition(lambda o: o.pk is None, objs)
                if objs_with_pk:
-                    self.model._base_manager._insert(objs_with_pk, fields=fields, using=self.db)
+                    self._batched_insert(objs_with_pk, fields, batch_size)
                if objs_without_pk:
-                    self.model._base_manager._insert(objs_without_pk, fields=[f for f in fields if not isinstance(f, AutoField)], using=self.db)
+                    fields = [f for f in fields if not isinstance(f, AutoField)]
+                    self._batched_insert(objs_without_pk, fields, batch_size)
            if forced_managed:
                transaction.commit(using=self.db)
            else:
@@ -860,6 +863,20 @@ def db(self):
    ###################
    # PRIVATE METHODS #
    ###################
+    def _batched_insert(self, objs, fields, batch_size):
+        """
+        A little helper method for bulk_create to insert the bulk one batch
+        at a time. Slices the objects into batches of at most batch_size
+        items and inserts each batch with a single query.
+        """
+        if not objs:
+            return
+        ops = connections[self.db].ops
+        batch_size = (batch_size or max(ops.bulk_batch_size(fields, objs), 1))
+        for batch in [objs[i:i + batch_size]
+                      for i in range(0, len(objs), batch_size)]:
+            self.model._base_manager._insert(batch, fields=fields,
+                                             using=self.db)

    def _clone(self, klass=None, setup=False, **kwargs):
        if klass is None:
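Stripped of the ORM specifics, the batching in _batched_insert is plain list slicing; a standalone sketch of the same behavior (insert_batch here stands in for _insert):

def batched_insert(objs, batch_size, insert_batch):
    # Insert objs in slices of at most batch_size items.
    if not objs:
        return
    for i in range(0, len(objs), batch_size):
        insert_batch(objs[i:i + batch_size])

# 1001 objects with a batch size of 124 lead to 9 insert calls,
# the last one holding the 9 leftover objects.
calls = []
batched_insert(list(range(1001)), 124, lambda batch: calls.append(len(batch)))
print(len(calls), calls[-1], sum(calls))   # 9 9 1001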
20 docs/ref/models/querysets.txt
@@ -1349,7 +1349,7 @@ has a side effect on your data. For more, see `Safe methods`_ in the HTTP spec.
bulk_create
~~~~~~~~~~~
-.. method:: bulk_create(objs)
+.. method:: bulk_create(objs, batch_size=None)
.. versionadded:: 1.4
@@ -1371,20 +1371,12 @@ This has a number of caveats though:
* If the model's primary key is an :class:`~django.db.models.AutoField` it
does not retrieve and set the primary key attribute, as ``save()`` does.
-.. admonition:: Limits of SQLite
+The ``batch_size`` parameter controls how many objects are created in a
+single query. The default is to create all objects in one batch, except for
+SQLite, where the default is such that at most 999 variables are used per query.
- SQLite sets a limit on the number of parameters per SQL statement. The
- maximum is defined by the SQLITE_MAX_VARIABLE_NUMBER_ compilation option,
- which defaults to 999. For instance, if your model has 8 fields (including
- the primary key), you cannot create more than 999 // 8 = 124 instances at
- a time. If you exceed this limit, you'll get an exception::
-
- django.db.utils.DatabaseError: too many SQL variables
-
- If your application's performance requirements exceed SQLite's limits, you
- should switch to another database engine, such as PostgreSQL.
-
-.. _SQLITE_MAX_VARIABLE_NUMBER: http://sqlite.org/limits.html#max_variable_number
+.. versionadded:: 1.5
+    The ``batch_size`` parameter was added.
count
~~~~~
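As a usage sketch of the documented behavior (Entry is a hypothetical model, not part of this patch):

entries = [Entry(headline='Entry %d' % i) for i in range(3000)]

# Default: one batch on most backends; on SQLite the batch size is chosen
# so that no single query needs more than 999 variables.
Entry.objects.bulk_create(entries)

# Explicit batch_size: at most 1000 objects per INSERT query.
Entry.objects.bulk_create(entries, batch_size=1000)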
5 docs/releases/1.5.txt
@@ -85,6 +85,11 @@ Django 1.5 also includes several smaller improvements worth noting:
* In the localflavor for Canada, "pq" was added to the acceptable codes for
Quebec. It's an old abbreviation.
+* :meth:`QuerySet.bulk_create()
+  <django.db.models.query.QuerySet.bulk_create>` now has a ``batch_size``
+  argument. By default the batch size is unlimited, except for SQLite, where
+  a single batch is limited so that 1000 parameters per query aren't exceeded.
+
Backwards incompatible changes in 1.5
=====================================
6 tests/regressiontests/bulk_create/models.py
@@ -18,4 +18,8 @@ class Pizzeria(Restaurant):
    pass

class State(models.Model):
-    two_letter_code = models.CharField(max_length=2, primary_key=True)
+    two_letter_code = models.CharField(max_length=2, primary_key=True)
+
+class TwoFields(models.Model):
+    f1 = models.IntegerField(unique=True)
+    f2 = models.IntegerField(unique=True)
45 tests/regressiontests/bulk_create/tests.py
@@ -2,9 +2,11 @@
from operator import attrgetter
-from django.test import TestCase, skipIfDBFeature, skipUnlessDBFeature
+from django.db import connection
+from django.test import TestCase, skipIfDBFeature
+from django.test.utils import override_settings
-from .models import Country, Restaurant, Pizzeria, State
+from .models import Country, Restaurant, Pizzeria, State, TwoFields
class BulkCreateTests(TestCase):
@@ -27,7 +29,6 @@ def test_simple(self):
        self.assertEqual(created, [])
        self.assertEqual(Country.objects.count(), 4)

-    @skipUnlessDBFeature("has_bulk_insert")
    def test_efficiency(self):
        with self.assertNumQueries(1):
            Country.objects.bulk_create(self.data)
@@ -69,3 +70,41 @@ def test_zero_as_autoval(self):
        invalid_country = Country(id=0, name='Poland', iso_two_letter='PL')
        with self.assertRaises(ValueError):
            Country.objects.bulk_create([valid_country, invalid_country])
+
+    def test_large_batch(self):
+        with override_settings(DEBUG=True):
+            connection.queries = []
+            TwoFields.objects.bulk_create([
+                TwoFields(f1=i, f2=i+1) for i in range(0, 1001)
+            ])
+            self.assertTrue(len(connection.queries) < 10)
+        self.assertEqual(TwoFields.objects.count(), 1001)
+        self.assertEqual(
+            TwoFields.objects.filter(f1__gte=450, f1__lte=550).count(),
+            101)
+        self.assertEqual(TwoFields.objects.filter(f2__gte=901).count(), 101)
+
+    def test_large_batch_mixed(self):
+        """
+        Test inserting a large batch with objects having primary key set
+        mixed together with objects without PK set.
+        """
+        with override_settings(DEBUG=True):
+            connection.queries = []
+            TwoFields.objects.bulk_create([
+                TwoFields(id=i if i % 2 == 0 else None, f1=i, f2=i+1)
+                for i in range(100000, 101000)])
+            self.assertTrue(len(connection.queries) < 10)
+        self.assertEqual(TwoFields.objects.count(), 1000)
+        # We can't assume much about the IDs created, except that the above
+        # created IDs must exist.
+        id_range = range(100000, 101000, 2)
+        self.assertEqual(TwoFields.objects.filter(id__in=id_range).count(), 500)
+
+    def test_explicit_batch_size(self):
+        objs = [TwoFields(f1=i, f2=i) for i in range(0, 100)]
+        with self.assertNumQueries(2):
+            TwoFields.objects.bulk_create(objs, 50)
+        TwoFields.objects.all().delete()
+        with self.assertNumQueries(1):
+            TwoFields.objects.bulk_create(objs, len(objs))
3 tests/regressiontests/queries/tests.py
@@ -1863,8 +1863,7 @@ def test_ticket14244(self):
        # Test that the "in" lookup works with lists of 1000 items or more.
        Number.objects.all().delete()
        numbers = range(2500)
-        for num in numbers:
-            _ = Number.objects.create(num=num)
+        Number.objects.bulk_create(Number(num=num) for num in numbers)
        self.assertEqual(
            Number.objects.filter(num__in=numbers[:1000]).count(),
            1000
