Skip to content

Commit

Permalink
Added regions, renamed sections districts, and cleaned up the data on…
Browse files Browse the repository at this point in the history
… import
  • Loading branch information
coderholic committed Nov 13, 2010
1 parent 0196925 commit 82bb511
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 36 deletions.
14 changes: 10 additions & 4 deletions README
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
django-cities - Place models and data for Django apps
=====================================================

This project includes country, city, and section models and is
This project includes country, region, city, and district models and is
prepopulated with data from GeoNames. The GeoNames import script
is also included if you'd like to re-import the data yourself.
is also included if you'd like to re-import the data yourself, or change
the way in which it gets cleaned up.

Includes 248 counties, 98245 cities and 743 sections (boroughs and districts)
Includes 234 countries, 2,610 regions, 97,949 cities and 606 districts

Examples:
=========
Finding all London boroughs:

>>> london = City.objects.filter(country__name='United Kingdom').get(name='London')
>>> boroughs = Section.objects.filter(city=london)
>>> boroughs = District.objects.filter(city=london)

Nearest city to a given lat,lon:

>>> City.objects.nearest_to(51, 1)
<City: Dymchurch, Kent, United Kingdom>

5 Nearest cities to London:

Expand Down
2 changes: 1 addition & 1 deletion cities/fixtures/initial_data.json

Large diffs are not rendered by default.

141 changes: 122 additions & 19 deletions cities/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
GeoNames city data import script. Requires the following files:
- http://download.geonames.org/export/dump/countryInfo.txt
- http://download.geonames.org/export/dump/admin1Codes.txt
- http://download.geonames.org/export/dump/cities1000.zip
Based on Richard Crowley's Django Shell Script https://gist.github.com/79156
Expand All @@ -21,6 +22,7 @@
from django.template.defaultfilters import slugify
from cities.models import *
import codecs
from django.db.models import Count

def import_countries():
for line in open("countryInfo.txt"):
Expand All @@ -31,21 +33,47 @@ def import_countries():
country = Country()
country.code = items[0]
country.name = items[4]
country.slug = slugify(country.name)
country.population = items[7]
country.continent = items[8]
country.tld = items[9]
country.save()
country.tld = items[9][1:] # strip the leading .

print "Added country %s" % country.name
# Some smaller countries share a TLD. Save the one with the biggest population
existing = Country.objects.filter(tld=country.tld)
if existing.count():
existing = existing[0]
if existing.population < country.population:
existing.delete()
country.save()
print "Replaced country %s with %s" % (existing.name, country.name)
else:
country.save()
print "Added country %s %s" % (country.name, country.code)

def import_regions():
    """Import regions from the GeoNames ``admin1Codes.txt`` file.

    Each non-comment line is split on tabs; the first field is stored as the
    region code and the second as the region name.  The first two characters
    of the region code are used to look up the owning ``Country`` by its
    ``code``.  Regions whose country has not been imported are reported and
    skipped.  Blank or malformed lines (fewer than two fields) are ignored.
    """
    source = codecs.open("admin1Codes.txt", "r", "utf-8")
    try:
        for line in source:
            if line.startswith("#"):
                continue

            items = line.split("\t")
            if len(items) < 2:
                # No name column on this line, so there is nothing to import.
                continue

            region = Region()
            region.code = items[0]
            region.name = items[1].strip()
            region.slug = slugify(region.name)

            country_code = region.code[:2]
            try:
                region.country = Country.objects.get(code=country_code)
            except Country.DoesNotExist:
                # Only a missing country is an expected, skippable condition;
                # any other error is left to propagate.
                print("Cannot find country %s - skipping" % country_code)
                continue

            region.save()
            print("Added region %s" % (region.name,))
    finally:
        # Always release the file handle, even if an import step fails.
        source.close()

def import_cities():
for line in codecs.open("cities1000.txt", "r", "utf-8"):
if line[0] == "#":
continue

items = line.split("\t")
print items
admin_type = items[11]
type = items[7]

Expand All @@ -57,11 +85,48 @@ def import_cities():
city.slug = slugify(city.name)
city.location = Point(float(items[4]), float(items[5]))
city.population = items[14]
city.country = Country.objects.get(code=items[8])
city.save()
print "Added city %s" % city

def import_sections():
region = None
if items[11].strip():
try:
code = "%s.%s" % (items[8], items[11]) # Try more specific region first
region = Region.objects.get(code=code.strip())
except:
pass

if not region:
try:
code = "%s.%s" % (items[8], items[10])
region = Region.objects.get(code=code.strip())
except:
print "Cannot find region %s for %s - skipping" % (code, city.name)
continue
city.region = region
try:
city.save()
except:
continue
#print "Added city %s" % city

def fix_regions():
    """Some large cities are placed in their own region. Fix those.

    For every region that contains exactly one city, look at up to four of
    the nearest cities in the same country whose own region contains more
    than one city, and move the lone city into the region that occurs most
    often among them.  If several regions are equally common, which one is
    chosen is arbitrary.  A city with no such neighbours is left where it is.

    Failures while processing one city are reported and do not stop the
    remaining cities from being processed.
    """
    import sys  # local import: only used to report unexpected failures

    regions = Region.objects.annotate(count=Count('city')).filter(count=1)
    for r in regions:
        city = r.city_set.all()[0]
        try:
            # Candidate neighbours: same country, and their region holds more
            # than one city (so single-city regions are never chosen).
            nearest_cities = City.objects.filter(region__country=r.country).annotate(count=Count('region__city')).filter(count__gt=1).distance(city.location).order_by('distance')[:4]

            votes = {}
            for c in nearest_cities:
                votes[c.region] = 1 + votes.get(c.region, 0)

            if not votes:
                # No multi-city region in this country; nothing to move to.
                continue

            nearest_region = max(votes.items(), key=lambda kv: kv[1])[0]
            #print "Would move %s from %s ==> %s" % (city.name, r, nearest_region)
            city.region = nearest_region
            city.save()
        except Exception:
            # Best effort: keep going, but do not hide the failure.
            print("Could not reassign %s: %r" % (city.name, sys.exc_info()[1]))
def import_districts():
for line in codecs.open("cities1000.txt", "r", "utf-8"):
if line[0] == "#":
continue
Expand All @@ -73,15 +138,53 @@ def import_sections():

# See http://www.geonames.org/export/codes.html
if type == 'PPLX' or (admin_type == 'GLA' and type != 'PPLC'):
section = Section()
section.id = items[0]
section.name = items[1]
section.slug = slugify(section.name)
section.location = Point(float(items[4]), float(items[5]))
section.population = items[14]
section.city = City.objects.filter(population__gt=150000).distance(section.location).order_by('distance')[0] # Set the nearest city
section.save()
print "Added section %s" % section
district = District()
district.id = items[0]
district.name = items[1]
district.slug = slugify(district.name)
district.location = Point(float(items[4]), float(items[5]))
district.population = items[14]
if admin_type == 'GLA':
district.city = City.objects.filter(name='London').order_by('-population')[0] # Set city to London, UK
else:
district.city = City.objects.filter(population__gt=125000).distance(district.location).order_by('distance')[0] # Set the nearest city
district.save()
print "Added district %s" % district

def cleanup():
    """Tidy up the imported data.

    Steps, in order:

    1. Move every city in a region named "United Kingdom (general)" into the
       region that occurs most often among its (up to five) nearest cities
       in the same country that are not themselves in that region.  If
       several regions are equally common, which one is chosen is arbitrary.
       Nothing happens if no region with that name exists.
    2. Delete districts that are the only district of their city.
    3. Delete regions that have no cities.
    4. Delete countries that have no regions.

    Failures while moving one city are reported and do not stop the rest of
    the cleanup.
    """
    import sys  # local import: only used to report unexpected failures

    # Fix places in "United Kingdom (general)"
    for r in Region.objects.filter(name='United Kingdom (general)'):
        for city in r.city_set.all():
            try:
                # The placeholder region itself is excluded, so the city
                # being moved is never one of its own neighbours.
                nearest_cities = City.objects.filter(region__country=r.country).distance(city.location).exclude(region=r).order_by('distance')[:5]

                votes = {}
                for c in nearest_cities:
                    votes[c.region] = 1 + votes.get(c.region, 0)

                if not votes:
                    # No city outside the placeholder region to learn from.
                    continue

                nearest_region = max(votes.items(), key=lambda kv: kv[1])[0]
                print("Moving %s to %s ==> %s" % (city.name, r, nearest_region))
                city.region = nearest_region
                city.save()
            except Exception:
                # Best effort: keep going, but do not hide the failure.
                print("Could not move %s: %r" % (city.name, sys.exc_info()[1]))

    # Districts whose city has exactly one district.
    single_districts = District.objects.annotate(count=Count('city__district')).filter(count=1)
    single_districts.delete()

    # Regions are pruned before countries so that a country left with no
    # regions is caught by the final step.
    empty_regions = Region.objects.filter(city__isnull=True)
    empty_regions.delete()

    empty_countries = Country.objects.filter(region__isnull=True)
    empty_countries.delete()

if '__main__' == __name__:
import_sections()
import_countries()
import_regions()
import_cities()
import_districts()
fix_regions()
cleanup()
66 changes: 54 additions & 12 deletions cities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,86 @@
from django.contrib.gis.geos import Point
from django.contrib import admin

# Create your models here.
class Country(models.Model):
name = models.CharField(max_length = 200)
slug = models.CharField(max_length = 200, unique=True)
code = models.CharField(max_length = 2, db_index=True)
population = models.IntegerField()
continent = models.CharField(max_length = 2)
tld = models.CharField(max_length = 10)
tld = models.CharField(max_length = 5, unique=True)

objects = models.GeoManager()

def __unicode__(self):
return self.name

class City(models.Model):
@property
def hierarchy(self):
return [self]

class Region(models.Model):
    """A named region belonging to a Country."""

    name = models.CharField(max_length = 200)
    slug = models.CharField(max_length = 200, db_index=True)
    # Region code; indexed for lookups by code.
    code = models.CharField(max_length = 10, db_index=True)
    country = models.ForeignKey(Country)
    objects = models.GeoManager()

    def __unicode__(self):
        """Return "<region name>, <country>"."""
        return "%s, %s" % (self.name, self.country)

    @property
    def hierarchy(self):
        """Return the country's hierarchy list followed by this region."""
        # Build a new list rather than binding a local called ``list``,
        # which would shadow the builtin.
        return self.country.hierarchy + [self]

class CityManager(models.GeoManager):
    """Manager that adds nearest-city lookups."""

    def nearest_to(self, lat, lon):
        """Return the city nearest to the given latitude and longitude.

        Both arguments are converted with ``float()``.
        """
        # NOTE(review): the point is built with lat as the first coordinate
        # and lon as the second; confirm this matches how stored locations
        # were created.
        return self.nearest_to_point(Point(float(lat), float(lon)))

    def nearest_to_point(self, point):
        """Return the first city when ordered by distance from ``point``.

        Raises IndexError if the queryset is empty.
        """
        by_distance = self.distance(point).order_by('distance')
        return by_distance[0]

class City(models.Model):
name = models.CharField(max_length = 200)
slug = models.CharField(max_length = 200, db_index=True)
region = models.ForeignKey(Region)
location = models.PointField()
population = models.IntegerField()

objects = models.GeoManager()
objects = CityManager()

def __unicode__(self):
return "%s, %s" % (self.name, self.country.name)
return "%s, %s" % (self.name, self.region)

class Section(models.Model):
@property
def hierarchy(self):
list = self.region.hierarchy
list.append(self)
return list

class District(models.Model):
name = models.CharField(max_length = 200)
slug = models.CharField(max_length = 200)
slug = models.CharField(max_length = 200, db_index=True)
city = models.ForeignKey(City)
location = models.PointField()
population = models.IntegerField()

objects = models.GeoManager()

def __unicode__(self):
return "%s, %s, %s" % (self.name, self.city.name, self.city.country.name)
return u"%s, %s" % (self.name, self.city)

@property
def hierarchy(self):
list = self.city.hierarchy
list.append(self)
return list

class SearchableAdmin(admin.ModelAdmin):
    """Admin options that enable searching on the ``name`` field."""

    search_fields = ["name"]

admin.site.register(Country)
admin.site.register(City)
admin.site.register(Section)
admin.site.register(Country, SearchableAdmin)
admin.site.register(Region, SearchableAdmin)
admin.site.register(City, SearchableAdmin)
admin.site.register(District, SearchableAdmin)

0 comments on commit 82bb511

Please sign in to comment.