From c78cbde96754275304412477287e941160ed0787 Mon Sep 17 00:00:00 2001 From: Philip Mateescu Date: Sun, 4 Dec 2011 17:22:57 -0600 Subject: [PATCH] id fields - untested --- couchdbexporter.py | 3 --- discogs.sql | 38 ++++++++++++++++-------------- discogsartistparser.py | 4 ++-- discogsreleaseparser.py | 5 ++-- model.py | 4 +++- mongodbexporter.py | 3 --- postgresexporter.py | 52 +++++++++++++++++++++-------------------- 7 files changed, 54 insertions(+), 55 deletions(-) diff --git a/couchdbexporter.py b/couchdbexporter.py index 773a2ab..8ffa186 100644 --- a/couchdbexporter.py +++ b/couchdbexporter.py @@ -32,14 +32,11 @@ def finish(self, completely_done = False): pass def storeLabel(self, label): - label.id = label.name self.execute(label) def storeArtist(self, artist): - artist.id = artist.name self.execute(artist) def storeRelease(self, release): - release.id = release.discogs_id self.execute(release) diff --git a/discogs.sql b/discogs.sql index f430e9a..c54632d 100644 --- a/discogs.sql +++ b/discogs.sql @@ -19,6 +19,7 @@ SET default_with_oids = false; -- CREATE TABLE artist ( + id integer NOT NULL, name text NOT NULL, realname text, urls text[], @@ -37,7 +38,7 @@ CREATE TABLE artist ( CREATE TABLE artists_images ( image_uri text, - artist_name text + artist_id integer ); @@ -89,6 +90,7 @@ CREATE TABLE image ( -- CREATE TABLE label ( + id integer NOT NULL, name text NOT NULL, contactinfo text, profile text, @@ -104,7 +106,7 @@ CREATE TABLE label ( CREATE TABLE labels_images ( image_uri text, - label_name text + label_id integer ); @@ -113,7 +115,7 @@ CREATE TABLE labels_images ( -- CREATE TABLE release ( - discogs_id integer NOT NULL, + id integer NOT NULL, status text, title text, country text, @@ -130,7 +132,7 @@ CREATE TABLE release ( CREATE TABLE releases_artists ( artist_name text, - discogs_id integer + release_id integer ); @@ -142,7 +144,7 @@ CREATE TABLE releases_artists_joins ( artist1 text, artist2 text, join_relation text, - discogs_id integer + 
release_id integer ); @@ -151,7 +153,7 @@ CREATE TABLE releases_artists_joins ( -- CREATE TABLE releases_extraartists ( - discogs_id integer, + release_id integer, artist_name text, roles text[] ); @@ -162,7 +164,7 @@ CREATE TABLE releases_extraartists ( -- CREATE TABLE releases_formats ( - discogs_id integer, + release_id integer, format_name text, qty integer, descriptions text[] @@ -175,7 +177,7 @@ CREATE TABLE releases_formats ( CREATE TABLE releases_images ( image_uri text, - discogs_id integer + release_id integer ); @@ -185,7 +187,7 @@ CREATE TABLE releases_images ( CREATE TABLE releases_labels ( label text, - discogs_id integer, + release_id integer, catno text ); @@ -204,7 +206,7 @@ CREATE TABLE role ( -- CREATE TABLE track ( - discogs_id integer, + release_id integer, title text, duration text, "position" text, @@ -261,7 +263,7 @@ CREATE TABLE tracks_extraartists_roles ( -- ALTER TABLE ONLY artist - ADD CONSTRAINT artist_pkey PRIMARY KEY (name); + ADD CONSTRAINT artist_pkey PRIMARY KEY (id); -- @@ -293,7 +295,7 @@ ALTER TABLE ONLY image -- ALTER TABLE ONLY label - ADD CONSTRAINT label_pkey PRIMARY KEY (name); + ADD CONSTRAINT label_pkey PRIMARY KEY (id); -- @@ -301,7 +303,7 @@ ALTER TABLE ONLY label -- ALTER TABLE ONLY release - ADD CONSTRAINT release_pkey PRIMARY KEY (discogs_id); + ADD CONSTRAINT release_pkey PRIMARY KEY (id); -- @@ -325,7 +327,7 @@ ALTER TABLE ONLY artists_images -- ALTER TABLE ONLY releases_labels - ADD CONSTRAINT foreign_did FOREIGN KEY (discogs_id) REFERENCES release(discogs_id); + ADD CONSTRAINT foreign_did FOREIGN KEY (release_id) REFERENCES release(id); -- @@ -345,11 +347,11 @@ ALTER TABLE ONLY labels_images -- --- Name: releases_formats_discogs_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- Name: releases_formats_release_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - -- ALTER TABLE ONLY releases_formats - ADD CONSTRAINT releases_formats_discogs_id_fkey FOREIGN KEY (discogs_id) REFERENCES release(discogs_id); 
+ ADD CONSTRAINT releases_formats_release_id_fkey FOREIGN KEY (release_id) REFERENCES release(id); -- @@ -361,11 +363,11 @@ ALTER TABLE ONLY releases_formats -- --- Name: releases_images_discogs_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- Name: releases_images_release_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - -- ALTER TABLE ONLY releases_images - ADD CONSTRAINT releases_images_discogs_id_fkey FOREIGN KEY (discogs_id) REFERENCES release(discogs_id); + ADD CONSTRAINT releases_images_release_id_fkey FOREIGN KEY (release_id) REFERENCES release(id); -- diff --git a/discogsartistparser.py b/discogsartistparser.py index eca49c7..51f7671 100644 --- a/discogsartistparser.py +++ b/discogsartistparser.py @@ -72,6 +72,8 @@ def endDocument(self): def endElement(self, name): self.buffer = self.buffer.strip() + if name == 'id': + self.artist.id = int(self.buffer) if name == 'name': if len(self.buffer) != 0: if self.inElement['namevariations']: @@ -101,8 +103,6 @@ def endElement(self, name): else: self.artist.urls['other'].append(self.buffer) ''' - elif name == 'id': - self.artist.artist_id = self.buffer elif name == "artist": self.exporter.storeArtist(self.artist) diff --git a/discogsreleaseparser.py b/discogsreleaseparser.py index 63a654a..05778ed 100644 --- a/discogsreleaseparser.py +++ b/discogsreleaseparser.py @@ -78,7 +78,7 @@ def startElement(self, name, attrs): self.stack.append(name) if name == 'release': self.release = model.Release() - self.release.discogs_id = attrs['id'] + self.release.id = attrs['id'] self.release.status = attrs['status'] elif name == 'track': self.release.tracklist.append(model.Track()) @@ -232,7 +232,6 @@ def endElement(self, name): #global releases #releases.append(self.release) #print releaseCounter - #print self.release.discogs_id #if releaseCounter > 1000: # self.endDocument() #print self.release.title @@ -250,7 +249,7 @@ def endElement(self, name): if len(releases) > 50: for release in releases: print
"------------------------------------------------" - print "id, title, status: " + release.discogs_id + ", " + release.title + ", " + release.status + print "id, title, status: " + release.id + ", " + release.title + ", " + release.status print "country: " + release.country print "releasedate: " + release.released print "notes: " + release.notes diff --git a/model.py b/model.py index 363654d..b122a39 100644 --- a/model.py +++ b/model.py @@ -1,5 +1,6 @@ class Artist: def __init__(self): + self.id = 0 self.name = '' self.realname = '' self.images = [] @@ -15,7 +16,7 @@ def __init__(self): class Release: def __init__(self): - self.discogs_id = '' + self.id = 0 self.status = '' self.title = '' self.country = '' @@ -50,6 +51,7 @@ def __init__(self): class Label: def __init__(self): + self.id = 0 self.name = '' self.images = [] self.contactinfo = '' diff --git a/mongodbexporter.py b/mongodbexporter.py index bbc8c71..61e5845 100644 --- a/mongodbexporter.py +++ b/mongodbexporter.py @@ -121,17 +121,14 @@ def finish(self, completely_done=False): self.db.connection.disconnect() def storeLabel(self, label): - label.id = label.name label.l_name = label.name.lower() self.execute('labels', label) def storeArtist(self, artist): - artist.id = artist.name artist.l_name = artist.name.lower() self.execute('artists', artist) def storeRelease(self, release): - release.id = release.discogs_id release.l_artist = release.artist.lower() release.l_title = release.title.lower() self.execute('releases', release) diff --git a/postgresexporter.py b/postgresexporter.py index a88d070..98de0a2 100644 --- a/postgresexporter.py +++ b/postgresexporter.py @@ -58,8 +58,9 @@ def finish(self, completely_done = False): def storeLabel(self, label): values = [] + values.append(label.id) values.append(label.name) - columns = "name" + columns = "id,name" if len(label.contactinfo) != 0: values.append(label.contactinfo) @@ -102,13 +103,14 @@ def storeLabel(self, label): imgQuery = "INSERT INTO image(" + imgCols 
+ ") VALUES(%s,%s,%s,%s,%s);" self.execute(imgQuery, imgValues) self.imgUris[img.uri] = True - self.execute("INSERT INTO labels_images(image_uri, label_name) VALUES(%s,%s);", (img.uri, label.name)) + self.execute("INSERT INTO labels_images(image_uri, label_id) VALUES(%s,%s);", (img.uri, label.id)) def storeArtist(self, artist): values = [] + values.append(artist.id) values.append(artist.name) - columns = "name" + columns = "id,name" if len(artist.realname) != 0: values.append(artist.realname) @@ -158,16 +160,16 @@ def storeArtist(self, artist): imgQuery = "INSERT INTO image(" + imgCols + ") VALUES(%s,%s,%s,%s,%s);" self.execute(imgQuery, imgValues) self.imgUris[img.uri] = True - self.execute("INSERT INTO artists_images(image_uri, artist_name) VALUES(%s,%s);", (img.uri, artist.name)) + self.execute("INSERT INTO artists_images(image_uri, artist_id) VALUES(%s,%s);", (img.uri, artist.id)) def storeRelease(self, release): values = [] - values.append(release.discogs_id) + values.append(release.id) values.append(release.title) values.append(release.status) - columns = "discogs_id, title, status" + columns = "id, title, status" if len(release.country) != 0: values.append(release.country) @@ -214,8 +216,8 @@ def storeRelease(self, release): imgQuery = "INSERT INTO image(" + imgCols + ") VALUES(%s,%s,%s,%s,%s);" self.execute(imgQuery, imgValues) self.imgUris[img.uri] = True - self.execute("INSERT INTO releases_images(image_uri, discogs_id) VALUES(%s,%s);", - (img.uri, release.discogs_id)) + self.execute("INSERT INTO releases_images(image_uri, release_id) VALUES(%s,%s);", + (img.uri, release.id)) for fmt in release.formats: if len(release.formats) != 0: if not fmt.name in self.formatNames: @@ -224,44 +226,44 @@ def storeRelease(self, release): self.execute("INSERT INTO format(name) VALUES(%s);", (fmt.name, )) except PostgresExporter.ExecuteError, e: print "%s" % (e.args) - query = "INSERT INTO releases_formats(discogs_id, format_name, qty, descriptions) VALUES(%s,%s,%s,%s);" -
self.execute(query, (release.discogs_id, fmt.name, fmt.qty, fmt.descriptions)) - labelQuery = "INSERT INTO releases_labels(discogs_id, label, catno) VALUES(%s,%s,%s);" + query = "INSERT INTO releases_formats(release_id, format_name, qty, descriptions) VALUES(%s,%s,%s,%s);" + self.execute(query, (release.id, fmt.name, fmt.qty, fmt.descriptions)) + labelQuery = "INSERT INTO releases_labels(release_id, label, catno) VALUES(%s,%s,%s);" for lbl in release.labels: - self.execute(labelQuery, (release.discogs_id, lbl.name, lbl.catno)) + self.execute(labelQuery, (release.id, lbl.name, lbl.catno)) if len(release.artists) > 1: for artist in release.artists: - query = "INSERT INTO releases_artists(discogs_id, artist_name) VALUES(%s,%s);" - self.execute(query, (release.discogs_id, artist)) + query = "INSERT INTO releases_artists(release_id, artist_name) VALUES(%s,%s);" + self.execute(query, (release.id, artist)) for aj in release.artistJoins: query = """INSERT INTO releases_artists_joins - (discogs_id, join_relation, artist1, artist2) + (release_id, join_relation, artist1, artist2) VALUES(%s,%s,%s,%s);""" artistIdx = release.artists.index(aj.artist1) + 1 #The last join relation is not between artists but instead #something like "Bob & Alice 'PRESENTS' - Cryptographic Tunes": if artistIdx >= len(release.artists): - values = (release.discogs_id, aj.join_relation, '', '') # join relation is between all artists and the album + values = (release.id, aj.join_relation, '', '') # join relation is between all artists and the album else: - values = (release.discogs_id, aj.join_relation, aj.artist1, release.artists[artistIdx]) + values = (release.id, aj.join_relation, aj.artist1, release.artists[artistIdx]) self.execute(query, values) else: if len(release.artists) == 0: # use anv if no artist name - self.execute("INSERT INTO releases_artists(discogs_id, artist_name) VALUES(%s,%s);", - (release.discogs_id, release.anv)) + self.execute("INSERT INTO releases_artists(release_id, artist_name) 
VALUES(%s,%s);", + (release.id, release.anv)) else: - self.execute("INSERT INTO releases_artists(discogs_id, artist_name) VALUES(%s,%s);", - (release.discogs_id, release.artists[0])) + self.execute("INSERT INTO releases_artists(release_id, artist_name) VALUES(%s,%s);", + (release.id, release.artists[0])) for extr in release.extraartists: - self.execute("INSERT INTO releases_extraartists(discogs_id, artist_name, roles) VALUES(%s,%s,%s);", - (release.discogs_id, extr.name, extr.roles)) + self.execute("INSERT INTO releases_extraartists(release_id, artist_name, roles) VALUES(%s,%s,%s);", + (release.id, extr.name, extr.roles)) for trk in release.tracklist: trackid = str(uuid.uuid4()) - self.execute("INSERT INTO track(discogs_id, title, duration, position, track_id) VALUES(%s,%s,%s,%s,%s);", - (release.discogs_id, trk.title, trk.duration, trk.position, trackid)) + self.execute("INSERT INTO track(release_id, title, duration, position, track_id) VALUES(%s,%s,%s,%s,%s);", + (release.id, trk.title, trk.duration, trk.position, trackid)) for artist in trk.artists: query = "INSERT INTO tracks_artists(track_id, artist_name) VALUES(%s,%s);" self.execute(query, (trackid, artist))