Skip to content

Commit

Permalink
Divide project and document models into 3 models
Browse files Browse the repository at this point in the history
Because of polymorphism
  • Loading branch information
Hironsan committed Feb 18, 2019
1 parent 9ec7014 commit a43738a
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 162 deletions.
244 changes: 91 additions & 153 deletions app/server/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
from django.core.exceptions import ValidationError
from django.db import models
from django.urls import reverse
Expand All @@ -10,16 +9,16 @@
class Project(models.Model):
DOCUMENT_CLASSIFICATION = 'DocumentClassification'
SEQUENCE_LABELING = 'SequenceLabeling'
Seq2seq = 'Seq2seq'
SEQ2SEQ = 'Seq2seq'

PROJECT_CHOICES = (
(DOCUMENT_CLASSIFICATION, 'document classification'),
(SEQUENCE_LABELING, 'sequence labeling'),
(Seq2seq, 'sequence to sequence'),
(SEQ2SEQ, 'sequence to sequence'),
)

name = models.CharField(max_length=100)
description = models.CharField(max_length=500)
description = models.TextField()
guideline = models.TextField()
created_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(auto_now=True)
Expand All @@ -29,94 +28,80 @@ class Project(models.Model):
def get_absolute_url(self):
return reverse('upload', args=[self.id])

def is_type_of(self, project_type):
return project_type == self.project_type
def __str__(self):
return self.name


class TextClassificationProject(Project):

def get_progress(self, user):
docs = self.get_documents(is_null=True, user=user)
total = self.documents.count()
remaining = docs.count()
return {'total': total, 'remaining': remaining}
class Meta:
proxy = True

@property
def image(self):
if self.is_type_of(self.DOCUMENT_CLASSIFICATION):
url = staticfiles_storage.url('images/cat-1045782_640.jpg')
elif self.is_type_of(self.SEQUENCE_LABELING):
url = staticfiles_storage.url('images/cat-3449999_640.jpg')
elif self.is_type_of(self.Seq2seq):
url = staticfiles_storage.url('images/tiger-768574_640.jpg')

return url
return staticfiles_storage.url('images/cats/text_classification.jpg')

def get_template_name(self):
if self.is_type_of(Project.DOCUMENT_CLASSIFICATION):
template_name = 'annotation/document_classification.html'
elif self.is_type_of(Project.SEQUENCE_LABELING):
template_name = 'annotation/sequence_labeling.html'
elif self.is_type_of(Project.Seq2seq):
template_name = 'annotation/seq2seq.html'
else:
raise ValueError('Template does not exist')

return template_name

def get_documents(self, is_null=True, user=None):
docs = self.documents.all()
if self.is_type_of(Project.DOCUMENT_CLASSIFICATION):
if user:
docs = docs.exclude(doc_annotations__user=user)
else:
docs = docs.filter(doc_annotations__isnull=is_null)
elif self.is_type_of(Project.SEQUENCE_LABELING):
if user:
docs = docs.exclude(seq_annotations__user=user)
else:
docs = docs.filter(seq_annotations__isnull=is_null)
elif self.is_type_of(Project.Seq2seq):
if user:
docs = docs.exclude(seq2seq_annotations__user=user)
else:
docs = docs.filter(seq2seq_annotations__isnull=is_null)
else:
raise ValueError('Invalid project_type')

return docs
return 'annotation/document_classification.html'

def get_document_serializer(self):
from .serializers import ClassificationDocumentSerializer
from .serializers import SequenceDocumentSerializer
from .serializers import Seq2seqDocumentSerializer
if self.is_type_of(Project.DOCUMENT_CLASSIFICATION):
return ClassificationDocumentSerializer
elif self.is_type_of(Project.SEQUENCE_LABELING):
return SequenceDocumentSerializer
elif self.is_type_of(Project.Seq2seq):
return Seq2seqDocumentSerializer
else:
raise ValueError('Invalid project_type')
return ClassificationDocumentSerializer

def get_annotation_serializer(self):
from .serializers import DocumentAnnotationSerializer
return DocumentAnnotationSerializer

def get_annotation_class(self):
return DocumentAnnotation


class SequenceLabelingProject(Project):

class Meta:
proxy = True

@property
def image(self):
return staticfiles_storage.url('images/cats/sequence_labeling.jpg')

def get_template_name(self):
return 'annotation/sequence_labeling.html'

def get_document_serializer(self):
from .serializers import SequenceDocumentSerializer
return SequenceDocumentSerializer

def get_annotation_serializer(self):
from .serializers import SequenceAnnotationSerializer
from .serializers import Seq2seqAnnotationSerializer
if self.is_type_of(Project.DOCUMENT_CLASSIFICATION):
return DocumentAnnotationSerializer
elif self.is_type_of(Project.SEQUENCE_LABELING):
return SequenceAnnotationSerializer
elif self.is_type_of(Project.Seq2seq):
return Seq2seqAnnotationSerializer
return SequenceAnnotationSerializer

def get_annotation_class(self):
if self.is_type_of(Project.DOCUMENT_CLASSIFICATION):
return DocumentAnnotation
elif self.is_type_of(Project.SEQUENCE_LABELING):
return SequenceAnnotation
elif self.is_type_of(Project.Seq2seq):
return Seq2seqAnnotation
return SequenceAnnotation

def __str__(self):
return self.name

class Seq2seqProject(Project):
    """Proxy of ``Project`` that supplies the sequence-to-sequence variants.

    Every method returns the seq2seq-specific resource: the image URL, the
    annotation template name, the serializer classes and the annotation
    model class.
    """

    class Meta:
        proxy = True

    @property
    def image(self):
        """Return the static-files URL of the seq2seq project image."""
        image_path = 'images/cats/seq2seq.jpg'
        return staticfiles_storage.url(image_path)

    def get_template_name(self):
        """Return the template name used for seq2seq annotation."""
        template_name = 'annotation/seq2seq.html'
        return template_name

    def get_document_serializer(self):
        """Return the ``Seq2seqDocumentSerializer`` class (not an instance)."""
        # The serializer is imported at call time rather than at module level.
        from .serializers import Seq2seqDocumentSerializer
        serializer_class = Seq2seqDocumentSerializer
        return serializer_class

    def get_annotation_serializer(self):
        """Return the ``Seq2seqAnnotationSerializer`` class (not an instance)."""
        # The serializer is imported at call time rather than at module level.
        from .serializers import Seq2seqAnnotationSerializer
        serializer_class = Seq2seqAnnotationSerializer
        return serializer_class

    def get_annotation_class(self):
        """Return the ``Seq2seqAnnotation`` model class."""
        annotation_class = Seq2seqAnnotation
        return annotation_class


class Label(models.Model):
Expand Down Expand Up @@ -144,84 +129,37 @@ class Document(models.Model):
project = models.ForeignKey(Project, related_name='documents', on_delete=models.CASCADE)
metadata = models.TextField(default='{}')

def get_annotations(self):
if self.project.is_type_of(Project.DOCUMENT_CLASSIFICATION):
return self.doc_annotations.all()
elif self.project.is_type_of(Project.SEQUENCE_LABELING):
return self.seq_annotations.all()
elif self.project.is_type_of(Project.Seq2seq):
return self.seq2seq_annotations.all()

def to_csv(self):
return self.make_dataset()

def make_dataset(self):
if self.project.is_type_of(Project.DOCUMENT_CLASSIFICATION):
return self.make_dataset_for_classification()
elif self.project.is_type_of(Project.SEQUENCE_LABELING):
return self.make_dataset_for_sequence_labeling()
elif self.project.is_type_of(Project.Seq2seq):
return self.make_dataset_for_seq2seq()

def make_dataset_for_classification(self):
annotations = self.get_annotations()
dataset = [[self.id, self.text, a.label.text, a.user.username, self.metadata]
for a in annotations]
return dataset

def make_dataset_for_sequence_labeling(self):
annotations = self.get_annotations()
dataset = [[self.id, ch, 'O', self.metadata] for ch in self.text]
for a in annotations:
for i in range(a.start_offset, a.end_offset):
if i == a.start_offset:
dataset[i][2] = 'B-{}'.format(a.label.text)
else:
dataset[i][2] = 'I-{}'.format(a.label.text)
return dataset

def make_dataset_for_seq2seq(self):
annotations = self.get_annotations()
dataset = [[self.id, self.text, a.text, a.user.username, self.metadata]
for a in annotations]
return dataset

def to_json(self):
return self.make_dataset_json()

def make_dataset_json(self):
if self.project.is_type_of(Project.DOCUMENT_CLASSIFICATION):
return self.make_dataset_for_classification_json()
elif self.project.is_type_of(Project.SEQUENCE_LABELING):
return self.make_dataset_for_sequence_labeling_json()
elif self.project.is_type_of(Project.Seq2seq):
return self.make_dataset_for_seq2seq_json()

def make_dataset_for_classification_json(self):
annotations = self.get_annotations()
labels = [a.label.text for a in annotations]
username = annotations[0].user.username
dataset = {'doc_id': self.id, 'text': self.text, 'labels': labels, 'username': username, 'metadata': json.loads(self.metadata)}
return dataset

def make_dataset_for_sequence_labeling_json(self):
annotations = self.get_annotations()
entities = [(a.start_offset, a.end_offset, a.label.text) for a in annotations]
username = annotations[0].user.username
dataset = {'doc_id': self.id, 'text': self.text, 'entities': entities, 'username': username, 'metadata': json.loads(self.metadata)}
return dataset

def make_dataset_for_seq2seq_json(self):
annotations = self.get_annotations()
sentences = [a.text for a in annotations]
username = annotations[0].user.username
dataset = {'doc_id': self.id, 'text': self.text, 'sentences': sentences, 'username': username, 'metadata': json.loads(self.metadata)}
return dataset

def __str__(self):
return self.text[:50]


class TextClassificationDocument(Document):
    """Proxy of ``Document`` whose annotations come from ``doc_annotations``."""

    class Meta:
        proxy = True

    def get_annotations(self):
        """Return all objects reachable through ``self.doc_annotations``."""
        annotations = self.doc_annotations
        return annotations.all()


class SequenceLabelingDocument(Document):
    """Proxy of ``Document`` whose annotations come from ``seq_annotations``."""

    class Meta:
        proxy = True

    def get_annotations(self):
        """Return all objects reachable through ``self.seq_annotations``."""
        annotations = self.seq_annotations
        return annotations.all()


class Seq2seqDocument(Document):
    """Proxy of ``Document`` whose annotations come from ``seq2seq_annotations``."""

    class Meta:
        proxy = True

    def get_annotations(self):
        """Return all objects reachable through ``self.seq2seq_annotations``."""
        annotations = self.seq2seq_annotations
        return annotations.all()


class Annotation(models.Model):
prob = models.FloatField(default=0.0)
manual = models.BooleanField(default=False)
Expand All @@ -232,15 +170,15 @@ class Meta:


class DocumentAnnotation(Annotation):
document = models.ForeignKey(Document, related_name='doc_annotations', on_delete=models.CASCADE)
document = models.ForeignKey(TextClassificationDocument, related_name='doc_annotations', on_delete=models.CASCADE)
label = models.ForeignKey(Label, on_delete=models.CASCADE)

class Meta:
unique_together = ('document', 'user', 'label')


class SequenceAnnotation(Annotation):
document = models.ForeignKey(Document, related_name='seq_annotations', on_delete=models.CASCADE)
document = models.ForeignKey(SequenceLabelingDocument, related_name='seq_annotations', on_delete=models.CASCADE)
label = models.ForeignKey(Label, on_delete=models.CASCADE)
start_offset = models.IntegerField()
end_offset = models.IntegerField()
Expand All @@ -254,7 +192,7 @@ class Meta:


class Seq2seqAnnotation(Annotation):
document = models.ForeignKey(Document, related_name='seq2seq_annotations', on_delete=models.CASCADE)
document = models.ForeignKey(Seq2seqDocument, related_name='seq2seq_annotations', on_delete=models.CASCADE)
text = models.TextField()

class Meta:
Expand Down
File renamed without changes

0 comments on commit a43738a

Please sign in to comment.