In [1]:
import os
from abc import ABC, abstractmethod
from typing import List

import docx
import pandas as pd

In [2]:
"""
Simple QuoteModel implementation class to encapsulate the body and author
"""
class QuoteModel:
    """
    Value object pairing the text of a quote with the name of its author.

    Both pieces are stored as strings on the ``body`` and ``author``
    attributes; whatever is passed in is converted with ``str``.
    """

    def __init__(self, body: str, author: str) -> None:
        """Store the quote text and its author, each coerced to ``str``."""
        self.author, self.body = str(author), str(body)

    def __str__(self) -> str:
        """Return the human-readable sentence describing this quote."""
        who, what = self.author, self.body
        return f"Author {who} of quote {what} in Simple Quote Model"

    def __repr__(self) -> str:
        """Return the labelled author/body text shown when the object is echoed."""
        who, what = self.author, self.body
        return f"Quote Model Author: {who}, Quote Model body: {what}"


In [3]:
class IngestorInterface(ABC):
    """Abstract base class for ingestors that turn a file into quotes.

    Subclasses list the file extensions they handle in ``accepted_files``
    (without the leading dot) and implement ``parse``.
    """

    # Extensions this ingestor accepts; the base class accepts none.
    accepted_files: List[str] = []

    @classmethod
    def can_ingest(cls, path: str) -> bool:
        """Return True when the extension of ``path`` is in ``accepted_files``.

        The extension is extracted with ``os.path.splitext`` so that a name
        without any extension (for example ``"csv"``) is not mistaken for a
        supported file, and it is compared case-insensitively so that
        ``"quotes.CSV"`` is accepted alongside ``"quotes.csv"``.
        """
        extension = os.path.splitext(path)[1]
        # Drop the leading dot; a missing extension yields "" and never matches
        # a real extension name.
        suffix = extension[1:].lower()
        return suffix in (accepted.lower() for accepted in cls.accepted_files)

    @classmethod
    @abstractmethod
    def parse(cls, path: str) -> List["QuoteModel"]:
        """Parse the file at ``path`` and return a list of QuoteModel objects."""

In [4]:
class CSVInterface(IngestorInterface):
    """Ingestor for csv files that provide ``body`` and ``author`` columns."""

    accepted_files = ["csv"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Read the csv file at ``path`` and return one QuoteModel per row.

        Args:
            path: location of the csv file; it must have ``body`` and
                ``author`` columns.

        Returns:
            A list of QuoteModel objects in file order.

        Raises:
            ValueError: if ``can_ingest`` rejects ``path``.
        """
        if not cls.can_ingest(path):
            raise ValueError(f'File {path} cannot be processed')

        # Quotes are free text: read every cell as a string and switch off the
        # default missing-value markers so that a body such as "007" or an
        # author recorded as "N/A" is kept verbatim instead of being turned
        # into 7 or NaN by pandas' inference.
        df = pd.read_csv(path, dtype=str, keep_default_na=False)

        # Walk both columns in lockstep rather than doing one .loc lookup per cell.
        return [
            QuoteModel(body, author)
            for body, author in zip(df['body'], df['author'])
        ]

In [5]:
# Forward slashes resolve on Windows, macOS and Linux alike; the previous
# backslash-separated path only worked on Windows.
csv_quote = CSVInterface()
csv_quote.parse("../_data/DogQuotes/DogQuotesCSV.csv")

[Quote Model Author: Skittle, Quote Model body: Chase the mailman,
 Quote Model Author: Mr. Paws, Quote Model body: When in doubt, go shoe-shopping]

In [6]:
class TXTInterface(IngestorInterface):
    """Ingestor for txt files holding one ``body - author`` quote per line."""

    accepted_files = ["txt"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Read the txt file at ``path`` and return a list of QuoteModels.

        Each non-blank line is split into body and author at the last
        ``" - "``; if that spaced separator is absent, the last ``"-"`` is
        used instead. Splitting at the last separator keeps hyphens inside the
        quote text (e.g. "shoe-shopping") intact. Blank lines and lines that
        contain no hyphen at all are skipped.

        Args:
            path: location of the txt file (read as UTF-8, BOM tolerated).

        Returns:
            A list of QuoteModel objects in file order.

        Raises:
            ValueError: if ``can_ingest`` rejects ``path``.
        """
        if not cls.can_ingest(path):
            raise ValueError(f'File {path} cannot be processed')

        quotes = []
        with open(path, "r", encoding="utf-8-sig") as infile:
            for line in infile:
                text = line.strip()
                if not text:
                    # Blank lines carry no quote.
                    continue

                # Prefer the spaced separator so hyphenated words on either
                # side survive; fall back to a bare hyphen for "body-author".
                body, separator, author = text.rpartition(" - ")
                if not separator:
                    body, separator, author = text.rpartition("-")
                if not separator:
                    # No body/author separator on this line: not a quote.
                    continue

                quotes.append(QuoteModel(body.strip(), author.strip()))
        return quotes

In [7]:
# Forward slashes resolve on Windows, macOS and Linux alike; the previous
# backslash-separated path only worked on Windows.
txt_quote = TXTInterface()
txt_quote.parse("../_data/DogQuotes/DogQuotesTXT.txt")

[Quote Model Author: Bork, Quote Model body: To bork or not to bork,
 Quote Model Author: Stinky, Quote Model body: He who smelt it...]

In [18]:
class DOCXInterface(IngestorInterface):
    """Ingestor for docx files holding one ``body - author`` quote per paragraph."""

    accepted_files = ["docx"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Read the docx file at ``path`` and return a list of QuoteModels.

        Each non-blank paragraph is split into body and author at the last
        ``" - "``; if that spaced separator is absent, the last ``"-"`` is
        used instead. Splitting at the last separator keeps hyphens inside the
        quote text intact. Blank paragraphs and paragraphs that contain no
        hyphen at all are skipped.

        Args:
            path: location of the docx file.

        Returns:
            A list of QuoteModel objects in document order.

        Raises:
            ValueError: if ``can_ingest`` rejects ``path``.
        """
        if not cls.can_ingest(path):
            raise ValueError(f'File {path} cannot be processed')

        doc = docx.Document(path)

        quotes = []
        for paragraph in doc.paragraphs:
            text = paragraph.text.strip()
            if not text:
                # Empty or whitespace-only paragraphs carry no quote.
                continue

            # Prefer the spaced separator so hyphenated words on either side
            # survive; fall back to a bare hyphen for "body-author".
            body, separator, author = text.rpartition(" - ")
            if not separator:
                body, separator, author = text.rpartition("-")
            if not separator:
                # No body/author separator in this paragraph: not a quote.
                continue

            quotes.append(QuoteModel(body.strip(), author.strip()))

        return quotes

In [19]:
# Forward slashes resolve on Windows, macOS and Linux alike; the previous
# backslash-separated path only worked on Windows.
docx_quote = DOCXInterface()
docx_quote.parse("../_data/DogQuotes/DogQuotesDOCX.docx")

[Quote Model Author: Rex, Quote Model body: "Bark like no one’s listening",
 Quote Model Author: Chewy, Quote Model body: "RAWRGWAWGGR",
 Quote Model Author: Peanut, Quote Model body: "Life is like peanut butter: crunchy",
 Quote Model Author: Tiny, Quote Model body: "Channel your inner husky"]