> Chalkiopoulos Georgios, Electrical and Computer Engineer NTUA <br />
> Data Science postgraduate Student <br />
> gchalkiopoulos@aueb.gr

# Import Libraries

In [50]:
import csv
from pathlib import Path
from dataclasses import dataclass
from datetime import datetime

from typing import List, Optional, Union, Set

import logging

# Setup Logger

In [51]:
# Create a base Logger class
class LoggingHandler:

    def __init__(self, *args, **kwargs):
        self.logger = self._setup_logger()

    def _setup_logger(self):
        """Setup up logger"""

        # Create logger
        logger = logging.getLogger(self.__class__.__name__)
        logger.setLevel(logging.INFO)

        if not logger.handlers:
            # Create console handler and set level to debug
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)

            # Create formatter
            formatter = logging.Formatter('[%(asctime)s] %(levelname)s [%(name)s] - %(message)s')

            # Add formatter to ch
            ch.setFormatter(formatter)

            # Add ch to logger
            logger.addHandler(ch)

        return logger

# Read data

In [43]:
@dataclass
class BeerReview:
    index: int
    brewery_id: int
    brewery_name: str
    review_time: datetime.timestamp
    review_overall: float
    review_aroma: float
    review_appearance: float
    review_profilename: str
    beer_style: str
    review_palate: float
    review_taste: float
    beer_name: str
    beer_abv: float
    beer_beerid: int
    user_id: Optional[Union[int, None]] = None

In [52]:
class ReviewReader(LoggingHandler):
    """Read reviews based on an input file"""
    reviews: List[BeerReview] = []

    def __init__(self, file_path: str, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.file_path = file_path

    @property
    def input_file(self) -> Path:
        """
        Creates a Path object containing the input file.
        Raises an exception if the file doesn't exist

        Returns:
            Path object
        """
        input_file: Path = Path(self.file_path)
        if not input_file.is_file():
            self.logger.error(f"{input_file.name} file doesn't exist.")

        return input_file

    def read_reviews(self) -> List[BeerReview]:
        """Read the reviews based on the """
        with open(self.input_file, encoding="utf8") as f:

            self.logger.info(f"Loading {self.input_file}")
            users: dict = {}
            user_id: int = 0

            for i, row in enumerate(csv.DictReader(f)):

                if not users.get(row["review_profilename"]):
                    users[row["review_profilename"]] = user_id
                    user_id += 1


                # create a review object
                review: BeerReview = BeerReview(
                    int(row["index"]) if row["index"].isnumeric() else None,
                    int(row["brewery_id"]) if row["brewery_id"].isnumeric() else None,
                    row["brewery_name"].strip(),
                    datetime.fromtimestamp(int(row["review_time"])),
                    float(row["review_overall"]),
                    float(row["review_aroma"]),
                    float(row["review_appearance"]),
                    row["review_profilename"],
                    row["beer_style"].strip(),
                    row["review_palate"],
                    row["review_taste"],
                    row["beer_name"],
                    row["beer_abv"],
                    row["beer_beerid"],
                    users[row["review_profilename"]])

                # add the review object to the total reviews
                self.reviews.append(review)

                # add username to set
                users
                if i % 300000 == 0 and i != 0:
                    self.logger.info(f"{i} reviews loaded.")

        self.logger.info(f"All reviews loaded. Total reviews: {len(self.reviews)}")
        f.close()

        return self.reviews

In [53]:
file_path: str = "beer_reviews.csv"
reviews: List[BeerReview] = ReviewReader(file_path=file_path).read_reviews()

[2023-02-24 00:52:15,435] INFO [ReviewReader] - Loading beer_reviews.csv
[2023-02-24 00:52:20,662] INFO [ReviewReader] - 300000 reviews loaded.
[2023-02-24 00:52:25,694] INFO [ReviewReader] - 600000 reviews loaded.
[2023-02-24 00:52:32,105] INFO [ReviewReader] - 900000 reviews loaded.
[2023-02-24 00:52:40,550] INFO [ReviewReader] - 1200000 reviews loaded.
[2023-02-24 00:52:45,665] INFO [ReviewReader] - 1500000 reviews loaded.
[2023-02-24 00:52:47,232] INFO [ReviewReader] - All reviews loaded. Total reviews: 1586614


In [57]:
reviews[4]

BeerReview(index=4, brewery_id=1075, brewery_name='Caldera Brewing Company', review_time=datetime.datetime(2010, 12, 30, 20, 53, 26), review_overall=4.0, review_aroma=4.5, review_appearance=4.0, review_profilename='johnmichaelsen', beer_style='American Double / Imperial IPA', review_palate='4.0', review_taste='4.5', beer_name='Cauldron DIPA', beer_abv='7.7', beer_beerid='64883', user_id=2)

In [14]:
import logging


# Create a base class
class LoggingHandler:
    def __init__(self, *args, **kwargs):
        self.log = logging.getLogger(self.__class__.__name__)


# Create test class A that inherits the base class
class testclassa(LoggingHandler):
    def testmethod1(self):
        # call self.log.<log level> instead of logging.log.<log level>
        self.log.error("error from test class A")


# Create test class B that inherits the base class
class testclassb(LoggingHandler):
    def testmethod2(self):
        # call self.log.<log level> instead of logging.log.<log level>
        self.log.error("error from test class B")


testclassa().testmethod1()
testclassb().testmethod2()

error from test class A
error from test class B
