In [1]:
#  Please write a number of classes to process and represent simple data, as follows:
#   1 - A superclass to represent a single data entry with two features. Superclass should have functionality to
#       store underlying "raw" data and to display formatted data.
#       Extend with a subclass to represent an entry from a more specific source containing additional numerical
#       data. 
#   2 - A factory class to produce an object given a "row" of input data, such as might be obtained from reading 
#       in a CSV file.
#   3 - Iterate over all sample rows given below ('rows'), using the factory to create objects.
#   4 - Very briefly describe why this type of code might be valuable or important.
#
# Note: include code to "standardize" data from each row (process strings, ensure data types, etc)

super_rows = [
    ["Pegasus", "True"],
    ["Unicorn", "False"]
]
sub_rows = [
    ["Alicorn", "23.1", "True"],
    ["basilisk", "120.433", "False"],
    ["ChiMera", "101.8", "True"],
    ["Nemean lion", "2", "False"],
    ["Griffin", "65.82,465.21", "False"],
    ["0rthros", "52.01", "False"],
    ["Cockatrice", "nan", "True"],
]

rows = super_rows + sub_rows

# Add ingestion code here

In [50]:
from dataclasses import dataclass
import numpy as np
import pandas as pd

@dataclass
class ParentClass:
    str_var: str
    bool_var: str

    def process_row(self, return_values=True, print_values=False):
        str_var_format = self.str_var.lower().capitalize()
        bool_var_format = True if self.bool_var.lower() == 'true' else False 
        if print_values:
            print(f'Name is: { str_var_format }\nBinary is: { bool_var_format } ' )
        if return_values:
            return str_var_format, bool_var_format

In [51]:
# instance of the "ParentClass"
parent = ParentClass("Pegasus", "True")

In [4]:
# stored values
print(parent.str_var)
print(parent.bool_var)

Pegasus
True


In [53]:
# display formated (no return)
parent.process_row(return_values=False, print_values=True)

Name is: Pegasus
Binary is: True 


In [59]:
@dataclass
class ChildClass(ParentClass):
    # (!) With subclassing we avoid repeating, just add what is new - specific to child.
    # str_var: str
    num_var: str
    # bool_var: str
            
    # add conditional return.
    def process_row(self, return_values=True, print_values=False):
        str_var_format = self.str_var.lower().capitalize()
        bool_var_format = True if self.bool_var.lower() == 'true' else False 
        try:
            num_var_format = float(self.num_var)
        except:
            num_var_format = np.nan
            
        if print_values:
            print(f'Name is: { str_var_format }\nBinary is: {bool_var_format} \nValue is: {num_var_format}' )
        if return_values:
            return str_var_format, num_var_format, bool_var_format
        

In [60]:
# instance of the "ParentClass"
child = ChildClass(str_var="Alicorn", num_var="-", bool_var="True")

In [61]:
# stored values
print(child.str_var)
print(child.bool_var)
print(child.num_var)

Alicorn
True
-


In [63]:
# display formated 
child.process_row(return_values=False, print_values=True)

Name is: Alicorn
Binary is: True 
Value is: nan


In [64]:
class FactoryClass:
    def process_row(example):
        if len(example) == 2:
            parent_instance = ParentClass(str_var=example[0], bool_var=example[1])
            return parent_instance.process_row()
        else:
            child_instance = ChildClass(str_var=example[0], num_var=example[1], bool_var=example[2])
            return child_instance.process_row(print_values=True)
    

In [65]:
row = FactoryClass.process_row(["Alicorn", "23.1", "True"])
row

Name is: Alicorn
Binary is: True 
Value is: 23.1


('Alicorn', 23.1, True)

In [46]:
string_list = []
bool_list = []
number_list = []

for example in rows:
    row = FactoryClass.process_example(example)
    string_list.append(row.str_var)
    bool_list.append(row.bool_var)
    try:
        number_list.append(row.num_var)
    except:
        number_list.append(np.nan)

AttributeError: type object 'FactoryClass' has no attribute 'process_example'

In [13]:
pd.DataFrame({
    'strings': string_list,
    'booleans': bool_list,
    'numbers': number_list
})

Unnamed: 0,strings,booleans,numbers
0,Pegasus,True,
1,Unicorn,False,
2,Alicorn,True,23.1
3,basilisk,False,120.433
4,ChiMera,True,101.8
5,Nemean lion,False,2
6,Griffin,False,"65.82,465.21"
7,0rthros,False,52.01
8,Cockatrice,True,
