## Libraries

We will use the following packages:
- **pandas**: for data manipulation
- **re**: regular expressions, used to define the rules for detecting invalid characters
- **operator**: a way of defining the math operations. We will create a dictionary whose keys are the common math operator symbols ("+", "-", "*") and whose values are the functions that actually perform those operations

In [3]:
import pandas as pd
import re
import operator

## 1. Universal validation function for any input: is_valid_label()

In [4]:
def is_valid_label(label: str) -> bool:
    """Tell whether *label* is made up only of ASCII letters and underscores.

    Returns False for anything that is not a string (for example an integer
    column name) and for the empty string, since the pattern requires at
    least one character.
    """
    # The isinstance test short-circuits, so the regex only ever sees str input.
    return isinstance(label, str) and re.fullmatch(r'^[a-zA-Z_]+$', label) is not None

## 2. Function extracting 3 elements from the Role string input - left_column, operator, right_column

In [5]:
def parse_role(role: str):
    """Split a role such as ``"left + right"`` into its operands and operation.

    Args:
        role: Expression made of two column names separated by exactly one
            of the supported operators ``+``, ``-`` or ``*``.

    Returns:
        ``(left_column_name, operator_function, right_column_name)`` with
        leading/trailing whitespace stripped from both names, or
        ``(None, None, None)`` when *role* is not a string, contains no
        supported operator, mixes several operators, or repeats the operator.
        The column names themselves are not validated here.
    """
    failure = (None, None, None)

    # A non-string role cannot be searched or split; report it as a parse
    # failure like every other invalid input instead of raising TypeError.
    if not isinstance(role, str):
        return failure

    # Map the allowed string symbols to actual math operations
    allowed_ops = {
        '+': operator.add,
        '-': operator.sub,
        '*': operator.mul,
    }

    # Exactly one kind of operator symbol may appear in the role
    present_ops = [op for op in allowed_ops if op in role]
    if len(present_ops) != 1:
        return failure

    op_symbol = present_ops[0]

    # Splitting must yield exactly two operands; more parts mean the same
    # operator occurs several times (e.g. "a + b + c").
    parts = role.split(op_symbol)
    if len(parts) != 2:
        return failure

    left_col, right_col = (part.strip() for part in parts)
    return left_col, allowed_ops[op_symbol], right_col

## 3. Final function

In [6]:
def add_virtual_column(df: pd.DataFrame, role: str, new_column: str) -> pd.DataFrame:
    """Return a copy of *df* with an extra column computed from *role*.

    Args:
        df: Source DataFrame; it is copied, not modified.
        role: Expression understood by ``parse_role`` (two column names
            joined by a single supported operator).
        new_column: Name of the column to add.

    Returns:
        The extended copy, or an empty DataFrame when any validation step
        fails: invalid *new_column*, unparsable *role*, invalid or missing
        operand columns, or an invalid label among the existing columns.
    """
    # The target name is checked before the role is even parsed.
    if not is_valid_label(new_column):
        return pd.DataFrame()

    # parse_role reports failure through a None operator function.
    lhs, compute, rhs = parse_role(role)
    if compute is None:
        return pd.DataFrame()

    # Both operand names must themselves be well-formed labels...
    if not (is_valid_label(lhs) and is_valid_label(rhs)):
        return pd.DataFrame()

    # ...and must be present in the frame.
    existing = df.columns
    if not (lhs in existing and rhs in existing):
        return pd.DataFrame()

    # Every label already in the frame has to pass validation as well.
    if not all(is_valid_label(name) for name in existing):
        return pd.DataFrame()

    # Work on a copy so the caller's DataFrame is left untouched.
    augmented = df.copy()
    augmented[new_column] = compute(augmented[lhs], augmented[rhs])
    return augmented

## 4. Final function tests

In [14]:
def test_sum_of_two_columns():
    """Check that the role "label_one+label_two" yields a column holding the sum."""
    source = pd.DataFrame(
        [[1, 1]] * 2,
        columns=["label_one", "label_two"],
    )
    expected = pd.DataFrame(
        [[1, 1, 2]] * 2,
        columns=["label_one", "label_two", "label_three"],
    )
    actual = add_virtual_column(source, "label_one+label_two", "label_three")
    assert actual.equals(expected), (
        f"The function should sum the columns: label_one and label_two.\n\nResult:\n\n{actual}\n\nExpected:\n\n{expected}"
    )


def test_multiplication_of_two_columns():
    """Check that the role "label_one * label_two" yields a column holding the product."""
    source = pd.DataFrame(
        [[1, 1]] * 2,
        columns=["label_one", "label_two"],
    )
    expected = pd.DataFrame(
        [[1, 1, 1]] * 2,
        columns=["label_one", "label_two", "label_three"],
    )
    actual = add_virtual_column(source, "label_one * label_two", "label_three")
    assert actual.equals(expected), (
        f"The function should multiply the columns: label_one and label_two.\n\nResult:\n\n{actual}\n\nExpected:\n\n{expected}"
    )


def test_subtraction_of_two_columns():
    """Check that the role "label_one - label_two" yields a column holding the difference."""
    source = pd.DataFrame(
        [[1, 1]] * 2,
        columns=["label_one", "label_two"],
    )
    expected = pd.DataFrame(
        [[1, 1, 0]] * 2,
        columns=["label_one", "label_two", "label_three"],
    )
    actual = add_virtual_column(source, "label_one - label_two", "label_three")
    assert actual.equals(expected), (
        f"The function should subtract the columns: label_one and label_two.\n\nResult:\n\n{actual}\n\nExpected:\n\n{expected}"
    )


def test_empty_result_when_invalid_labels():
    """Check that a new column name containing a digit ("label3") gives an empty DataFrame."""
    source = pd.DataFrame(
        [[1, 2]] * 3,
        columns=["label_one", "label_two"],
    )
    actual = add_virtual_column(source, "label_one + label_two", "label3")
    assert actual.empty, (
        f"Should return an empty df when the \"new_column\" is invalid.\n\nResult:\n\n{actual}\n\nExpected:\n\nEmpty df"
    )


def test_empty_result_when_invalid_rules():
    """Check that a role with a bad character or an unknown column gives an empty DataFrame."""
    source = pd.DataFrame(
        [[1, 1]] * 2,
        columns=["label_one", "label_two"],
    )

    # Case 1: the left operand contains the forbidden character '&'.
    actual = add_virtual_column(source, "label&one + label_two", "label_three")
    assert actual.empty, (
        f"Should return an empty df when the role have invalid character: '&'.\n\nResult:\n\n{actual}\n\nExpected:\n\nEmpty df"
    )

    # Case 2: the left operand is well-formed but not a column of the frame.
    actual = add_virtual_column(source, "label_five + label_two", "label_three")
    assert actual.empty, (
        f"Should return an empty df when the role have a column which isn't in the df: 'label_five'.\n\nResult:\n\n{actual}\n\nExpected:\n\nEmpty df"
    )


def test_when_extra_spaces_in_rules():
    """Check that whitespace around the operator and at the ends of the role is tolerated."""
    source = pd.DataFrame(
        [[1, 1]] * 2,
        columns=["label_one", "label_two"],
    )
    expected = pd.DataFrame(
        [[1, 1, 2]] * 2,
        columns=["label_one", "label_two", "label_three"],
    )

    # Spaces around the operator plus a trailing space.
    actual = add_virtual_column(source, "label_one + label_two ", "label_three")
    assert actual.equals(expected), (
        f"Should work when the role have spaces between the operation and the column.\n\nResult:\n\n{actual}\n\nExpected:\n\n{expected}"
    )

    # Leading and trailing spaces around the whole role.
    actual = add_virtual_column(source, "  label_one + label_two ", "label_three")
    assert actual.equals(expected), (
        f"Should work when the role have extra spaces in the start/end.\n\nResult:\n\n{actual}\n\nExpected:\n\n{expected}"
    )

In [15]:
# Run the tests to verify the logic.
# Each test relies on a plain `assert`, so a failing check raises AssertionError
# and stops the cell; the success message is printed only when all six pass.
test_sum_of_two_columns()
test_multiplication_of_two_columns()
test_subtraction_of_two_columns()
test_empty_result_when_invalid_labels()
test_empty_result_when_invalid_rules()
test_when_extra_spaces_in_rules()
print("All tests passed successfully!")

All tests passed successfully!
