In [6]:
import polars as pl
import pandas as pd

file_path = "./BTP/BTP.csv"

btp = pl.read_csv(source=file_path, has_header=True)
#btp = pd.read_csv(filepath_or_buffer=file_path)
print(btp)

question_row = btp.row(1)
print(question_row)


shape: (4, 4)
┌───────┬────────────────────┬───────────────────┬───────────────────────────────────┐
│ Index ┆ Question           ┆ TableFormat       ┆ AttributeValues                   │
│ ---   ┆ ---                ┆ ---               ┆ ---                               │
│ i64   ┆ str                ┆ str               ┆ str                               │
╞═══════╪════════════════════╪═══════════════════╪═══════════════════════════════════╡
│ 1     ┆ Q1.Overall Liking  ┆ Full,Mean,T2B,B2B ┆ 10=Like Extremely\n9=9\n8=8\n7=7… │
│ 2     ┆ Q2.Purchase Intent ┆ 1,2,T2B,3,B2B,4,5 ┆ 5=Definitely Would Purchase\n4=P… │
│ 3     ┆ Q3.Scent Liking    ┆ Full,Mean,T2B,B2B ┆ 10=Like Extremely\n9=9\n8=8\n7=7… │
│ 4     ┆ Q4.Scent JAR       ┆ JAR,T2B,B2B       ┆ 5=Much Too Strong\n4=Very Strong… │
└───────┴────────────────────┴───────────────────┴───────────────────────────────────┘
(2, 'Q2.Purchase Intent', '1,2,T2B,3,B2B,4,5', '5=Definitely Would Purchase\\n4=Probably Would Purchase\\n3=Might or

In [9]:
columns = btp.columns

In [10]:
row_dict = dict(zip(columns, question_row))
row_dict

{'Index': 2,
 'Question': 'Q2.Purchase Intent',
 'TableFormat': '1,2,T2B,3,B2B,4,5',
 'AttributeValues': '5=Definitely Would Purchase\\n4=Probably Would Purchase\\n3=Might or Might Not Purchase\\n2=Probably Would Not Purchase\\n1=Definitely Would Not Purchase'}

# Processing

In [2]:
question_row = btp.iloc[1, :]

In [3]:
# The question itself
question = question_row["Question"]

In [4]:
question_row["TableFormat"]

'1,2,T2B,3,B2B,4,5'

Add space between period (.) and title

In [6]:
print(question)
question_final = ''
# Question: Q1.Overall Liking
if question.find('.') != -1 and question[question.find('.') + 1] != ' ':
    # Add a space after the period
    question_final = question.replace('.', '. ')
print(question_final)

Q1.Purchase Intent
Q1. Purchase Intent


## Make into a function

In [7]:
def add_space(question_title: str) -> str:
    """Adds a space after the period in the question title."""
    if question_title.find('.') != -1 and question_title[question_title.find('.') + 1] != ' ':
        # Add space
        question_title = question_title.replace('.', '. ')
    return question_title

Test it out:

In [9]:
# Print before
print(question)

# Run function
question_clean = add_space(question)

# Print new title
print(question_clean)

Q1.Purchase Intent
Q1. Purchase Intent


# Attribute Values

Need to split the attribute values and put them in a sensible data structure.

In [15]:
attribute_pair = question_row["AttributePair"]

In [17]:
attribute_pair

'5=Definitely Would Purchase\n4=Probably Would Purchase\n3=Might or Might Not Purchase\n2=Probably Would Not Purchase\n1=Definitely Would Not Purchase'

In [21]:
lines = attribute_pair.split("\n")
print(lines)

line_pairs = []
for line in lines:
    line_pairs.append(tuple(line.split('=')))
print(line_pairs)

['5=Definitely Would Purchase', '4=Probably Would Purchase', '3=Might or Might Not Purchase', '2=Probably Would Not Purchase', '1=Definitely Would Not Purchase']
[('5', 'Definitely Would Purchase'), ('4', 'Probably Would Purchase'), ('3', 'Might or Might Not Purchase'), ('2', 'Probably Would Not Purchase'), ('1', 'Definitely Would Not Purchase')]


In [24]:
df = pd.DataFrame(line_pairs, columns=["Value", "Attribute"])
print(df)
df['Value'] = df['Value'].astype(int)
print(df)

  Value                      Attribute
0     5      Definitely Would Purchase
1     4        Probably Would Purchase
2     3    Might or Might Not Purchase
3     2    Probably Would Not Purchase
4     1  Definitely Would Not Purchase
   Value                      Attribute
0      5      Definitely Would Purchase
1      4        Probably Would Purchase
2      3    Might or Might Not Purchase
3      2    Probably Would Not Purchase
4      1  Definitely Would Not Purchase


In [29]:
# Get T2B
question_row["TableFormat"]

'T2B,B2B,Full'

In [87]:
def add_space(question_title: str) -> str:
    """Adds a space after the period in the question title."""
    if question_title.find('.') != -1 and question_title[question_title.find('.') + 1] != ' ':
        # Add space
        question_title = question_title.replace('.', '. ')
    return question_title

class SurveyQuestion:
    def __init__(self, question: str, attribute_pair: str, table_format: str):
        self.question = self.process_question(question)
        self.table_format = table_format.split(',')
        self.attribute_frame = self._process_attribute_pair(attribute_pair)

    def _process_attribute_pair(self, attribute_pair: str):
        lines = attribute_pair.split('\n')
        #print(lines)
        line_pairs = []
        for line in lines:
            line_pairs.append(tuple(line.split('=')))
        #print(line_pairs)
        df = pd.DataFrame(line_pairs, columns=['Value', 'Attribute'])
        df["Value"] = df["Value"].astype(int)
        return df

    def process_question(self, question):
        return add_space(question)

In [88]:
survey_question = SurveyQuestion(question, attribute_pair, question_row["TableFormat"]) 

In [91]:
survey_question.attribute_frame

Unnamed: 0,Value,Attribute
0,5,Definitely Would Purchase
1,4,Probably Would Purchase
2,3,Might or Might Not Purchase
3,2,Probably Would Not Purchase
4,1,Definitely Would Not Purchase


# Table Format

Table format is a comma separated string

In [11]:
# "T3B,T2B,B2B,Full,Mean"
print(question_row["TableFormat"])

T2B,B2B,Full


In [None]:
def create_t2b(attribute_values: str) -> str:
    pass

## Polars

In [14]:
import polars as pl
import pandas as pd

file_path = "./BTP/BTP.csv"

btp = pl.read_csv(source=file_path, has_header=True)
print(btp)

btp.row(1)


shape: (2, 4)
┌───────┬────────────────────┬───────────────────────┬─────────────────────────────┐
│ Index ┆ Question           ┆ TableFormat           ┆ AttributePair               │
│ ---   ┆ ---                ┆ ---                   ┆ ---                         │
│ i64   ┆ str                ┆ str                   ┆ str                         │
╞═══════╪════════════════════╪═══════════════════════╪═════════════════════════════╡
│ 1     ┆ Q1.Overall Liking  ┆ T3B,T2B,B2B,Full,Mean ┆ 10=Like Extremely           │
│       ┆                    ┆                       ┆ 9=9                         │
│       ┆                    ┆                       ┆ 8=8                         │
│       ┆                    ┆                       ┆ 7=7                         │
│       ┆                    ┆                       ┆ 6=…                         │
│ 2     ┆ Q2.Purchase Intent ┆ T2B,B2B,Full          ┆ 5=Definitely Would Purchase │
│       ┆                    ┆                     

(2,
 'Q2.Purchase Intent',
 'T2B,B2B,Full',
 '5=Definitely Would Purchase\n4=Probably Would Purchase\n3=Might or Might Not Purchase\n2=Probably Would Not Purchase\n1=Definitely Would Not Purchase')

In [28]:
btp.filter(pl.col("Index")==1).select(pl.col("TableFormat")).to_dict()['TableFormat'][0]

'T3B,T2B,B2B,Full,Mean'

In [33]:
btp.filter(pl.col("Index") == 1).select(pl.col("TableFormat")).to_dict()["TableFormat"][0]

'T3B,T2B,B2B,Full,Mean'

## Pure Python


In [2]:
import polars as pl
import pandas as pd

file_path = "./BTP/BTP.csv"
csv = pl.read_csv(file_path)
question_list = csv.to_dicts()

In [3]:
question_row = question_list[0]
question_1 = question_row["Question"]

In [4]:
question_1

'Q1.Overall Liking'

## Question Title

In [5]:
class QuestionTitle:
    def __init__(self, raw_question: str):
        # Store the raw question
        self.raw_question = raw_question
        # Split the raw question into the question variable and question label
        self.question_variable, self.question_label = self._split_question(raw_question)
    
    def _split_question(self, question: str) -> tuple:
        # Split the question string into two parts using the period (.) as the delimiter
        parts = question.split('.', 1)
        # If the question string does not contain a period, raise an error
        if len(parts) != 2:
            raise ValueError("Invalid question format. Expected format: 'Qx.Question Label'")
        # Return the question variable and question label, both stripped of leading and trailing spaces
        return parts[0].strip(), parts[1].strip()

    def clean_question(self) -> str:
        # If the question variable or question label contains non-ASCII characters, raise an error
        if not self.question_variable.isascii() or not self.question_label.isascii():
            raise ValueError("Question contains non-ASCII characters.")
        # Return the cleaned question string, which is the question variable, a period, a space, and the question label
        return f"{self.question_variable}. {self.question_label}"

In [6]:
class QuestionTitle:
    def __init__(self, raw_question: str):
        self.raw_question = raw_question
    
    def clean_question(self, question: str) -> str:
        return question.replace(".", ". ")

In [7]:
def clean_question(question: str) -> str:
    return question.replace(".", ". ")

clean_question(question_1)

'Q1. Overall Liking'

In [8]:
def split_table_format(table_format: str) -> str:
    return table_format.split(",")

In [9]:
split_table_format(question_row["TableFormat"])

['Full', 'Mean', 'T2B', 'B2B']

## Attribute Value Pairs

In [10]:
question_row["AttributeValues"]

'10=Like Extremely\\n9=9\\n8=8\\n7=7\\n6=6\\n5=5\\n4=4\\n3=3\\n2=2\\n1=Dislike Extremely'

In [11]:
class AttributeValuePair:
    def __init__(self, attribute_value_pair: str, table_format: str):
        self.raw_attribute_value_pair = attribute_value_pair
        self.caret = "^"
        self.open_bracket = "("
        self.close_bracket = ")"
        self.full = self.make_full(self.raw_attribute_value_pair)
        self.table_format = table_format
        self.split_table_format = self.split_table_format(self.table_format)

    def split_pairs(self, raw_attribute_value_pair: str) -> str:
        return raw_attribute_value_pair.split("\\n")

    def get_attr_value_pairs(self, attr_pair_list: str) -> list[list[str]]:
        return [q.split("=") for q in attr_pair_list]   

    def format_wc_value_no_q(self, value: str) -> str:
        return "".join([self.caret, self.open_bracket, value, self.close_bracket, self.caret])
    
    def make_full(self, raw_attribute_value_pair: str) -> str:
        split_attribute_values = self.split_pairs(raw_attribute_value_pair)
        list_of_attribute_values = self.get_attr_value_pairs(split_attribute_values)

        out = ""
        for pair in list_of_attribute_values:
            out = out + "".join([pair[1], " ", self.format_wc_value_no_q(pair[0]), "\n"])
        return out
    
    # Add more checks to this
    def split_table_format(self, table_format):
        return table_format.split(",")
    
    def make_top_2_box(self, raw_attribute_values: str) -> str:
        split_attribute_values = self.split_pairs(raw_attribute_values)
        list_of_attribute_values = self.get_attr_value_pairs(split_attribute_values)
        print(list_of_attribute_values)
        values = []
        for value, attribute in list_of_attribute_values:
            values.append(int(value))
        # sort descending
        return values

In [12]:
attrpair = AttributeValuePair(question_row["AttributeValues"], question_row["TableFormat"])
attrpair.full

'Like Extremely ^(10)^\n9 ^(9)^\n8 ^(8)^\n7 ^(7)^\n6 ^(6)^\n5 ^(5)^\n4 ^(4)^\n3 ^(3)^\n2 ^(2)^\nDislike Extremely ^(1)^\n'

In [13]:
attrpair.make_top_2_box(attrpair.raw_attribute_value_pair)

[['10', 'Like Extremely'], ['9', '9'], ['8', '8'], ['7', '7'], ['6', '6'], ['5', '5'], ['4', '4'], ['3', '3'], ['2', '2'], ['1', 'Dislike Extremely']]


[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]

In [46]:
question_row["AttributeValues"].split("\\n")

['10=Like Extremely',
 '9=9',
 '8=8',
 '7=7',
 '6=6',
 '5=5',
 '4=4',
 '3=3',
 '2=2',
 '1=Dislike Extremely']

In [43]:
def get_attr_value_pairs(attr_pair_list: str) -> list[list[str]]:
    return [q.split("=") for q in attr_pair_list]

def format_wc_value_no_q(attr_value: str) -> str:
    caret = "^"
    open_bracket = "("
    closed_bracket = ")"
    return "".join([caret, open_bracket, attr_value, closed_bracket, caret])

def attribute_values_full(attribute_values: str) -> str:
    split_attribute_values = attribute_values.split("\\n")
    list_of_attribute_values = get_attr_value_pairs(split_attribute_values)

    out = ""
    for pair in list_of_attribute_values:
        out = out + "".join([pair[1]," ", format_wc_value_no_q(pair[0]), "\n"])
    return out

full_table = attribute_values_full(question_row["AttributeValues"])
full_table

'Like Extremely ^(10)^\n9 ^(9)^\n8 ^(8)^\n7 ^(7)^\n6 ^(6)^\n5 ^(5)^\n4 ^(4)^\n3 ^(3)^\n2 ^(2)^\nDislike Extremely ^(1)^\n'

In [12]:
question_row["AttributeValues"].split("\\n")

['10=Like Extremely',
 '9=9',
 '8=8',
 '7=7',
 '6=6',
 '5=5',
 '4=4',
 '3=3',
 '2=2',
 '1=Dislike Extremely']

In [18]:
split_attr_pair = question_row["AttributeValues"].split("\\n")

In [19]:
split_attr_pair

['10=Like Extremely',
 '9=9',
 '8=8',
 '7=7',
 '6=6',
 '5=5',
 '4=4',
 '3=3',
 '2=2',
 '1=Dislike Extremely']

In [16]:
def get_attr_value_pairs(attr_pair: str) -> list[list[str]]:
    return [q.split("=") for q in attr_pair]

In [20]:
get_attr_value_pairs(split_attr_pair)

[['10', 'Like Extremely'],
 ['9', '9'],
 ['8', '8'],
 ['7', '7'],
 ['6', '6'],
 ['5', '5'],
 ['4', '4'],
 ['3', '3'],
 ['2', '2'],
 ['1', 'Dislike Extremely']]

In [14]:
def format_wc_value_no_q(attr_value: str) -> str:
    caret = "^"
    open_bracket = "("
    closed_bracket = ")"
    return "".join([caret, open_bracket, attr_value, closed_bracket, caret])

In [21]:
attr_pair = attribute_values_full(question_row["AttributeValues"])
out = ""
for pair in attr_pair:
    out = out + "".join([pair[1]," ", format_wc_value_no_q(pair[0]), "\n"])
print(out)

NameError: name 'attribute_values_full' is not defined

In [44]:
out_str = attribute_values_full(question_row["AttributeValues"])
with open("./out_str.txt", "w") as f:
    f.write(out_str)

In [None]:
from string import Template
from abc import ABC, abstractproperty

class WCTable(ABC):
    def __init__(self):
        pass
    
    @abstractproperty
    def settings(self) -> str:
        pass

class WCShortTable:

class WCTableFactory(self, long_short: str):

All tables start with a `table_index` line.
- For short tables, each `question` has its own table_index
- For long tables, one table can hold many questions

All tables have a `settings` line
- Most of the time, the default settings are sufficient for both types of tables

# Table Settings

In [1]:
import re

class TableSettings:
    """WinCross Script table settings"""
    def __init__(self):
        self.default = "OF,OR,OQ,OI,O%,S1,OB"
        self.__ensure_no_duplicates(self.default)
        self.final = self.default

    def __ensure_no_duplicates(self, settings: str):
        settings_list = settings.split(",")
        sorted_settings_list = sorted(set(settings_list))
        self.final = ",".join(sorted_settings_list)

    def add_base(self) -> bool:
        final_list = self.final.split(",")
        final_list.append("OD")
        final_final = ",".join(final_list)
        self.__ensure_no_duplicates(final_final)
    
    def validate(self, settings: str) -> bool:
        settings_valid = False
        pattern = r"([A-Za-z%0-9]{2},)*([A-Za-z%0-9]{2})$"
        match = re.fullmatch(pattern, settings)
        if match:
            settings_valid = True
        return settings_valid
        
    def __str__(self):
        return f"{self.final}"
    
    def __repr__(self):
        return f"{self.final}"

Improved class

In [9]:
import re
from typing import List

class TableOptions:
    """WinCross Script table settings"""
    _DEFAULT_SETTINGS = "OR,OV,OI,O%,RV,S1,P0,V2,SA,SP"
    _VALID_SETTINGS = set(_DEFAULT_SETTINGS.split(",") + ["OD", "OO", "OS", "OW", "ON", "OV", "OH", "CP", "DW", "O$", "OC", "OT", "ST", "OF", "S", "F", "F1", "F2", "F3", "P0", "P1", "P2", "P3", "OA", "R%", "RA", "RT#"])
    _FREQ_SETTINGS = set(["F1", "F2", "F3"])
    _PERCENT_SETTINGS = set(["P0", "P1", "P2", "P3"])

    def __init__(self):
        self.final = self._ensure_no_duplicates(self._DEFAULT_SETTINGS)

    @staticmethod
    def _ensure_no_duplicates(settings: str) -> str:
        """Return a string of unique, sorted settings from the input string."""
        return ",".join(sorted(set(settings.split(","))))

    def add_rank_descending(self) -> None:
        """Add 'OD' to the settings if it's not already there."""
        if "OD" not in self.final:
            self.final += ",OD"
            self.final = self._ensure_no_duplicates(self.final)

    def validate(self, settings: str) -> bool:
        """Check if the settings string is valid."""
        settings_set = set(settings.split(","))
        if not settings_set.issubset(self._VALID_SETTINGS):
            return False
        if len(self._FREQ_SETTINGS.intersection(settings_set)) > 1:
            return False
        if len(self._PERCENT_SETTINGS.intersection(settings_set)) > 1:
            return False
        return True

    def __str__(self) -> str:
        return self.final

    def __repr__(self) -> str:
        return f"TableSettings({self.final})"

In [10]:
settings = TableOptions()
settings

TableSettings(O%,OI,OR,OV,P0,RV,S1,SA,SP,V2)

In [12]:
settings.add_rank_descending()

In [14]:
print(settings)

O%,OD,OI,OR,OV,P0,RV,S1,SA,SP,V2


Table options

OQ = Show the Total row
OR = Show Frequencies -- Only per request
OV = Show Vertical percents
OH = Show Horizontal percents -- Only for 1 client
OB = Hide blank rows -- Only for the same 1 client
OI = Indent levels on NET and SUB rows
O% = Show percent sign on percents
F1 = Show one decimal place on frequencies -- either or F2
F2 = Show two decimal places on frequencies -- either or F1
P0 = Show no decimal places on percents -- either or P1/P2
P1 = Show one decimal place on percents -- either or P0/P2
P2 = Show two decimal places on percents -- either or P0/P1

**Rank**
OD = Rank descending
OA = Rank ascending
R% = Rank by percents/means (Default: Rank by frequencies/means)

**Statistics Options**
SB = Show Sample size for statistic base
SM = Show Mean
SV = Show Standard deviation
SR = Show Standard error
SD = Show Median

**Statistic Decimal Places**
S0 = (Central tendency) Show no decimal places on statistics (except Standard deviation and Standard error)
S1 = (Central tendency) Show one decimal place on statistics (except Standard deviation and Standard error)
S2 = (Central tendency) Show two decimal places on statistics (except Standard deviation and Standard error)
S3 = (Central tendency) Show three decimal places on statistics (except Standard deviation and Standard error)
V0 = (Variability) Show no decimal places on Standard deviation and Standard error
V1 = (Variability) Show one decimal place on Standard deviation and Standard error
V2 = (Variability) Show two decimal places on Standard deviation and Standard error
V3 = (Variability) Show three decimal places on Standard deviation and Standard error

## Row Options