# In [1]:
from __future__ import annotations
from typing import List
import csv

class Group:
    """Timing record for one group inside a ``Time`` object.

    Attributes:
        total_time: Stored exactly as passed in; no conversion is applied.
        members: Values belonging to this group. A fresh empty list is
            created when the caller supplies none, so instances never
            share a default list.
    """

    def __init__(self, total_time: float = None, members: List[str] = None):
        if members is None:
            members = []
        self.members = members
        self.total_time = total_time

    def __str__(self):
        """Return the total time and the comma-separated members, each on its own line."""
        header = f"Total time: {self.total_time}"
        listing = "Members: " + ", ".join(self.members)
        return "\n".join((header, listing)) + "\n"

class Time:
    """Timing data of one reply: an overall total plus a list of groups.

    Attributes:
        total_time: Stored exactly as passed in; no conversion is applied.
        groups: Objects rendered with ``str()`` in ``__str__``. A fresh empty
            list is created when the caller supplies none.
    """

    def __init__(self, total_time: float = None, groups: List[Group] = None):
        self.total_time = total_time
        self.groups = groups if groups is not None else []

    def __str__(self):
        """Return the total time followed by every group, one per line."""
        # Joined outside the f-string: a backslash inside an f-string
        # expression is a syntax error before Python 3.12. The separator is a
        # real newline (the previous '/n' was printed literally).
        groups_str = "\n".join(str(group) for group in self.groups)
        return f"Total time: {self.total_time}\nGroups: {groups_str}"

class Reply:
    """One row of a survey export, split into metadata, scores, demographics and timings.

    The column layout comes from the owning survey: the first three entries of
    ``survey.index_dict`` (in insertion order) list the members of the
    metadata, requirements and demographics sections. The column after them
    is the total time, and the remaining columns are group timings.

    Attributes:
        response_id, date_submitted, last_page, start_language, seed,
        date_started, date_last_action, referrer_url: Metadata cells, or
            ``None`` when the layout does not list that column.
        scores: Raw cell of every requirement column, in column order.
        age, education, education_other, survey_count, experience, comments:
            Demographic cells, or ``None`` when the layout does not list them.
        time: ``Time`` holding the total time and one ``Group`` per column
            whose label starts with "Group time:".
    """

    # CSV header of a metadata column -> attribute that stores its cell.
    # Metadata members missing from this table are consumed but not stored.
    _METADATA_ATTRIBUTES = {
        "Response ID": "response_id",
        "Date submitted": "date_submitted",
        "Last page": "last_page",
        "Start language": "start_language",
        "Seed": "seed",
        "Date started": "date_started",
        "Date last action": "date_last_action",
        "Referrer URL": "referrer_url",
    }

    # Demographic members are stored under an attribute of the same name.
    _DEMOGRAPHIC_ATTRIBUTES = (
        "age",
        "education",
        "education_other",
        "survey_count",
        "experience",
        "comments",
    )

    def __init__(self, row, survey:Survey = None):
        """Parse ``row`` according to the layout described by ``survey``.

        Args:
            row: Cells of one CSV line. The list itself is left untouched;
                deletions are applied to a copy.
            survey: Object providing ``index_dict``, ``deletions`` and
                ``labels``. Required despite the ``None`` default.

        Raises:
            ValueError: If ``survey`` is ``None``.
            StopIteration: If ``survey.index_dict`` has fewer than three sections.
            IndexError: If the row or the labels are shorter than the layout
                requires, or a timing column precedes the first
                "Group time:" column.
        """
        if survey is None:
            raise ValueError("Reply needs the Survey that describes the row layout")

        # Every known field exists even when the layout does not list its
        # column, so __str__ never fails on a missing attribute.
        for attribute in (*self._METADATA_ATTRIBUTES.values(), *self._DEMOGRAPHIC_ATTRIBUTES):
            setattr(self, attribute, None)

        # Drop the unwanted columns from a copy of the row, and drop the very
        # same columns from a copy of the labels. Without the second step the
        # labels stay longer than the row and labels[index] below describes a
        # different column than row[index].
        row = self._drop_columns(row, survey.deletions)
        labels = self._drop_columns(survey.labels, survey.deletions)

        sections = iter(survey.index_dict.values())
        index = 0

        # Section 1: metadata, one column per member.
        for member in next(sections):
            attribute = self._METADATA_ATTRIBUTES.get(member)
            if attribute is not None:
                setattr(self, attribute, row[index])
            index += 1

        # Section 2: requirement scores; only the number of members matters.
        requirement_count = len(next(sections))
        self.scores = [row[column] for column in range(index, index + requirement_count)]
        index += requirement_count

        # Section 3: demographics, one column per member.
        for member in next(sections):
            if member in self._DEMOGRAPHIC_ATTRIBUTES:
                setattr(self, member, row[index])
            index += 1

        # The next column is the overall time; group timings follow it.
        self.time = Time(total_time=row[index])
        index += 1

        # NOTE(review): the last column of the row is never consumed
        # (upper bound is len(row) - 1) - confirm this is intentional.
        for column in range(index, len(row) - 1):
            label = labels[column]
            if label.startswith("Group time:"):
                self.time.groups.append(Group(total_time=row[column]))
            elif not self.time.groups:
                raise IndexError(
                    f"column {column} ({label!r}) holds a timing value but no "
                    "'Group time:' column precedes it"
                )
            else:
                self.time.groups[-1].members.append(row[column])

    @staticmethod
    def _drop_columns(values, deletions):
        """Return a copy of ``values`` with ``deletions`` removed one after another.

        Each index is applied with ``del`` to the already shortened list, so
        later indices refer to positions after the earlier removals.
        """
        kept = list(values)
        for position in deletions:
            del kept[position]
        return kept

    def __str__(self):
        """Return every parsed field on its own line, followed by the timing block."""
        lines = [
            f"Reply: {self.response_id}",
            f"Date submitted: {self.date_submitted}",
            f"Last page: {self.last_page}",
            f"Start language: {self.start_language}",
            f"Seed: {self.seed}",
            f"Date started: {self.date_started}",
            f"Date last action: {self.date_last_action}",
            f"Referrer URL: {self.referrer_url}",
            f"Scores: {self.scores}",
            f"Age: {self.age}",
            f"Education: {self.education}",
            f"Education other: {self.education_other}",
            f"Survey count: {self.survey_count}",
            f"Experience: {self.experience}",
            f"Comments: {self.comments}",
            f"{self.time}",
        ]
        return "\n".join(lines) + "\n"

class Survey:
    """A survey export: its column labels plus one ``Reply`` per completed row.

    ``types`` maps a survey type to its layout:

    * ``index_dict``: section name -> members of that section, in column order.
    * ``deletions``: column indices removed from each row before parsing.
    * ``expected_last_page``: value the "Last page" cell must have for a row
      to be kept.
    """

    # different types of survey, with different deletions and expected last page
    types = {
        "default": {
            "index_dict": {
                "metadata": [
                    "Response ID",
                    "Date submitted",
                    "Last page",
                    "Start language",
                    "Seed",
                    "Date started",
                    "Date last action",
                    "Referrer URL",
                ],
                "requirements": list(range(1, 65)),
                "demographics": [
                    "age",
                    "education",
                    "education_other",
                    "survey_count",
                    "experience",
                    "comments",
                ],
                "time": -1,
            },
            "deletions": [],
            "expected_last_page": "10",
        },
        "555283": {
            "index_dict": {
                "metadata": [
                    "Response ID",
                    "Date submitted",
                    "Last page",
                    "Start language",
                    "Seed",
                    "Date started",
                    "Date last action",
                    "Referrer URL",
                ],
                "requirements": list(range(1, 65)),
                "demographics": [
                    "age",
                    "education",
                    "education_other",
                    "survey_count",
                    "experience",
                    "comments",
                ],
                "time": -1,
            },
            "deletions": [51, 56],
            "expected_last_page": "10",
        },
        "628237": {
            "index_dict": {
                "metadata": [
                    "Response ID",
                    "Date submitted",
                    "Last page",
                    "Start language",
                    "Seed",
                    "Date started",
                    "Date last action",
                    "Referrer URL",
                    "Please choose a tool you want to evaluate for this survey.   ",
                    "Please choose a tool you want to evaluate for this survey [Other]"
                ],
                "requirements": list(range(1, 65)),
                "demographics": [
                    "age",
                    "education",
                    "education_other",
                    "survey_count",
                    "experience",
                    "comments",
                ],
                "time": -1,
            },
            "deletions": [53, 58],
            "expected_last_page": "11",
        },
    }

    def __init__(self, labels: List[str] = None, replies: List[Reply] = None, type: str = "default", csv_file: str = None):
        """Create a survey, optionally loading it from ``csv_file`` right away.

        Args:
            labels: Column labels; a fresh empty list when omitted.
            replies: Already parsed replies; a fresh empty list when omitted.
            type: Key into ``types``; only used when ``csv_file`` is given.
            csv_file: Path of a CSV export to load via ``init_from_csv``.
        """
        self.labels = labels if labels is not None else []
        self.replies = replies if replies is not None else []
        # members of each section, filled in by init_from_csv
        self.index_dict = {}
        self.deletions = []

        if csv_file is not None:
            self.init_from_csv(csv_file, type)

    def __str__(self):
        """Return the labels followed by every reply, one per line."""
        # Joined outside the f-string: a backslash inside an f-string
        # expression is a syntax error before Python 3.12. The separator is a
        # real newline (the previous '/n' was printed literally).
        replies_str = "\n".join(str(reply) for reply in self.replies)
        return f"Labels: {self.labels}\nReplies: {replies_str}"

    def init_from_csv(self, csv_file: str, type: str = "default"):
        """Load labels and completed replies from ``csv_file``.

        The first CSV line becomes ``self.labels``. Every later row whose
        "Last page" cell equals the type's ``expected_last_page`` is parsed
        into a ``Reply`` and appended to ``self.replies``; empty rows, rows
        too short to contain that cell and unfinished rows are skipped.

        Args:
            csv_file: Path of the CSV export.
            type: Key into ``types`` selecting the layout.

        Raises:
            KeyError: If ``type`` is not a key of ``types``.
            ValueError: If the file is empty or has no "Last page" column.
        """
        try:
            layout = self.types[type]
        except KeyError:
            raise KeyError(
                f"unknown survey type {type!r}; known types: {sorted(self.types)}"
            ) from None

        self.type = type
        self.index_dict = layout["index_dict"]
        self.deletions = layout["deletions"]
        expected_last_page = layout["expected_last_page"]

        # newline='' lets the csv module handle line endings itself, which
        # keeps newlines embedded in quoted cells intact.
        with open(csv_file, 'r', newline='') as file:
            csv_reader = csv.reader(file)

            header = next(csv_reader, None)
            if header is None:
                raise ValueError(f"'{csv_file}' is empty: no header line found")
            self.labels = header

            last_page_index = self.labels.index("Last page")

            for row in csv_reader:
                # Skip rows that did not complete the survey; the length check
                # also covers empty and truncated rows.
                if len(row) <= last_page_index or row[last_page_index] != expected_last_page:
                    continue
                self.replies.append(Reply(row, self))

# In [2]:
import glob
import os
from typing import List

# Folder that receives the generated figures, and folder holding the CSV exports
bnw_data_path = os.path.join("c:\\", "workspace", "borgnetzwerk", "tools", "scripts", "SWARM-SLR", "data")
folder_path = os.path.join("c:\\", "workspace", "surveys")

# Pattern matching files that start with "results-survey" and end with ".csv"
file_pattern = os.path.join(folder_path, 'results-survey*.csv')

# Sorted, because glob returns matches in an unspecified order
csv_files = sorted(glob.glob(file_pattern))

surveys:List[Survey] = []


def survey_type_from_filename(csv_file: str) -> str:
    """Return the text between "results-survey" and ".csv" in the file name of ``csv_file``."""
    filename = os.path.basename(csv_file)
    return filename.split("results-survey")[1].split(".csv")[0]


# One Survey per export; the type is taken from the file name. It is passed
# straight to Survey instead of being bound to a module-level variable called
# `type`, which would shadow the builtin for the rest of the session.
for csv_file in csv_files:
    surveys.append(Survey(csv_file=csv_file, type=survey_type_from_filename(csv_file)))

# Out: IndexError: list index out of range

# In [None]:
import matplotlib.pyplot as plt
from  matplotlib.colors import LinearSegmentedColormap

def create_box_plot(data, labels=None, vert=True):
    """Draw a box plot of ``data`` over a colour-gradient background, save it as SVG and show it.

    The figure is written to 'Requirements Review.svg' inside the
    module-level ``bnw_data_path`` folder.

    Args:
        data: Sequence of value lists; one box is drawn per entry.
        labels: Tick labels passed on to ``Axes.boxplot``.
        vert: ``True`` for vertical boxes on a wide figure, ``False`` for
            horizontal boxes on a tall figure.

    Note:
        ``plt.rc`` changes the global font size, so the setting also affects
        figures created afterwards.
    """
    plt.rc('font', size=4)

    # Orange -> white -> blue gradient, given as (position, colour) stops.
    colors = ["#FF7F0E", "#FFBB78", "#FFBB78", "white", "#AEC7E8", "#AEC7E8", "#1F77B4"]
    positions = [0, .175, .4, .5, 0.6, .825, 1.]
    cmap = LinearSegmentedColormap.from_list('rg', list(zip(positions, colors)), N=256)

    if vert:
        fig, ax = plt.subplots(dpi=300, figsize=(30, 1))
        # The y axis is inverted (10 at the bottom, 0 at the top).
        plotlim = ax.set_xlim(0, len(data) + 1) + ax.set_ylim(10, 0)

        # Background image stretched over the whole plotting area.
        ax.imshow([[0.5, 0.5], [0, 0]], cmap=cmap, interpolation='bicubic', extent=plotlim)

        agreement = ["strongly agree", "agree", "neither agree\nnor disagree", "disagree", "strongly disagree"]
        ax.set_ylabel('agreement with requirement')
        ax.set_yticks([1, 3, 5, 7, 9])
        ax.set_yticklabels(agreement)

        ax.set_xlabel('SLR-tool requirement')
        plt.setp(ax.get_xticklabels(), rotation=90, horizontalalignment='right')
    else:
        fig, ax = plt.subplots(dpi=300, figsize=(1, 30))
        plotlim = ax.set_xlim(1, 9) + ax.set_ylim(len(data), 0)

        ax.imshow([[1, 0], [1, 0]], cmap=cmap, interpolation='bicubic', extent=plotlim)

        ax.set_xlabel('Values')
        ax.set_ylabel('Requirement')

    ax.set_title('Tool Assisted Literature Surveys - A Requirements Review')
    # NOTE(review): recent Matplotlib releases rename the `labels` parameter
    # of boxplot to `tick_labels`; kept as `labels` for older versions -
    # confirm against the installed version.
    ax.boxplot(data, vert=vert, labels=labels)

    # Only the SVG is written; a PNG path was computed before but was
    # overwritten without ever being used. Saving happens before show() so
    # the file does not depend on what the backend does to the figure when
    # it is displayed.
    path = os.path.join(bnw_data_path, 'Requirements Review.svg')
    fig.savefig(path, bbox_inches='tight')
    plt.show()

# Example usage:
# Placeholder kept for compatibility; it is not read by the code below.
csv_file_path = 'your_data.csv'

# data[i] collects the numeric scores of requirement i over all replies of
# all surveys; labels[i] is the tick label of that requirement.
data = []
labels = []

for survey in surveys:
    for reply in survey.replies:
        for i, score in enumerate(reply.scores):
            if i >= len(data):
                data.append([])
            # The first character of a score cell is dropped and the rest is
            # parsed as an integer; empty cells and zero values are skipped.
            value = int(score[1:]) if score else None
            if value:
                data[i].append(value)

if data:
    # One short label per requirement (R1, R2, ...), always matching the
    # number of boxes. Reply has no `questions` attribute to take them from.
    labels = [f"R{number}" for number in range(1, len(data) + 1)]
else:
    # Nothing was loaded: fall back to example data so the plot can still be
    # produced.
    print("No survey replies found; plotting example data instead.")
    data = [
        [1, 2, 3, 4, 5],
        [2, 3, 4, 5, 6],
        [3, 4, 5, 6, 7]
    ]
    labels = ['Category 1', 'Category 2', 'Category 3']

# Plot once, after every survey has been aggregated.
create_box_plot(data, labels)