In [1]:
## set up
import io

import pandas as pd
import requests
from pandas import DataFrame
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

pd.options.display.max_columns = 200
pd.options.display.max_rows = 4000




In [2]:
# Browser ("edit") link to the survey spreadsheet.
SHEETURL: str = "/".join(
    [
        "https://docs.google.com/spreadsheets/d",
        "1sptWDnGOyRcEyCHFYhyC8Y_zXGGM5jMpePRVusoSkFs",
        "edit?resourcekey=&gid=1530912831#gid=1530912831",
    ]
)

# CSV export link for one tab of the same spreadsheet. Pattern:
#   https://docs.google.com/spreadsheets/d/<SHEET_ID>/gviz/tq?tqx=out:csv&sheet=<SHEET_NAME>
SHEET_CSV_URL: str = "/".join(
    [
        "https://docs.google.com/spreadsheets/d",
        "1sptWDnGOyRcEyCHFYhyC8Y_zXGGM5jMpePRVusoSkFs",
        "gviz/tq?tqx=out:csv&sheet=Altered/congregrated data",
    ]
)

In [3]:
# global variables for our current data purposes
# Column headers used as the y (target) values when splitting the data.
Y_COLS = [
    "On a scale of 1 - 5 how successful do you feel you are in SEAL lab?",
    "On a scale of 1 - 5, how successful to do you feel your teammates are in SEAL lab?",
    "On a scale of 1 - 5, how successful do your peers think you are in SEAL lab?",
    "On whole, how would you rate your satisfaction in SEAL lab?",
]
# G:N (presumably the sheet's column letters) - demographic questions
X_DEMO_COLS = [
    "What group are you primarily affiliated with in SEAL Life (shows up in SEAL clan life)?",
    "AGE (Congregated)",
    "Gender (CONGREGATED)",
    "How do you describe your sexual orientation?",
    "Which categories best describe you?",
    "Do you have any chronic condition that substantially limit your life activities?",
    # Kept as ONE literal: splitting it across two adjacent literals previously
    # glued the halves together without a space ("...the termsthat best...").
    "If you have a disability, please indicate (if comfortable) the terms that best describe the condition(s)",
    "Which economic class do you identify with?",
    "RELIGION (Congregated)",
]
# O:U (presumably the sheet's column letters) - gaming-preference questions
X_GAME_COLS = [
    "When playing games, I am most motivated by...",
    "I consider myself to be...",
    "When playing games, I consider myself to be...",
    "When playing games, I am generally...",
    "When playing games, I prefer to be...",
    # The sheet repeats the header above; pandas.read_csv renames the second
    # occurrence by appending ".1", so that is the name present in the frame.
    "When playing games, I consider myself to be....1",
    "When playing games, I generally...",
]
# C, V:AJ - agree/disagree statements; need to be converted to a scale of 1-5
X_SEAL_COLS = [
    # NOTE: the header in the sheet ends with a space after the full stop;
    # without it this name does not match the DataFrame column.
    "When I use the SEAL Sudoku Sheet Tools, I feel like I am playing a game. ",
    "I consider myself to be highly experienced with the SEAL Sheet Tools.",
    "I find the Sudoku Sheet Tools to be aesthetically pleasing.",
    "I think SEAL rank reflect my work and my team's work accurately.",
    "I think SEAL leaderboard reflect my work and my team's work accurately.",
    "I think SEAL YBR reflect my work and my team's work accurately.",
    "I think SEAL VisTools reflect my work and my team's work accurately.",
    "I think SEAL RaceTrack reflect my work and my team's work accurately.",
    "I think SEAL Battle Station reflect my work and my team's work accurately.",
    "I think SEAL Command Center reflect my work and my team's work accurately.",
    "I understand what my SEAL statistics mean (Lab HP, Sheet HP, YBR Gold Delta, and Training Score).",
    "I know exactly how my actions affect my lab statistics (Lab HP, Sheet HP, YBR Gold Delta, and Training Score).",
    "Using the Sudoku Sheet Tools helps me and my team stay on track.",
    "Using the Sudoku Sheet Tools encourages me to take risks and challenge myself.",
    "Using the Sudoku Sheet Tools makes my work in SEAL more enjoyable.",
]
# AO:AX (presumably the sheet's column letters) - system usability statements
X_USABILITY_COLS = [
    "I think that I would like to use this system frequently.",
    "I found the system unnecessarily complex.",
    "I thought the system was easy to use.",
    "I think that I would need the support of a technical person to be able to use this system.",
    "I found the various functions in this system were well integrated.",
    "I thought there was too much inconsistency in this system.",
    "I would imagine that most people would learn to use this system very quickly.",
    "I found the system very cumbersome to use.",
    "I felt very confident using the system.",
    "I needed to learn a lot of things before I could get going with this system.",
]
# Columns that are removed from the feature set before modelling.
X_DROP_COLS = [
    "Timestamp",
    (
        "Sudoku Sheet Tools are all the tools you use when actively engaging with SEAL life."
        " Like Sudoku Clan Life, Dashboard, VisTools, RaceTrack, YBR, Kanban, Rank, Battle station, Venue, etc."
    ),
    "What groups are you affiliated with in SEAL Life?",
    "Have you ever developed software as a programmer for Sudoku Sheet Tools?",
    "What is your current age?",
    "On scale of 1-10, how confusing were the questions on this survey?",
    "If you have any, we appreciate any additional feedback on the structure and questions within the survey",
    "SUS Overall score",
    "Learnability subscore",
    "Usability subscore",
]

In [4]:
def get_data(timeout: float = 30.0) -> DataFrame:
    """function to get the data from the google sheet

    @parameter: timeout @type(float): seconds to wait for the server before
        giving up (default 30)

    raises: requests.HTTPError: if the server answers with an error status
        (e.g. wrong url)
    raises: requests.Timeout: if no response arrives within `timeout` seconds

    @returns: @type(DataFrame): the data from the google sheet"""

    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would freeze the notebook run.
    response = requests.get(SHEET_CSV_URL, timeout=timeout)
    response.raise_for_status()  # Raise error if request fails
    return pd.read_csv(io.StringIO(response.text))

In [92]:
## ORIGINAL
# Download the sheet once; later cells reuse `data` and `columns`.
data = get_data()  # noted as 68 x 55 when this cell was written
columns = data.columns

# Positions 21..35 are the SEAL statement columns (see the printed Index).
X_SEAL_COLS = columns[21:36]
print(X_SEAL_COLS)

Index(['When I use the SEAL Sudoku Sheet Tools, I feel like I am playing a game. ',
       'I consider myself to be highly experienced with the SEAL Sheet Tools.',
       'I find the Sudoku Sheet Tools to be aesthetically pleasing.',
       'I think SEAL rank reflect my work and my team's work accurately.',
       'I think SEAL leaderboard reflect my work and my team's work accurately.',
       'I think SEAL YBR reflect my work and my team's work accurately.',
       'I think SEAL VisTools reflect my work and my team's work accurately.',
       'I think SEAL RaceTrack reflect my work and my team's work accurately.',
       'I think SEAL Battle Station reflect my work and my team's work accurately.',
       'I think SEAL Command Center reflect my work and my team's work accurately.',
       'I understand what my SEAL statistics mean (Lab HP, Sheet HP, YBR Gold Delta, and Training Score).',
       'I know exactly how my actions affect my lab statistics (Lab HP, Sheet HP, YBR Gold Delta, 

In [None]:

# Step-by-step version of the preprocessing that split_xy() performs.
X_SEAL_COLS = columns[21:36]

# Features = everything except the dropped columns and the targets.
x_data = data.drop(columns=X_DROP_COLS + Y_COLS)

# Recode the agree/disagree answers on the SEAL statements as 1-5.
options_map = dict(
    zip(
        ["Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree"],
        range(1, 6),
    )
)
x_data[X_SEAL_COLS] = x_data[X_SEAL_COLS].replace(options_map)

# One-hot encode the remaining categorical (e.g. demographic) columns.
x_data = pd.get_dummies(x_data)
print(x_data.columns)

# One target array per outcome question.
y1_data, y2_data, y3_data, y4_data = (data[col].to_numpy() for col in Y_COLS)

Index(['When playing games, I consider myself to be...',
       'When playing games, I am generally...',
       'When playing games, I prefer to be...',
       'When playing games, I consider myself to be....1',
       'When playing games, I generally...',
       'When I use the SEAL Sudoku Sheet Tools, I feel like I am playing a game. ',
       'I consider myself to be highly experienced with the SEAL Sheet Tools.',
       'I find the Sudoku Sheet Tools to be aesthetically pleasing.',
       'I think SEAL rank reflect my work and my team's work accurately.',
       'I think SEAL leaderboard reflect my work and my team's work accurately.',
       ...
       'When playing games, I am most motivated by..._Mastering the game, Earning the most points',
       'When playing games, I am most motivated by..._Mastering the game, Earning the most points, Working with a team',
       'When playing games, I am most motivated by..._Mastering the game, Feeling immersed in the story/plot',
       'W

In [93]:
def split_xy(data, drop_cols, seal_cols=None, y_cols=None):
    """function to split the x and y data into separate ndarrays based on
    a set of columns to be dropped

    @parameter: data @type(DataFrame): unprocessed data (left unmodified)
    @parameter: drop_cols @type(list-like of str): names of columns to leave
        out of the x data; the y columns are always left out as well
    @parameter: seal_cols @type(list-like of str or None): columns holding
        'Strongly disagree' .. 'Strongly agree' answers to recode as 1-5;
        None means use the notebook-level X_SEAL_COLS
    @parameter: y_cols @type(list-like of str or None): target columns;
        None means use the notebook-level Y_COLS

    @returns: @type(ndarray): float x-values, one row per row of data
    @returns: @type(ndarray): y-values, one column per entry of y_cols"""

    # Defaults are looked up at call time so the function follows whatever
    # the notebook-level constants currently are, unless told otherwise.
    seal_cols = list(X_SEAL_COLS if seal_cols is None else seal_cols)
    y_cols = list(Y_COLS if y_cols is None else y_cols)

    # list(...) lets drop_cols be a list, tuple, Index or ndarray; `+` only
    # concatenates for plain lists.
    x_data = data.drop(columns=list(drop_cols) + y_cols)

    # handle seal_cols: map disagree - agree as 1-5
    options_map = {'Strongly disagree': 1,
                   'Disagree': 2,
                   'Neutral': 3,
                   'Agree': 4,
                   'Strongly agree': 5
                   }
    if seal_cols:
        # infer_objects() makes fully recoded columns numeric even on pandas
        # versions where replace() leaves them as object dtype; otherwise
        # get_dummies below would one-hot encode the 1-5 scores.
        x_data[seal_cols] = x_data[seal_cols].replace(options_map).infer_objects()

    # one-hot-encoding for categorical data (demographics); dtype=float gives
    # one numeric matrix instead of an object array of mixed ints and bools
    x_data = pd.get_dummies(x_data).to_numpy(dtype=float)
    y_data = data[y_cols].to_numpy()
    return x_data, y_data



In [99]:
# Fixed seed so the train/val/test membership is the same on every run.
SPLIT_SEED = 42

x_data, y_data = split_xy(data, X_DROP_COLS)

# First hold out 30% of the rows, then divide that hold-out into
# validation (67%) and test (33%).
x_train, x_temp, y_train, y_temp = train_test_split(
    x_data, y_data, test_size=0.3, random_state=SPLIT_SEED
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.33, random_state=SPLIT_SEED
)

print(x_train.shape, y_train.shape)
print(x_val.shape, y_val.shape)
print(x_test.shape, y_test.shape)

(47, 109) (47, 4)
(14, 109) (14, 4)
(7, 109) (7, 4)


In [None]:
def standardize(x_train, x_val, x_test):
    """function that standardizes each feature to zero mean and unit variance.
    The scaling statistics are computed from the training data only and then
    applied to all three sets, so nothing about the validation or test data
    influences the scaling.

    @parameter: x_train @type(ndarray): processed x training data; also used
        to fit the scaler
    @parameter: x_val @type(ndarray): processed x val data to be standardized
    @parameter: x_test @type(ndarray): processed x-test data to be standardized

    @returns: @type(tuple of ndarray): the standardized (x_train, x_val,
        x_test), in that order
    """
    scaler = StandardScaler().fit(x_train)  # only fit on training data
    return tuple(scaler.transform(part) for part in (x_train, x_val, x_test))

In [11]:
def main():
    """Entry point of the script: download the survey sheet and print it.

    Only the download step is wired in here; the trailing comments list
    further stages that are presumably still to be added.
    """
    # Code to be executed when the script is run directly

    # Fetch the sheet through get_data() and print the resulting DataFrame
    print(get_data())
    # Call other functions or perform operations here
    # TODO (outline of the remaining stages):
    #   - process data
    #   - separate data into sets
    #   - normalization
    #   - model