## Step 1: load data and clean missing values

In [1]:
from google.colab import drive
drive.mount('/drive')

Mounted at /drive


In [2]:
import pandas as pd
from numpy import nan
import numpy as np
data = pd.read_csv("/drive/My Drive/Colab Notebooks/data/CCST Survey_20221109_Text_Answers.csv")

In [3]:
data=data[['myslice.satisfied','myslice.feedback']].copy()
display(data)

Unnamed: 0,myslice.satisfied,myslice.feedback
0,Agree,
1,Disagree,
2,Agree,
3,Agree,
4,Agree,
...,...,...
1718,Somewhat agree,
1719,Disagree,Nothing is where I want it to be in MySlice. E...
1720,Strongly agree,
1721,Somewhat agree,The credit card reconciliation process is horr...


In [4]:
data=data.dropna()
display(data)

Unnamed: 0,myslice.satisfied,myslice.feedback
6,Agree,"Clearer directions, help contact information, ..."
17,Agree,Can be difficult to navigate when trying to fi...
19,Strongly disagree,The platform switch to icons rather than heade...
24,Agree,I'd really like the option to set the default ...
34,Somewhat agree,I do not find Myslice as intuitive to use as i...
...,...,...
1673,Somewhat agree,I am not always 100 percent clear on where to ...
1703,Disagree,The lack of support in many areas of myslice w...
1714,Strongly agree,PLEASEEEEEEEEEE dont' change it!!! I just g...
1719,Disagree,Nothing is where I want it to be in MySlice. E...


export as CSV files as headers

In [None]:
data.to_csv("myslice-calssification.csv")

## Step 2: Split the data into training data and test data by 30%

In [5]:
y=data['myslice.satisfied'].values
X=data['myslice.feedback'].values

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1337)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print(X_train[0])
print(y_train[0])
print(X_test[0])
print(y_test[0])

(336,) (336,) (144,) (144,)
We have too many platforms that do not integrate with MySlice and it's always like pulling teeth to find what it is I am looking for. When I finally learn where something is--it changes and little communication goes out to inform us of these changes.
Strongly disagree
I'm not sure it is organized in a logical fashion.
Agree


In [7]:
# Check if the dataset is balanced(the number of training examples in each category)
unique, counts = np.unique(y_train, return_counts=True)
print(np.asarray((unique, counts)))

[['Agree' 'Disagree' 'Somewhat agree' 'Somewhat disagree'
  'Strongly agree' 'Strongly disagree']
 [110 37 108 36 21 24]]


## Step 3: vectorization(unigram)

In [8]:
my_stopwords=frozenset(['might', 'take', 'ourselves', 'by', 'everything', 'therein', 'twenty', 'others', 'keep', 'ever', 'my', 'been', 'top',  'via', 'are', 'onto', 'would', 'most', 'rather', 'except', 'should', 'per', 'wherein', 'third', 'yourselves', 'nine', 'is', 'namely', 'besides',  'down', 'anyone', 'within', 'latter', 'whose', 'bill', 'throughout', 'until', 'former', 'anything', 'i', 'call', 'thru', 'put', 'see', 'meanwhile', 'very', 'whoever', 'somehow', 'she', 'hereby', 'beside', 'full', 'then', 'something', 'whether', 'can', 'me', 'wherever', 'yours', 'towards', 'together', 'upon', 'name', 'again', 'therefore', 'alone', 'between', 'whom', 'the', 'interest', 'done', 'hereupon', 'them', 'than', 'con', 'eg', 'these', 'becoming', 'much', 'beforehand', 'under', 'too', 'another', 'whither', 'ie', 'un', 'and', 'above', 'amongst', 'across', 'moreover', 'own', 'both', 'other', 'hers', 'find', 'was', 'an', 'many', 'de', 'indeed', 'three', 'twelve', 'serious', 'otherwise', 'thick', 'latterly', 'they', 'thin', 'whatever', 'how', 'same', 'anyway', 'once', 'herself', 'will', 'well', 'give', 'being', 'thereafter', 'seemed', 'sometimes', 'up', 'empty', 'fifteen', 'thereupon', 'where', 'co', 'from', 'whereas', 'over', 'am', 'itself', 'somewhere', 'along', 'which', 'forty', 'also', 'as', 'for', 'amoungst', 'after', 'of', 'back', 'thereby', 'it', 'detail', 'front', 're', 'us', 'five', 'such', 'whole', 'almost', 'made', 'their', 'mine', 'at', 'may', 'sixty', 'before', 'must', 'everywhere', 'amount', 'first', 'already', 'etc', 'eight', 'through', 'perhaps', 'several', 'that', 'mill', 'more', 'whence', 'due', 'each', 'themselves', 'become', 'seems', 'cry', 'into', 'still', 'your', 'why', 'sometime', 'some', 'whenever', 'please', 'neither', 'because', 'herein', 'has', 'this', 'among', 'during', 'when', 'one', 'there', 'ltd', 'hence', 'becomes', 'whereby', 'inc', 'everyone', 'last', 'against', 'out', 'he', 'move', 'all', 'now', 'either', 'bottom', 'nevertheless', 'fifty', 'side', 'hundred', 'his', 'so', 'else', 'or', 'were', 'you', 'further', 'yet', 'here', 'had', 'beyond', 'if', 'part', 'below', 'who', 'be', 'around', 'someone', 'whereupon', 'behind', 'seem', 'ten', 'enough', 'four', 'about', 'in', 'off', 'though', 'since', 'elsewhere', 'anywhere', 'myself', 'sincere', 'hereafter', 'system', 'on', 'ours', 'could', 'with', 'seeming', 'get', 'thence', 'two', 'always', 'toward', 'a', 'yourself', 'do', 'himself', 'her', 'next', 'have', 'mostly', 'formerly', 'we', 'eleven', 'found', 'whereafter', 'anyhow', 'became', 'thus', 'what', 'fire', 'describe', 'him', 'those', 'to', 'afterwards', 'even', 'every', 'fill',  'often', 'its', 'our', 'show', 'go', 'six'])
print(len(my_stopwords))

295


In [9]:
added_stopwords = frozenset(['Myslice', 'myslice'])
my_stopwords =my_stopwords.union(added_stopwords)

In [10]:
print(len(my_stopwords))
print(my_stopwords)

297
frozenset({'my', 'herein', 'yourself', 'across', 'namely', 'where', 'anything', 'those', 'same', 'thereafter', 'has', 'elsewhere', 'so', 'everything', 'why', 'will', 'its', 'whereafter', 'onto', 'top', 'many', 'another', 'am', 'i', 'once', 'that', 'in', 'yours', 'anyway', 'fifty', 'what', 'this', 'mostly', 'hence', 'become', 'cry', 'part', 'thence', 'therefore', 'detail', 'wherein', 'co', 'much', 'several', 'most', 'should', 'one', 'back', 'them', 'amoungst', 'somehow', 'already', 'are', 'hereafter', 'whatever', 'him', 'amongst', 'whose', 'becomes', 'there', 'it', 'ie', 'wherever', 'again', 'three', 'mill', 're', 'except', 'up', 'be', 'and', 'nine', 'though', 'beyond', 'interest', 'well', 'hereupon', 'towards', 'whether', 'could', 'before', 'last', 'forty', 'next', 'rather', 'mine', 'own', 'herself', 'would', 'formerly', 'until', 'too', 'themselves', 'must', 'because', 'empty', 'how', 'thus', 'eg', 'hereby', 'were', 'due', 'ourselves', 'she', 'put', 'whereupon', 'per', 'fire', 'alw

#Step 3.1.1: vectorize the training data(unigram)

In [19]:
# Read the sklearn documentation to understand all vectorization options
from sklearn.feature_extraction.text import CountVectorizer
#  unigram term frequency vectorizer, set minimum document frequency to 2
unigram_count_vectorizer = CountVectorizer(encoding='latin-1', binary=False, analyzer= 'word', token_pattern=r"(?u)\b\w\w+\b|!|\?|\"|\'", min_df=2, stop_words=my_stopwords)

In [20]:
# fit vocabulary in training documents and transform the training documents into vectors
X_train_vec_uni = unigram_count_vectorizer.fit_transform(X_train)

# check the content of a document vector
print(X_train_vec_uni.shape)
print(X_train_vec_uni[0].toarray())

# check the size of the constructed vocabulary
print(len(unigram_count_vectorizer.vocabulary_))

# print out the first 10 items in the vocabulary
print(list(unigram_count_vectorizer.vocabulary_.items())[:10])

(336, 626)
[[0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0
  0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0

#Step 3.1.2: vectorize the test data(unigram)

In [21]:
# use the vocabulary constructed from the training data to vectorize the test data. 
# Therefore, use "transform" only, not "fit_transform", 
X_test_vec_uni = unigram_count_vectorizer.transform(X_test)

# print out #examples and #features in the test set
print(X_test_vec_uni.shape)

(144, 626)


# Step 4: Train a SVM classifier

In [22]:
# import the LinearSVC module
from sklearn.svm import LinearSVC

# initialize the LinearSVC model
svm_clf = LinearSVC(C=1)

# use the training data to train the model
svm_clf.fit(X_train_vec_uni,y_train)

LinearSVC(C=1)

# Step 4.1 Interpret a trained LinearSVC model

In [23]:
## For category "Agree" , get all features and their weights and sort them in increasing order
feature_ranks = sorted(zip(svm_clf.coef_[0], unigram_count_vectorizer.get_feature_names_out()))

## get the 10 features that are best indicators of agree sentiment (they are at the bottom of the ranked list)
agree_10 = feature_ranks[-10:]
print("agree words")
for i in range(0, len(agree_10)):
    print(agree_10[i])
print()

## get 10 features that are least relevant to "agree" sentiment (they are at the top of the ranked list)
not_agree_10 = feature_ranks[:10]
print("least agree words")
for i in range(0, len(not_agree_10)):
    print(not_agree_10[i])
print()

agree words
(0.7941227554660653, 'locate')
(0.8038622374376496, 'changed')
(0.8186664014386394, 'features')
(0.8671496790196392, 'pretty')
(0.8723453669482062, 'navigator')
(0.906263343382213, 'did')
(0.9239709904489642, 'options')
(0.9843657989522003, 'department')
(1.0293750219857378, 'communications')
(1.1160586206251568, 'guess')

least agree words
(-0.9671730693508949, 'needs')
(-0.8985626744812114, 'slow')
(-0.8681669820320476, 'platform')
(-0.8485043814035732, 'understand')
(-0.6816630454109232, 'frustrating')
(-0.6789052771819105, 'browser')
(-0.6781130248276659, 'thing')
(-0.6777470756710425, 'organized')
(-0.6739743826625615, 'nothing')
(-0.6472275877168207, 'logging')



# Step 5: Test the LinearSVC classifier

In [24]:
# test the classifier on the test data set, print accuracy score
svm_clf.score(X_test_vec_uni,y_test)

0.2847222222222222

In [25]:
# print confusion matrix and classification report

from sklearn.metrics import confusion_matrix
y_pred = svm_clf.predict(X_test_vec_uni)
cm=confusion_matrix(y_test, y_pred)
print(cm)
print()

from sklearn.metrics import classification_report
target_names = ['Agree', 'Disagree', 'Somewhat agree', 'Somewhat disagree','Strongly agree', 'Strongly disagree']
print(classification_report(y_test, y_pred, target_names=target_names))

[[15  3 12  5  3  1]
 [ 8  2  6  2  1  2]
 [16  1 20  5  1  2]
 [ 6  3  9  0  0  0]
 [ 2  0  2  1  2  1]
 [ 6  1  1  1  2  2]]

                   precision    recall  f1-score   support

            Agree       0.28      0.38      0.33        39
         Disagree       0.20      0.10      0.13        21
   Somewhat agree       0.40      0.44      0.42        45
Somewhat disagree       0.00      0.00      0.00        18
   Strongly agree       0.22      0.25      0.24         8
Strongly disagree       0.25      0.15      0.19        13

         accuracy                           0.28       144
        macro avg       0.23      0.22      0.22       144
     weighted avg       0.27      0.28      0.27       144



# Step 5.1 Interpret the prediction result

In [26]:
## get the confidence scores for all test examples from each of the five binary classifiers
svm_confidence_scores = svm_clf.decision_function(X_test_vec_uni)
## get the confidence score for the first test example
print(svm_confidence_scores[0])

## sample output: array([-1.05306321, -0.62746206,  0.31074854, -0.89709483, -1.08343089]
## because the confidence score is the highest for category 2, 
## the prediction should be 2. 

## Confirm by printing out the actual prediction
print(y_test[0])
print(X[0])

[-1.3334605  -1.30989722  0.01500915 -0.27497381 -1.68967797 -0.67915851]
Agree
Clearer directions, help contact information, and info in Answers would be helpful


In [27]:
# output prediction probs

from sklearn.calibration import CalibratedClassifierCV
svm_calibrated = CalibratedClassifierCV(svm_clf) 
svm_calibrated.fit(X_train_vec_uni, y_train)
y_test_proba = svm_calibrated.predict_proba(X_test_vec_uni)
y_test_proba[1]

array([0.28263431, 0.11173411, 0.31870188, 0.14030985, 0.07826307,
       0.06835677])

# Step 5.2 Error Analysis

In [None]:
# print out specific type of error for further analysis

# print out the positive examples that are mistakenly predicted as negative
err_cnt = 0
for i in range(0, len(y_test)):
    if(y_test[i]=='Agree' and y_pred[i]=='Disagree'):
        print(X_test[i])
        err_cnt = err_cnt+1
print("Agree to Disagree errors:", err_cnt)

err_cnt = 0
for i in range(0, len(y_test)):
    if(y_test[i]=='Disagree' and y_pred[i]=='Agree'):
        print(X_test[i])
        err_cnt = err_cnt+1
print("Disagree to Agree errors:", err_cnt)

The HR section of MySlice is quite confusing.
TBH I am glad there is an effort to centralize these services so I don't want to complain to much, but my main issue is with the overall usability of myslice. It can be hard to navigate and find stuff, and I know it's "my slice" but some variation in colors (maybe by function) would be really helpful 
The panels are great in MySLice but when I try to click on the panel where I approve student time will log me out. The work around click on another tile 1st then click on the tile to approve the timesheet. Also, I don't like that I need to go to MySlice for the directory that used to be online. We have external constituents that need to contact us and now they don't have a way to find us in the directory. Staff and faculty are also not updating the directory with their extension or number to contact in MySlice - many people cannot be found in the directory making it difficult to contact them if they don't respond to email. 
Agree to Disagree e

#tf-idf

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_count_vectorizer = TfidfVectorizer(ngram_range=(1,2), analyzer= 'word', token_pattern=r"(?u)\b\w\w+\b|!|\?|\"|\'", stop_words=my_stopwords)

In [12]:
# fit vocabulary in training documents and transform the training documents into vectors
X_train_vec_tf = tfidf_count_vectorizer.fit_transform(X_train)

# check the content of a document vector
print(X_train_vec_tf.shape)
print(X_train_vec_tf[0].toarray())

# check the size of the constructed vocabulary
print(len(tfidf_count_vectorizer.vocabulary_))

# print out the first 10 items in the vocabulary
print(list(tfidf_count_vectorizer.vocabulary_.items())[:10])

(336, 5463)
[[0. 0. 0. ... 0. 0. 0.]]
5463
[('platforms', 3679), ('not', 3203), ('integrate', 2387), ("'", 54), ('like', 2642), ('pulling', 3849), ('teeth', 4713), ('looking', 2816), ('finally', 1771), ('learn', 2602)]


In [13]:
# use the vocabulary constructed from the training data to vectorize the test data. 
# Therefore, use "transform" only, not "fit_transform", 
X_test_vec_tf = tfidf_count_vectorizer.transform(X_test)

# print out #examples and #features in the test set
print(X_test_vec_tf.shape)

(144, 5463)


#step 4 train SVC

In [14]:
# import the LinearSVC module
from sklearn.svm import LinearSVC

# initialize the LinearSVC model
svm_clf = LinearSVC(C=0.1)

# use the training data to train the model
svm_clf.fit(X_train_vec_tf,y_train)

LinearSVC(C=0.1)

# Step 4.1 Interpret a trained LinearSVC model

In [15]:
## For category "Strongly Agree" , get all features and their weights and sort them in increasing order
feature_ranks = sorted(zip(svm_clf.coef_[0], tfidf_count_vectorizer.get_feature_names_out()))
## get the 10 features that are best indicators of agree sentiment (they are at the bottom of the ranked list)
agree_10 = feature_ranks[-20:]
print("strongly agree words")
for i in range(0, len(agree_10)):
    print(agree_10[i])
print()

feature_ranks = sorted(zip(svm_clf.coef_[5], tfidf_count_vectorizer.get_feature_names_out()))
## get 10 features that are most relevant to "strongly diagree" sentiment (they are at the top of the ranked list)
not_agree_10 = feature_ranks[-10:]
print("strongly disagree words")
for i in range(0, len(not_agree_10)):
    print(not_agree_10[i])
print()

strongly agree words
(0.13677916205053345, 'time')
(0.13741147735725634, 'hard information')
(0.14236721780755618, 'feedback')
(0.14236721780755618, 'no feedback')
(0.14452848205618304, 'easy navigate')
(0.14576083149240276, 'student')
(0.15040343165856915, 'changes communicated')
(0.15077267485891432, 'only')
(0.1510309102365599, 'department')
(0.1515753864304643, 'no comment')
(0.1596360985504045, 'didn')
(0.1596360985504045, "didn '")
(0.1716459618998053, 'updates changes')
(0.17287703285580547, 'information need')
(0.2010947772460513, 'usually')
(0.210937819084364, 'fine')
(0.22181145918573242, 'contact')
(0.23357244381224496, 'easy')
(0.29659274463411567, 'none')
(0.3959006918467258, 'changes')

strongly disagree words
(0.14748909425391432, 'jokey avoid')
(0.14748909425391432, 'platform jokey')
(0.14751200581782248, 'change')
(0.14768386248547088, 'awful update')
(0.14768386248547088, 'update change')
(0.15674619710640444, 'available help')
(0.15674619710640444, 'nobody')
(0.15674

In [16]:
# test the classifier on the test data set, print accuracy score
svm_clf.score(X_test_vec_tf,y_test)

0.3402777777777778

In [17]:
# print confusion matrix and classification report

from sklearn.metrics import confusion_matrix
y_pred = svm_clf.predict(X_test_vec_tf)
cm=confusion_matrix(y_test, y_pred)
print(cm)

from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
print('precision in each category is:', precision_score(y_test, y_pred, average=None))
print('recall in each category is:', recall_score(y_test, y_pred, average=None))

from sklearn.metrics import classification_report
target_names = ['Agree', 'Disagree', 'Somewhat agree', 'Somewhat disagree','Strongly agree', 'Strongly disagree']
print(classification_report(y_test, y_pred, target_names=target_names))

[[26  0 13  0  0  0]
 [10  0 11  0  0  0]
 [23  0 22  0  0  0]
 [10  0  8  0  0  0]
 [ 1  0  6  0  1  0]
 [ 6  0  7  0  0  0]]
precision in each category is: [0.34210526 0.         0.32835821 0.         1.         0.        ]
recall in each category is: [0.66666667 0.         0.48888889 0.         0.125      0.        ]
                   precision    recall  f1-score   support

            Agree       0.34      0.67      0.45        39
         Disagree       0.00      0.00      0.00        21
   Somewhat agree       0.33      0.49      0.39        45
Somewhat disagree       0.00      0.00      0.00        18
   Strongly agree       1.00      0.12      0.22         8
Strongly disagree       0.00      0.00      0.00        13

         accuracy                           0.34       144
        macro avg       0.28      0.21      0.18       144
     weighted avg       0.25      0.34      0.26       144



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#Error analysis

In [18]:
# print out specific type of error for further analysis

# print out the "Strongly disagree" examples that are mistakenly predicted as "Agree"
# according to the confusion matrix, there should be 6 such examples

err_cnt = 0
for i in range(0, len(y_test)):
    if(y_test[i]=='Strongly disagree' and y_pred[i]=='Agree'):
        print(X_test[i])
        err_cnt = err_cnt+1
print("Strongly disagree examples that are mistakenly predicted as agree errors:", err_cnt)

E.g.: What is BFAS? Often, the items available to bring you to the next set of resources are confusing or ineffective. As a staff member, when is the first row available to me in regards to academics at SU? As a member of career services, how come I can't see resources available to my position or duties? Are there resources available to me (trainings, linkedin learning recommendations) regarding position type? What about non-traditional resources like housing or law clinics?
Usage of MySlice is very confusing and not ease of usage
I use My Slice daily, over 50% of the day for class management, student information, transcripts, and no problems with myslice, I think its great 
The new "tile" structure is much harder to use, since you can no longer tell which items are included within each tile.
The "new" interface is improved, but the information under the "faculty and advisors" tab makes me feel like I'm using a dial-up modem. The information is slow and doesn't easily integrate with al

In [None]:
# print out the "disagree" examples that are mistakenly predicted as "agree"
# according to the confusion matrix, there should be 12 such examples
err_cnt = 0
for i in range(0, len(y_test)):
    if(y_test[i]=='Disagree' and y_pred[i]=='Agree'):
        print(X_test[i])
        err_cnt = err_cnt+1
print("Disagree examples that are mistakenly predicted as agree errors", err_cnt)

Constantly getting timed out is frustrating as is switching from the Myslice student home page to the employee home page every time I sign on. The search function is good as there are a couple of things I can't figure out where they live.  Credit card reconciliation is nice that its all online and has a work flow but completing the form takes so much longer. Perhaps its a connectivity problem that I have to wait for every field of a chart string line.  At least in JPMC I could have a list of favorite chart string to choose from that would immediately populate the form.  I know ITS assists with Blackboard but as for Myslice I didn't know about help or support.  The role out of the new Myslice was terrible. Some prior communication could has softened the transition some (maybe).
Serious issue with student phone numbers not loading into Medicat properly, so if there is a mental health emergency we are unable to contact them by phone.  MySlice also has a known issue with updating address i

#sample-down to check if it is okay to reset labels


In [None]:
data = pd.read_csv("/drive/My Drive/Colab Notebooks/data/CCST Survey_20221109_Text_Answers.csv")
data=data[['myslice.satisfied','myslice.feedback']].copy()
data=data.dropna()
display(data)

df=data.sample(frac=0.10, random_state=1000)
display(df)

Unnamed: 0,myslice.satisfied,myslice.feedback
6,Agree,"Clearer directions, help contact information, ..."
17,Agree,Can be difficult to navigate when trying to fi...
19,Strongly disagree,The platform switch to icons rather than heade...
24,Agree,I'd really like the option to set the default ...
34,Somewhat agree,I do not find Myslice as intuitive to use as i...
...,...,...
1673,Somewhat agree,I am not always 100 percent clear on where to ...
1703,Disagree,The lack of support in many areas of myslice w...
1714,Strongly agree,PLEASEEEEEEEEEE dont' change it!!! I just g...
1719,Disagree,Nothing is where I want it to be in MySlice. E...


Unnamed: 0,myslice.satisfied,myslice.feedback
790,Somewhat agree,"The platform can be cumbersome to navigate, bu..."
99,Somewhat disagree,Clunky and outdated interface
1468,Strongly agree,Our Whitman IT Team is unrivaled in the level ...
179,Disagree,"I have problems with team time reporting, and ..."
327,Agree,It seems more and more functionality is moving...
782,Somewhat disagree,I find issues with various components of MySli...
243,Somewhat agree,its a significant upgrade from what was in pla...
1567,Somewhat agree,Sometime difficult to pick the keywords that r...
1195,Somewhat agree,I find the new format does not make it easy to...
212,Somewhat agree,Would have liked it if it was easier to connec...
