In [1]:
import pandas as pd

In [2]:
#importing the dataframe
df = pd.read_csv("./Data/Data.csv")

In [3]:
df.head()

Unnamed: 0,Privacy Text,Score
0,We are unable to respond to Do Not Track signa...,1
1,These tracking technologies collect informatio...,1
2,We and our third-party partners may also use c...,1
3,Company may share data collected from or about...,1
4,In the event that the company is involved in a...,1


In [4]:
df.shape

(227, 2)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 227 entries, 0 to 226
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Privacy Text  227 non-null    object
 1   Score         227 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 3.7+ KB


### Text preprocessing

#### Removing Punctuations and converting all word to lowercase

In [6]:
import string
import nltk


def remove_punctuation(text):
    no_punctuation_text = ''.join([i for i in str(text) if i not in string.punctuation])
    return no_punctuation_text.lower()

In [7]:
df['Privacy Text'] = df['Privacy Text'].apply(remove_punctuation)

In [8]:
df.head()

Unnamed: 0,Privacy Text,Score
0,we are unable to respond to do not track signa...,1
1,these tracking technologies collect informatio...,1
2,we and our thirdparty partners may also use co...,1
3,company may share data collected from or about...,1
4,in the event that the company is involved in a...,1


#### Removing nonwords and reducing word to it's lemma

In [9]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [10]:
import re 

def remove_nonwords(str_):
    return re.sub("[^A-Za-z ]\w+[^A-Za-z]*", ' ', str_)

# Lemmatization and Removing stop words and non words
def text_preprocessing(text):
    text = remove_nonwords(text)
    tokenized_text = [token.lemma_ for token in nlp(text)]
    no_stopwords_list = [i.lower() for i in tokenized_text if i not in nlp.Defaults.stop_words]
    lemma_text = ' '.join(no_stopwords_list)
    return lemma_text

In [11]:
# Preprocessing the Headline text
df['Privacy Text'] = df['Privacy Text'].apply(text_preprocessing)

In [12]:
df.head()

Unnamed: 0,Privacy Text,Score
0,unable respond track signal set browser time,1
1,track technology collect information use servi...,1
2,thirdparty partner use cookie tracking technol...,1
3,company share datum collect party partner faci...,1
4,event company involve merger acquisition bankr...,1


In [13]:
# dropping ALL duplicte values
df = df.drop_duplicates()

### Building Model

#### Split data into train and test sets

In [14]:
from sklearn.model_selection import train_test_split

X_train_nv, X_test_nv, y_train, y_test = train_test_split(df['Privacy Text'], df['Score'], 
                                                    train_size=0.8, 
                                                    random_state=42)

#### Vectorization

In [15]:
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.feature_selection import SelectKBest, chi2

vectorizer = TfidfVectorizer(max_features=500,stop_words=stopwords.words('english'))
X_train = vectorizer.fit_transform(X_train_nv).toarray()
X_test = vectorizer.transform(X_test_nv)

In [16]:
#vectorizer = CountVectorizer(ngram_range = (3,3))
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train_nv) 
X_test = vectorizer.transform(X_test_nv) 
features = (vectorizer.get_feature_names())

In [17]:
#Save the vectorizer and feature Selector
import pickle

pickle.dump(vectorizer, open("./Model/vectorizer.pkl", "wb"))

#### Training the model

In [18]:
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *
from sklearn.svm import SVC

LR = LogisticRegression()
SGDC = SGDClassifier()
RFC = RandomForestClassifier(n_estimators=300, random_state=0)

# Logistic Regression
LR.fit(X_train, y_train)
LR_Model = LR.predict(X_test)
print("\nLogistic Regression Algorithm\n")
print("Confusion Matrix: \n",confusion_matrix(y_test,LR_Model))
print("Classification Report: \n",classification_report(y_test,LR_Model))
print("Accuracy Score: \n",accuracy_score(y_test, LR_Model))

# Stochastic Gradient Descent
SGDC.fit(X_train, y_train)
SGDC_Model = SGDC.predict(X_test)
print("\nStochastic Gradient Descent Algorithm\n")
print("Confusion Matrix: \n",confusion_matrix(y_test,SGDC_Model))
print("Classification Report: \n",classification_report(y_test,SGDC_Model))
print("Accuracy Score: \n",accuracy_score(y_test, SGDC_Model))

# Random Forest Classifier 
RFC.fit(X_train, y_train)
RFC_Model = RFC.predict(X_test)
print("\nRandom Forest Classifier  Algorithm\n")
print("Confusion Matrix: \n",confusion_matrix(y_test,RFC_Model))
print("Classification Report: \n",classification_report(y_test,RFC_Model))
print("Accuracy Score: \n",accuracy_score(y_test, RFC_Model))



Logistic Regression Algorithm

Confusion Matrix: 
 [[16 10]
 [ 8 12]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.67      0.62      0.64        26
           1       0.55      0.60      0.57        20

    accuracy                           0.61        46
   macro avg       0.61      0.61      0.61        46
weighted avg       0.61      0.61      0.61        46

Accuracy Score: 
 0.6086956521739131

Stochastic Gradient Descent Algorithm

Confusion Matrix: 
 [[14 12]
 [ 8 12]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.64      0.54      0.58        26
           1       0.50      0.60      0.55        20

    accuracy                           0.57        46
   macro avg       0.57      0.57      0.56        46
weighted avg       0.58      0.57      0.57        46

Accuracy Score: 
 0.5652173913043478

Random Forest Classifier  Algorithm

Confusion Matrix: 
 [[22  4]
 [ 7 

In [19]:
# Saving the model
pickle.dump(LR, open("./Model/logistic_regression.pkl", "wb"))

### Trying the model in custom data

In [20]:
sim_sentence = ['By using our Services, you’re agreeing to be bound by these Terms and our Rules.',
 'You further agree not to sell any Personal Information, where the term “sell” has the meaning given to it under applicable laws.For Personal Information you provide to us (e.g.',
 'as a Newsletter Editor), you represent and warrant that you have lawfully collected the Personal Information and that you or a third party has provided all required notices and collected all required consents before collecting the Personal Information.',
 'We may use this feedback for any purpose, in our sole discretion, without any obligation to you.',
 'We also retain the right to create limits on use and storage and may remove or limit content distribution on the Services.TerminationYou’re free to stop using our Services at any time.',
 'Medium may provide information about third-party products, services, activities or events, or we may allow third parties to make their content and information available on or through our Services (collectively, “Third-Party Content”).',
 'For purposes of this section a “consumer” means a person using the Services for personal, family or household purposes.',
 'This paragraph doesn’t override those laws.AmendmentsWe may make changes to these Terms from time to time.',
 'Unless we say otherwise in our notice, the amended Terms will be effective immediately, and your continued use of our Services after we provide such notice will confirm your acceptance of the changes.',
 'By using our Services, you’re agreeing to be bound by these Terms and our Rules.',
 'You further agree not to sell any Personal Information, where the term “sell” has the meaning given to it under applicable laws.For Personal Information you provide to us (e.g.',
 'as a Newsletter Editor), you represent and warrant that you have lawfully collected the Personal Information and that you or a third party has provided all required notices and collected all required consents before collecting the Personal Information.',
 'We may use this feedback for any purpose, in our sole discretion, without any obligation to you.',
 'We also retain the right to create limits on use and storage and may remove or limit content distribution on the Services.TerminationYou’re free to stop using our Services at any time.',
 'Medium may provide information about third-party products, services, activities or events, or we may allow third parties to make their content and information available on or through our Services (collectively, “Third-Party Content”).',
 'For purposes of this section a “consumer” means a person using the Services for personal, family or household purposes.',
 'This paragraph doesn’t override those laws.AmendmentsWe may make changes to these Terms from time to time.',
 'Unless we say otherwise in our notice, the amended Terms will be effective immediately, and your continued use of our Services after we provide such notice will confirm your acceptance of the changes.',
 'By using our Services, you’re agreeing to be bound by these Terms and our Rules.',
 'You further agree not to sell any Personal Information, where the term “sell” has the meaning given to it under applicable laws.For Personal Information you provide to us (e.g.',
 'as a Newsletter Editor), you represent and warrant that you have lawfully collected the Personal Information and that you or a third party has provided all required notices and collected all required consents before collecting the Personal Information.',
 'We may use this feedback for any purpose, in our sole discretion, without any obligation to you.',
 'We also retain the right to create limits on use and storage and may remove or limit content distribution on the Services.TerminationYou’re free to stop using our Services at any time.',
 'Medium may provide information about third-party products, services, activities or events, or we may allow third parties to make their content and information available on or through our Services (collectively, “Third-Party Content”).',
 'For purposes of this section a “consumer” means a person using the Services for personal, family or household purposes.',
 'This paragraph doesn’t override those laws.AmendmentsWe may make changes to these Terms from time to time.',
 'Unless we say otherwise in our notice, the amended Terms will be effective immediately, and your continued use of our Services after we provide such notice will confirm your acceptance of the changes.',
 'By using our Services, you’re agreeing to be bound by these Terms and our Rules.',
 'You further agree not to sell any Personal Information, where the term “sell” has the meaning given to it under applicable laws.For Personal Information you provide to us (e.g.',
 'as a Newsletter Editor), you represent and warrant that you have lawfully collected the Personal Information and that you or a third party has provided all required notices and collected all required consents before collecting the Personal Information.',
 'We may use this feedback for any purpose, in our sole discretion, without any obligation to you.',
 'We also retain the right to create limits on use and storage and may remove or limit content distribution on the Services.TerminationYou’re free to stop using our Services at any time.',
 'Medium may provide information about third-party products, services, activities or events, or we may allow third parties to make their content and information available on or through our Services (collectively, “Third-Party Content”).',
 'For purposes of this section a “consumer” means a person using the Services for personal, family or household purposes.',
 'This paragraph doesn’t override those laws.AmendmentsWe may make changes to these Terms from time to time.',
 'Unless we say otherwise in our notice, the amended Terms will be effective immediately, and your continued use of our Services after we provide such notice will confirm your acceptance of the changes.',
 'By using our Services, you’re agreeing to be bound by these Terms and our Rules.',
 'You further agree not to sell any Personal Information, where the term “sell” has the meaning given to it under applicable laws.For Personal Information you provide to us (e.g.',
 'as a Newsletter Editor), you represent and warrant that you have lawfully collected the Personal Information and that you or a third party has provided all required notices and collected all required consents before collecting the Personal Information.',
 'We may use this feedback for any purpose, in our sole discretion, without any obligation to you.',
 'We also retain the right to create limits on use and storage and may remove or limit content distribution on the Services.TerminationYou’re free to stop using our Services at any time.',
 'Medium may provide information about third-party products, services, activities or events, or we may allow third parties to make their content and information available on or through our Services (collectively, “Third-Party Content”).',
 'For purposes of this section a “consumer” means a person using the Services for personal, family or household purposes.',
 'This paragraph doesn’t override those laws.AmendmentsWe may make changes to these Terms from time to time.',
 'Unless we say otherwise in our notice, the amended Terms will be effective immediately, and your continued use of our Services after we provide such notice will confirm your acceptance of the changes.',
 'By using our Services, you’re agreeing to be bound by these Terms and our Rules.',
 'You further agree not to sell any Personal Information, where the term “sell” has the meaning given to it under applicable laws.For Personal Information you provide to us (e.g.',
 'as a Newsletter Editor), you represent and warrant that you have lawfully collected the Personal Information and that you or a third party has provided all required notices and collected all required consents before collecting the Personal Information.',
 'We may use this feedback for any purpose, in our sole discretion, without any obligation to you.',
 'We also retain the right to create limits on use and storage and may remove or limit content distribution on the Services.TerminationYou’re free to stop using our Services at any time.',
 'Medium may provide information about third-party products, services, activities or events, or we may allow third parties to make their content and information available on or through our Services (collectively, “Third-Party Content”).',
 'For purposes of this section a “consumer” means a person using the Services for personal, family or household purposes.',
 'This paragraph doesn’t override those laws.AmendmentsWe may make changes to these Terms from time to time.',
 'Unless we say otherwise in our notice, the amended Terms will be effective immediately, and your continued use of our Services after we provide such notice will confirm your acceptance of the changes.']

In [21]:
sim_sentence = list(set(sim_sentence))

In [22]:
sim_sentence_lower = list(map(remove_punctuation, sim_sentence))

In [23]:
sim_sentence_lemma = list(map(text_preprocessing, sim_sentence_lower))

In [24]:
sim_sentence_vector = vectorizer.transform(sim_sentence_lemma)

In [25]:
predicted = RFC.predict(sim_sentence_vector)

In [26]:
predicted

array([0, 1, 0, 1, 1, 1, 1, 0, 0], dtype=int64)

#### Good Privacy Text

In [28]:
for i,sentence in enumerate(sim_sentence):
    if predicted[i] == 1:
        print(sentence )
        

Medium may provide information about third-party products, services, activities or events, or we may allow third parties to make their content and information available on or through our Services (collectively, “Third-Party Content”).
We may use this feedback for any purpose, in our sole discretion, without any obligation to you.
We also retain the right to create limits on use and storage and may remove or limit content distribution on the Services.TerminationYou’re free to stop using our Services at any time.
as a Newsletter Editor), you represent and warrant that you have lawfully collected the Personal Information and that you or a third party has provided all required notices and collected all required consents before collecting the Personal Information.
Unless we say otherwise in our notice, the amended Terms will be effective immediately, and your continued use of our Services after we provide such notice will confirm your acceptance of the changes.
