In [None]:
# import modules
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Load the cleaned primary-debate table from the working directory.
df = pd.read_csv('primary_debates_cleaned.csv')

# Positional feature/target split: every column except the last becomes X,
# and the last column becomes y.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Hold out 5% of the rows; the fixed seed keeps the split repeatable.
# NOTE(review): none of the cells visible in this notebook read
# X_train / X_test / y_train / y_test -- the later cells work from a separate
# 80/20 split of `df`. Confirm whether this split is still needed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)

In [None]:
# Split whole rows 80/20 so each partition keeps its text and label columns
# together; the fixed seed makes the partition reproducible.
trainData, testData = train_test_split(df, random_state=25, test_size=0.2)

# Report the row count of each partition, one per line.
print(
    f"No. of training examples: {trainData.shape[0]}",
    f"No. of testing examples: {testData.shape[0]}",
    sep="\n",
)

No. of training examples: 10605
No. of testing examples: 2652


In [None]:
trainData.head()

Unnamed: 0,Line,Speaker,Text,Date,Party,Location,URL,Label
2215,313,Huckabee,And I'm still one who says that we can get rid...,08-06-2015,Republican,"Cleveland, Ohio",http://www.presidency.ucsb.edu/ws/index.php?pi...,pos
10366,668,Rubio,He is now dividing Europe up...,03-03-2016,Republican,"Detroit, Michigan",http://www.presidency.ucsb.edu/ws/index.php?pi...,neg
1145,60,AUDIENCE,(BOOING) (LAUGHTER),2/13/16,Republican,"Greenville, South Carolina",http://www.presidency.ucsb.edu/ws/index.php?pi...,neg
8317,76,Muir,"Governor, thank you. Senator Rubio?",02-06-2016,Republican,"Manchester, New Hampshire",http://www.presidency.ucsb.edu/ws/index.php?pi...,pos
2984,465,Trump,Or somebody else. Right.,10/28/15,Republican,"Boulder, Colorado",http://www.presidency.ucsb.edu/ws/index.php?pi...,neg


In [None]:
testData.head()

Unnamed: 0,Line,Speaker,Text,Date,Party,Location,URL,Label
9573,191,Sanders,With all -- with all due respect...,11/14/15,Democratic,"Des Moines, Iowa",http://www.presidency.ucsb.edu/ws/index.php?pi...,pos
8340,99,Rubio,"Chris, everybody -- you said you weren't going...",02-06-2016,Republican,"Manchester, New Hampshire",http://www.presidency.ucsb.edu/ws/index.php?pi...,neg
10805,369,AUDIENCE,(APPLAUSE),2/25/16,Republican,"Houston, Texas",http://www.presidency.ucsb.edu/ws/index.php?pi...,neg
8294,53,AUDIENCE,(APPLAUSE),02-06-2016,Republican,"Manchester, New Hampshire",http://www.presidency.ucsb.edu/ws/index.php?pi...,neg
4301,274,Sanders,"Against the Republican leadership, who all tho...",12/19/15,Democratic,"Manchester, New Hampshire",http://www.presidency.ucsb.edu/ws/index.php?pi...,neg


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF configuration (per the scikit-learn docs):
#   min_df=5          -> drop terms found in fewer than 5 documents
#   max_df=0.8        -> drop terms found in more than 80% of documents
#   sublinear_tf=True -> scale term frequency as 1 + log(tf)
#   use_idf=True      -> apply inverse-document-frequency reweighting
vectorizer = TfidfVectorizer(min_df=5, max_df=0.8, sublinear_tf=True, use_idf=True)

# The vocabulary and IDF weights are learned from the training text only;
# the test text is then projected with that already-fitted vectorizer.
train_vectors = vectorizer.fit_transform(trainData['Text'])
test_vectors = vectorizer.transform(testData['Text'])

In [None]:
import time
from sklearn import svm
from sklearn.metrics import classification_report

# Fit a linear-kernel SVM on the TF-IDF training vectors and time both phases.
# Intervals are measured with time.perf_counter(): it is a monotonic,
# high-resolution clock meant for elapsed-time measurement, whereas time.time()
# follows the wall clock and can jump if the system time is adjusted mid-run.
# (t0/t1/t2 are therefore only meaningful as differences, not as timestamps.)
classifier_linear = svm.SVC(kernel='linear')
t0 = time.perf_counter()
classifier_linear.fit(train_vectors, trainData['Label'])
t1 = time.perf_counter()
prediction_linear = classifier_linear.predict(test_vectors)
t2 = time.perf_counter()
time_linear_train = t1 - t0    # seconds spent in fit()
time_linear_predict = t2 - t1  # seconds spent in predict()

# Results: timings first, then the per-class metrics.
print("Training time: %fs; Prediction time: %fs" % (time_linear_train, time_linear_predict))
# output_dict=True makes classification_report return a dict keyed by label,
# so the metrics of a single class can be looked up by its name.
report = classification_report(testData['Label'], prediction_linear, output_dict=True)
# NOTE(review): the saved output of this cell shows supports of 777 ('pos') and
# 875 ('neg'), i.e. 1652 rows, while the split cell reports 2652 test rows.
# Presumably 'Label' holds at least one more class that is not printed here --
# confirm, and print it too if the full picture is wanted.
print('positive: ', report['pos'])
print('negative: ', report['neg'])

Training time: 27.802100s; Prediction time: 4.392682s
positive:  {'precision': 0.29814814814814816, 'recall': 0.2072072072072072, 'f1-score': 0.24449506454062264, 'support': 777}
negative:  {'precision': 0.3230042016806723, 'recall': 0.7028571428571428, 'f1-score': 0.44260525368837716, 'support': 875}


In [None]:
# Ad-hoc check: classify one hand-written sentence.
review = "I do not like this country"
# Reuse the already-fitted vectorizer; transform() expects an iterable of
# documents, hence the one-element list.
review_vector = vectorizer.transform([review])
print(classifier_linear.predict(review_vector))

['neg']


In [None]:
# Ad-hoc check: classify a second hand-written sentence.
review = "I hate america"
# Reuse the already-fitted vectorizer; transform() expects an iterable of
# documents, hence the one-element list.
review_vector = vectorizer.transform([review])
print(classifier_linear.predict(review_vector))

['neg']


In [None]:
# Ad-hoc check: classify a third hand-written sentence
# (the trailing space inside the string is kept as in the original input).
review = "I love this country and I hope for it to be good "
# Reuse the already-fitted vectorizer; transform() expects an iterable of
# documents, hence the one-element list.
review_vector = vectorizer.transform([review])
print(classifier_linear.predict(review_vector))

['pos']
