#### Import Dependencies

In [27]:
import pickle
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report

#### Load Data

In [3]:
# Read the cleaned tweet dataset; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv(filepath_or_buffer='twitter30k_cleaned.csv')

#### Convert Text Data to Numeric Data

In [12]:
# Pull the label column and the raw tweet text out of the loaded frame.
y = df['sentiment']
tweets = df['twitts']

# TF-IDF over unigrams and bigrams, capped at 10,000 features.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=10000)

# Fit the vectorizer and build the document-term matrix in one pass.
# NOTE(review): the vectorizer is fitted on every row, including rows that the
# later train/test split assigns to the test set, so the features are computed
# with knowledge of test tweets. Consider fitting on the training text only.
X = vectorizer.fit_transform(tweets)

#### Split Dataset

In [14]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

#### Create Model

In [17]:
# Linear support vector classifier created with its default hyperparameters.
model = LinearSVC()
# Train on the training split. The call is left as the cell's last expression
# so the fitted estimator is still displayed as the cell output.
model.fit(X=X_train, y=y_train)

LinearSVC()

#### Evaluate Model

In [19]:
# Predict a sentiment label for every row of the held-out split.
y_pred = model.predict(X=X_test)

In [20]:
# Per-class precision, recall and F1 for the test-split predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.76      0.73      0.74      3054
           1       0.73      0.76      0.74      2946

    accuracy                           0.74      6000
   macro avg       0.74      0.74      0.74      6000
weighted avg       0.74      0.74      0.74      6000



#### Inference

In [26]:
# Three hand-written sentences used as a quick sanity check of the pipeline.
test_input_1 = 'you won a prize, congrats'
test_input_2 = 'I hate pineapple pizza. It is so gross'
test_input_3 = 'I got into Harvard. I am pumped'

# Readable names for the classifier's integer labels.
mapping = {0:'negative', 1:'positive'}

# Reuse the already-fitted vectorizer (transform only, no refit) so the new
# text is projected onto the same feature space the model was trained on.
input_vector = vectorizer.transform([test_input_1, test_input_2, test_input_3])
output = model.predict(input_vector)

# One label name per line, in the same order as the inputs.
print(*(mapping[label] for label in output), sep='\n')

positive
negative
positive


#### Save Trained Model

In [28]:
# Serialize the trained classifier to the file 'sentiment_model' with pickle.
# The `with` block closes the file handle deterministically, so the bytes are
# flushed to disk even if pickling raises; the previous bare `open(...)` call
# left the handle open until garbage collection.
# NOTE(review): only `model` is saved. Inference also needs the fitted
# `vectorizer` to turn text into features, so consider persisting it as well.
with open('sentiment_model', 'wb') as model_file:
    pickle.dump(model, model_file)