##### Problem Statement
 
 Many companies are built around lessening one’s environmental impact or carbon footprint. They offer products and services that are environmentally friendly and sustainable, in line with their values and ideals. They would like to determine how people perceive climate change and whether or not they believe it is a real threat. This would add to their market research efforts in gauging how their product/service may be received.

With this context, EDSA is challenging you during the Classification Sprint with the task of creating a Machine Learning model that is able to classify whether or not a person believes in climate change, based on their novel tweet data.

Providing an accurate and robust solution to this task gives companies access to a broad base of consumer sentiment, spanning multiple demographic and geographic categories - thus increasing their insights and informing future marketing strategies..

Import the necessary packages

In [26]:
import pandas as pd
import numpy as np
import nltk
import string
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

from sklearn.metrics import f1_score

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline


In [23]:
# Load dataset

train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')



y = train['sentiment']
X = train['message']

vectorizer = TfidfVectorizer(ngram_range=(1,1), min_df=2, stop_words="english")
X_vectorized = vectorizer.fit_transform(X)

X_train,X_val,y_train,y_val = train_test_split(X_vectorized,y,test_size=.2,shuffle=True, stratify=y, random_state=15)


from sklearn.svm import SVC
svclassifier = SVC(kernel='rbf')
svclassifier.fit(X_train, y_train)




svc = SVC(C=2,kernel='rbf')
svc.fit(X_train, y_train)
svc_pred = svc.predict(X_val)

print (f1_score(y_val, svc_pred, average="macro"))




0.656107313818997


In [28]:
# Load dataset

train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')



y = train['sentiment']
X = train['message']

vectorizer = TfidfVectorizer(ngram_range=(1,1), min_df=2, stop_words="english")
X_vectorized = vectorizer.fit_transform(X)

X_train,X_val,y_train,y_val = train_test_split(X_vectorized,y,test_size=.2,shuffle=True, stratify=y, random_state=15)


from sklearn.svm import SVC
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, y_train)




svc = SVC(C=2,kernel='linear')
svc.fit(X_train, y_train)
svc_pred = svc.predict(X_val)

print (f1_score(y_val, svc_pred, average="macro"))




0.6767078828960794


In [81]:
parameters = {'kernel':('linear', 'rbf'), 
              'C':(0.25,1.0),
              'gamma': (1,2)}

In [82]:
svm = SVC()
clf = GridSearchCV(svm, parameters)
clf.fit(X_train,y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': (0.25, 1.0), 'gamma': (1, 2),
                         'kernel': ('linear', 'rbf')},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [83]:
clf_pred = clf.predict(X_val)

In [84]:

print (f1_score(y_val, clf_pred, average="macro"))

0.6405232582557283


In [None]:
from sklearn.svm import SVC
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, y_train)

In [None]:
f1_score(y_val, svc_pred, average="macro")