
# IMPORTS

In [None]:
# utilities
import re
import pickle
import numpy as np
import pandas as pd

# plotting
import seaborn as sns
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.figure_factory as ff


# nltk
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# important libraries
from bokeh.plotting import figure
from bokeh.io import output_file, show, output_notebook
from collections import Counter
import spacy
from spacy.util import compounding
from spacy.util import minibatch
from spacy import displacy
import gc
import os

# sklearn
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Set the default figure size for the seaborn/matplotlib plots in this notebook.
sns.set(rc={'figure.figsize': (20,10)})

# EDA

## Dataset

In [None]:
# Collect the full path of every file found anywhere under the dataset folder.
csv_collection = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('../input/ukraine-russian-crisis-twitter-dataset-1-2-m-rows')
    for name in names
]

In [None]:
# Display the list of discovered file paths.
csv_collection

In [None]:
def _read_tweet_csv(path):
    """Load one tweet dump, trying gzip first and falling back to plain CSV."""
    try:
        return pd.read_csv(path, compression='gzip', index_col=0)
    except (OSError, EOFError, ValueError):
        # Not a readable gzip stream: treat the file as an uncompressed CSV.
        return pd.read_csv(path, index_col=0)


# Every discovered file contributes its rows.  Previously the concat lived in
# the fallback branch, so files that loaded fine as gzip were silently dropped.
df = pd.concat([_read_tweet_csv(data) for data in csv_collection], axis=0)

## Data Cleaning

In [None]:
# (rows, columns) of the combined frame.
df.shape

In [None]:
# Column labels available in the combined frame.
df.columns

In [None]:
# Print a summary of the frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Missing-value count per column, largest first.
df.isna().sum().sort_values(ascending=False)

In [None]:
# Keep only rows that have a value in the 'location' column.
df = df.dropna(subset=['location'])

## Tweets arranged according to Language of the tweets

In [None]:
# Number of rows per value of the 'language' column, most frequent first.
df.language.value_counts()

## Plotting the tweets based on language

In [None]:
# Bar chart of the 20 most frequent languages (counts computed once and reused).
top_languages = df['language'].value_counts()[:20]
sns.barplot(x=top_languages.index, y=top_languages)

## Unique Tweets based on language

In [None]:
# For each language, the number of distinct values in every other column.
df.groupby('language').nunique()

## Dataset with only English as tweet language

In [None]:
# English-only rows; the now-constant 'language' column is dropped.
is_english = df['language'] == 'en'
df_en = df.loc[is_english].drop(columns='language')

## Sorting the Tweets based on Retweet count

In [None]:
# Author, text, retweet count and id of each English tweet, most retweeted first.
retweet_view = df_en[['username', 'text', 'retweetcount', 'tweetid']]
sorted_tweets = retweet_view.sort_values('retweetcount', ascending=False)

## Most Retweeted Tweet

In [None]:
# First row of sorted_tweets; print the text of that tweet.
most_retweeted = sorted_tweets.iloc[0]
print(most_retweeted['text'])

In [None]:
# Tweet id stored in the first row of sorted_tweets.
sorted_tweets.tweetid.iloc[0]

## Top 10 Most Retweeted Tweets

In [None]:
# First ten rows of sorted_tweets.
sorted_tweets.head(10)

## WordCloud with STOPWORDS which is imported from wordcloud library

In [None]:
# Feed WordCloud the actual tweet text.  str(Series) only produces pandas'
# console preview (a handful of clipped rows plus index labels and the dtype
# footer), so the cloud used to be drawn from that preview, not the tweets.
corpus = ' '.join(sorted_tweets['text'].dropna().astype(str))

stopwords_set = set(STOPWORDS)
wordcloud = WordCloud(background_color='white',
                      stopwords=stopwords_set,
                      max_words=300,
                      max_font_size=40,
                      scale=2,
                      random_state=42
                      ).generate(corpus)

print(wordcloud)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

## WordCloud with stopwords which is imported from nltk library

In [None]:
# English tweets with rows repeating an already-seen 'text' value removed.
unique_tweets = df_en.drop_duplicates(subset = ['text'])

## Plotting the number of tweets based on the location of the tweet

In [None]:
# Bar chart of the 20 most common values in the 'location' column.
top_locations = df_en['location'].value_counts()[:20]
top_locations.plot(kind='bar')

## Wordclouds using nltk stopwords based on unique tweets and ordered by number of retweets

In [None]:
# Text of the 20 most retweeted unique tweets.  The sort must be descending
# (the default ascending order selected the 20 LEAST retweeted tweets), and
# the texts are joined explicitly because str(Series) yields only pandas'
# clipped console preview with index labels and a dtype footer.
top_retweeted = unique_tweets.sort_values(by='retweetcount', ascending=False).iloc[:20]
corpus = ' '.join(top_retweeted['text'].dropna().astype(str))

stopwords_set = set(stopwords.words('english'))
wordcloud = WordCloud(background_color='white',
                      stopwords=stopwords_set,
                      max_words=300,
                      max_font_size=40,
                      scale=2,
                      random_state=42
                      ).generate(corpus)
print(wordcloud)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

## Columns with time as a feature

In [None]:
# Names of the timestamp columns; index 2 ('usercreatedts') is used by the cells below.
time_cols = ['extractedts','tweetcreatedts','usercreatedts']

In [None]:
# Convert the time_cols[2] column ('usercreatedts') to datetime values.
df_en[time_cols[2]] = pd.to_datetime(df_en[time_cols[2]])

In [None]:
# First ten rows of the English subset.
df_en.head(10)

## Dataframe with sorted date of creation of user id

In [None]:
# English tweets ordered by time_cols[2] ('usercreatedts'), earliest first.
sort_by_userage= df_en.sort_values(by = time_cols[2], ascending=True)

In [None]:
# Column names as a list, used below to turn a column name into its position for .iloc.
columns = df_en.columns.to_list()

### Wordcloud using nltk based on unique tweets ordered by the date of creation of user id 

In [None]:
# Text of the first 1000 rows of sort_by_userage, joined explicitly:
# str(Series) would hand WordCloud pandas' truncated console preview
# (clipped rows, index labels, dtype footer) instead of the tweets.
corpus = ' '.join(sort_by_userage['text'].iloc[:1000].dropna().astype(str))

stopwords_set = set(stopwords.words('english'))
wordcloud = WordCloud(background_color='white',
                      stopwords=stopwords_set,
                      max_words=300,
                      max_font_size=40,
                      scale=2,
                      random_state=42
                      ).generate(corpus)

print(wordcloud)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

## Bar Plot on User Account Age and classified based on the location of the account

### Top 1000 sorted values

In [None]:
# 20 most common locations among the first 1000 rows of sort_by_userage.
first_rows_locations = sort_by_userage.iloc[:1000, columns.index('location')]
location_counts = first_rows_locations.value_counts()[:20]

sns.barplot(x=location_counts.index, y=location_counts)
plt.xticks(rotation=90)
plt.show()

### Bottom 1000 sorted values

In [None]:
# 20 most common locations among the rows of sort_by_userage from position
# (row count - 1000) onwards.
tail_start = df_en.shape[0] - 1000
last_rows_locations = sort_by_userage.iloc[tail_start:, columns.index('location')]
location_counts = last_rows_locations.value_counts()[:20]

sns.barplot(x=location_counts.index, y=location_counts)
plt.xticks(rotation=90)
plt.show()

## Hashtag Analysis

In [None]:
# Per-tweet length in characters and in whitespace-separated words, kept both
# as standalone Series and as new columns of df_en.
tweet_text = df_en['text']
num_chars = tweet_text.map(len)
num_words = tweet_text.map(lambda tweet: len(tweet.split()))
df_en['num_chars'], df_en['num_words'] = num_chars, num_words

In [None]:
# Mean and median tweet length, in characters and in words.
for _label, _value in (
    ("average num characters", num_chars.mean()),
    ("average num words ", num_words.mean()),
    ("median num characters", num_chars.median()),
    ("median num words", num_words.median()),
):
    print(f"{_label}: {_value}")

In [None]:
# Summary statistics of retweets and favourites for each tweet length.
# Selecting several columns from a GroupBy needs a list (double brackets);
# the bare tuple form was deprecated and then removed in pandas 2.0.
df_en.groupby('num_chars')[['retweetcount', 'favorite_count']].describe()

In [None]:
# Parse both timestamp columns of the full frame; values that cannot be parsed
# become NaT because of errors='coerce'.
for _ts_column in ('tweetcreatedts', 'extractedts'):
    df[_ts_column] = pd.to_datetime(df[_ts_column], errors='coerce')
gc.collect()

In [None]:
!pip install Unidecode

In [None]:
from unidecode import unidecode
def evaluate_hashtags(x):
    """Return the unique, ASCII-folded, lower-cased hashtag texts of one tweet.

    Parameters
    ----------
    x
        One cell of the ``hashtags`` column: text holding a Python literal
        list of dicts that each have a ``"text"`` key.  ``None`` and NaN are
        treated as "no hashtags".

    Returns
    -------
    list of str
        De-duplicated hashtags in first-seen order (empty for missing cells).

    Raises
    ------
    ValueError, SyntaxError
        If the cell is not a valid Python literal.
    """
    # Local import keeps this cell runnable on its own.
    import ast

    # A missing cell has nothing to parse (NaN is the only float != itself).
    if x is None or (isinstance(x, float) and x != x):
        return []

    # literal_eval only accepts literals, so text coming from the CSV cannot
    # execute code the way eval() could.
    parsed = ast.literal_eval(str(x))

    # dict.fromkeys de-duplicates once, in a single pass, and keeps order.
    return list(dict.fromkeys(
        str(unidecode(item["text"])).lower() for item in parsed
    ))

In [None]:
# Parse every row's hashtags, then flatten the per-tweet lists into one list
# (a hashtag appears once for each tweet that uses it).
hashtagsListCollection = df["hashtags"].apply(evaluate_hashtags)
masterlist = [tag for tags in hashtagsListCollection for tag in tags]

In [None]:
from collections import Counter

# How many of the most frequent hashtags to keep.
topXItem = 25

# Tally the hashtags and keep the topXItem most frequent as (hashtag, count) pairs.
x = Counter(masterlist)
topXItemList = x.most_common(n=topXItem)

In [None]:
# Two-column table of the top hashtags and the number of tweets using each.
df_hash = pd.DataFrame(topXItemList, columns=['Hashtag', 'Tweets'])
df_hash

In [None]:
# Turn the two-column hashtag table into a single wide row: transpose it, then
# promote the transposed 'Hashtag' row to column headers so only the 'Tweets'
# row remains as data.
dfpivot = df_hash.T
new_header = dfpivot.iloc[0] #grab the first row for the header
dfpivot = dfpivot[1:] #take the data less the header row
dfpivot.columns = new_header #set the header row as the df header
dfpivot

In [None]:
# Set the default figure size to 20x6, then draw one bar per hashtag
# with the labels rotated by 90 degrees.
plt.rcParams["figure.figsize"] = [20, 6]
df_hash.plot.bar(x='Hashtag', y='Tweets', rot=90)

# Sentiment Analysis Dataset Builder


In [None]:
# English rows only, restricted to the four columns needed for sentiment
# analysis and renumbered from 0.
_english_rows = df['language'] == 'en'
sentiment_df = (
    df.loc[_english_rows, ['tweetid', 'text', 'hashtags', 'language']]
    .reset_index(drop=True)
)

In [None]:
import ast

# Keep only the hashtag texts.  ast.literal_eval parses the stored literal
# without executing it (eval() would run arbitrary code found in the data);
# cells that are not text (e.g. NaN) become an empty list instead of raising.
sentiment_df['hashtags'] = sentiment_df['hashtags'].map(
    lambda x: [i['text'] for i in ast.literal_eval(x)] if isinstance(x, str) else []
)

In [None]:
def preprocess(text):
    """Mask user mentions and links in a single tweet.

    The text is split on single spaces.  A token that starts with ``@`` and
    is longer than one character becomes ``@user``; a token that starts with
    ``http`` becomes ``http``; every other token is kept unchanged.  The
    tokens are then re-joined with single spaces.
    """
    def _mask(token):
        if token.startswith('@') and len(token) > 1:
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))

# Replace each tweet's text with its masked form.
sentiment_df['text'] = sentiment_df['text'].apply(preprocess)

# Sentiment and Emotion Analysis 

## Sentiment Analysis

### Reading the Sentiment CSV data from RoBERTa

In [None]:
# Load the raw sentiment scores; only '\n' is treated as a line terminator.
tweet_df = pd.read_csv("../input/rawdata/Tweets_Sentiment_Analysis_RoBERTa_Raw_Values.csv", lineterminator='\n')

### Making a new CSV with a sentiment label instead of the raw sentiment scores

In [None]:
# Add an empty 'Sentiment' column at position 7; it is filled in by the next cell.
tweet_df.insert(7, "Sentiment", '')

In [None]:
# Label every tweet with its strictly highest-scoring sentiment; ties and
# rows where 'neutral' wins fall through to 'neutral'.  One vectorised
# assignment replaces the row loop, whose chained `df[col][i] = ...` writes
# are unreliable (they may hit a temporary copy) and assume a 0..n-1 index.
_neg, _pos, _neu = tweet_df['negative'], tweet_df['positive'], tweet_df['neutral']
tweet_df['Sentiment'] = np.select(
    [(_neg > _pos) & (_neg > _neu), (_pos > _neg) & (_pos > _neu)],
    ['negative', 'positive'],
    default='neutral',
)

In [None]:
# The raw scores are no longer needed once the label column is filled.
_score_columns = ['negative', 'positive', 'neutral']
tweet_df.drop(columns=_score_columns, inplace=True)

In [None]:
# Remove the 'language' column from the sentiment frame.
tweet_df.drop(columns='language', inplace=True)

In [None]:
# Write the labelled frame to the working directory, without the index column.
tweet_df.to_csv("./Tweets_Sentiment_Analysis_RoBERTa.csv", index=False)

### Plot for Sentiments

In [None]:
# Count of tweets per value of the 'Sentiment' column, on an 8x7 figure.
plt.figure(figsize = (8,7))
sns.countplot(x="Sentiment", data=tweet_df, palette='magma')

### WordClouds for Negative, Neutral and Positive Sentiments

In [None]:
# One frame per sentiment label, each renumbered from 0.
tweet_neg, tweet_net, tweet_pos = (
    tweet_df.loc[tweet_df['Sentiment'] == _label].reset_index(drop=True)
    for _label in ('negative', 'neutral', 'positive')
)

#### Negative Sentiment

In [None]:
# Join the real tweet texts: str(Series) would give WordCloud only pandas'
# truncated console preview (clipped rows, index labels, dtype footer).
corpus = ' '.join(tweet_neg['text'].dropna().astype(str))

stopwords_set = set(STOPWORDS)
wordcloud = WordCloud(background_color='white',
                      stopwords=stopwords_set,
                      max_words=300,
                      max_font_size=40,
                      scale=2,
                      random_state=42
                      ).generate(corpus)

print(wordcloud)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

#### Neutral Sentiment

In [None]:
# Join the real tweet texts: str(Series) would give WordCloud only pandas'
# truncated console preview (clipped rows, index labels, dtype footer).
corpus = ' '.join(tweet_net['text'].dropna().astype(str))

stopwords_set = set(STOPWORDS)
wordcloud = WordCloud(background_color='white',
                      stopwords=stopwords_set,
                      max_words=300,
                      max_font_size=40,
                      scale=2,
                      random_state=42
                      ).generate(corpus)

print(wordcloud)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

#### Positive Sentiment

In [None]:
# Join the real tweet texts: str(Series) would give WordCloud only pandas'
# truncated console preview (clipped rows, index labels, dtype footer).
corpus = ' '.join(tweet_pos['text'].dropna().astype(str))

stopwords_set = set(STOPWORDS)
wordcloud = WordCloud(background_color='white',
                      stopwords=stopwords_set,
                      max_words=300,
                      max_font_size=40,
                      scale=2,
                      random_state=42
                      ).generate(corpus)

print(wordcloud)
plt.imshow(wordcloud,interpolation='bilinear')
plt.axis('off')
plt.show()

### Pre-Processing the Sentiment Dataframe

In [None]:
# Label and text columns, also exported as two plain lists in the same row order.
dataset = tweet_df[['Sentiment', 'text']]
text = list(dataset['text'])
sentiment = list(dataset['Sentiment'])

In [None]:
# Maps ASCII emoticons to a short descriptive name.  Several keys contain
# upper-case letters (':P', ':O', ':-D', 'O.o', ...).
emojis = {':)': 'smile', ':-)': 'smile', ';d': 'wink', ':-E': 'vampire', ':(': 'sad', 
          ':-(': 'sad', ':-<': 'sad', ':P': 'raspberry', ':O': 'surprised',
          ':-@': 'shocked', ':@': 'shocked',':-$': 'confused', ':\\': 'annoyed', 
          ':#': 'mute', ':X': 'mute', ':^)': 'smile', ':-&': 'confused', '$_$': 'greedy',
          '@@': 'eyeroll', ':-!': 'confused', ':-D': 'smile', ':-0': 'yell', 'O.o': 'confused',
          '<(-_-)>': 'robot', 'd[-_-]b': 'dj', ":'-)": 'sadsmile', ';)': 'wink', 
          ';-)': 'wink', 'O:-)': 'angel','O*-)': 'angel','(:-D': 'gossip', '=^.^=': 'cat'}

# List (not a set) of English stopwords, spelled without apostrophes.
# NOTE(review): the only reference to it in the visible code, inside
# preprocess(), is commented out - confirm whether stopword removal is wanted.
stopwordlist = ['a', 'about', 'above', 'after', 'again', 'ain', 'all', 'am', 'an',
             'and','any','are', 'as', 'at', 'be', 'because', 'been', 'before',
             'being', 'below', 'between','both', 'by', 'can', 'd', 'did', 'do',
             'does', 'doing', 'down', 'during', 'each','few', 'for', 'from', 
             'further', 'had', 'has', 'have', 'having', 'he', 'her', 'here',
             'hers', 'herself', 'him', 'himself', 'his', 'how', 'i', 'if', 'in',
             'into','is', 'it', 'its', 'itself', 'just', 'll', 'm', 'ma',
             'me', 'more', 'most','my', 'myself', 'now', 'o', 'of', 'on', 'once',
             'only', 'or', 'other', 'our', 'ours','ourselves', 'out', 'own', 're',
             's', 'same', 'she', "shes", 'should', "shouldve",'so', 'some', 'such',
             't', 'than', 'that', "thatll", 'the', 'their', 'theirs', 'them',
             'themselves', 'then', 'there', 'these', 'they', 'this', 'those', 
             'through', 'to', 'too','under', 'until', 'up', 've', 'very', 'was',
             'we', 'were', 'what', 'when', 'where','which','while', 'who', 'whom',
             'why', 'will', 'with', 'won', 'y', 'you', "youd","youll", "youre",
             "youve", 'your', 'yours', 'yourself', 'yourselves']

In [None]:
def preprocess(textdata):
    """Normalise raw tweets into space-separated, lemmatised tokens.

    Each tweet is lower-cased; URLs become ``URL``; emoticons listed in the
    module-level ``emojis`` mapping become ``EMOJI<name>``; @-mentions become
    ``USER``; every non-alphanumeric character becomes a space; runs of three
    or more identical characters are shortened to two.  One-character tokens
    are dropped and the remaining tokens are lemmatised.  Stopwords are NOT
    removed.

    Note: this definition replaces the earlier single-tweet ``preprocess``
    defined in this file; this one takes a collection of tweets.

    Parameters
    ----------
    textdata : iterable of str
        Raw tweet texts.  Non-string entries (e.g. NaN read from a CSV) are
        treated as empty tweets so the output stays aligned with the input.

    Returns
    -------
    list of str
        One processed string per input tweet; every kept token is followed
        by a single space.
    """
    wordLemm = WordNetLemmatizer()

    # Patterns are compiled once; all are raw strings so that `\s`, `\.` and
    # `\1` reach the regex engine untouched (a non-raw '@[^\s]+' triggers an
    # invalid-escape warning).
    urlPattern        = re.compile(r"((http://)[^ ]*|(https://)[^ ]*|( www\.)[^ ]*)")
    userPattern       = re.compile(r"@[^\s]+")
    alphaPattern      = re.compile(r"[^a-zA-Z0-9]")
    sequencePattern   = re.compile(r"(.)\1\1+")
    seqReplacePattern = r"\1\1"

    # Tweets are lower-cased before emoticon matching, so the keys must be
    # lower-cased too or ':P', ':-D', 'O.o', ... could never match.  Longest
    # first, so ':-)' does not pre-empt 'O:-)' and ':-D' does not pre-empt '(:-D'.
    emojiReplacements = sorted(
        ((emoji.lower(), "EMOJI" + meaning) for emoji, meaning in emojis.items()),
        key=lambda pair: len(pair[0]),
        reverse=True,
    )

    processedText = []
    for tweet in textdata:
        tweet = tweet.lower() if isinstance(tweet, str) else ""

        # Replace all URLs with 'URL'.
        tweet = urlPattern.sub(' URL', tweet)
        # Replace all known emoticons.
        for emoji, replacement in emojiReplacements:
            tweet = tweet.replace(emoji, replacement)
        # Replace @USERNAME with 'USER'.
        tweet = userPattern.sub(' USER', tweet)
        # Replace every non-alphanumeric character with a space.
        tweet = alphaPattern.sub(" ", tweet)
        # Shorten 3 or more repeated characters to 2.
        tweet = sequencePattern.sub(seqReplacePattern, tweet)

        # Keep tokens longer than one character, lemmatised; the trailing
        # space after every token matches the previous output format.
        processedText.append("".join(
            wordLemm.lemmatize(word) + " "
            for word in tweet.split()
            if len(word) > 1
        ))

    return processedText

In [None]:
import time
import nltk

# Fetch the WordNet corpus before running the lemmatising preprocess().
nltk.download('wordnet')

# Preprocess every tweet and report the wall-clock time in whole seconds.
t = time.time()
processedtext = preprocess(text)
elapsed_seconds = round(time.time() - t)
print('Text Preprocessing complete.')
print(f'Time Taken: {elapsed_seconds} seconds')

### Splitting the Dataframe

In [None]:
# Hold out 5% of the processed tweets for testing; the seed is fixed.
X_train, X_test, y_train, y_test = train_test_split(
    processedtext,
    sentiment,
    test_size=0.05,
    random_state=0,
)
print('Data Split done.')

In [None]:
# TF-IDF over unigrams and bigrams, capped at 500000 features, fitted on the
# training texts only.
vectoriser = TfidfVectorizer(ngram_range=(1,2), max_features=500000)
vectoriser.fit(X_train)
print('Vectoriser fitted.')
# get_feature_names() was removed in scikit-learn 1.2; the fitted
# vocabulary_ mapping has one entry per feature in every version.
print('No. of feature_words: ', len(vectoriser.vocabulary_))

In [None]:
# Replace both text splits with their TF-IDF matrices.
X_train, X_test = (vectoriser.transform(_split) for _split in (X_train, X_test))
print('Data Transformed.')

### Evaluation

#### Bernoulli

In [None]:
# Fit Bernoulli naive Bayes on the training split and predict the test split.
BNBmodel = BernoulliNB(alpha=2)
BNBmodel.fit(X_train, y_train)
ys_predict = BNBmodel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

#### K Nearest Neighbors

In [None]:
# Fit a 5-nearest-neighbours classifier and predict the test split.
KNNModel = KNeighborsClassifier(n_neighbors=5)
KNNModel.fit(X_train, y_train)
ys_predict = KNNModel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

#### Decision Tree Classifier

In [None]:
# Fit a decision tree (fixed seed) and predict the test split.
DTCModel = DecisionTreeClassifier(random_state=0)
DTCModel.fit(X_train, y_train)
ys_predict = DTCModel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

#### Random Forest Classifier

In [None]:
# Fit a 100-tree random forest and predict the test split.
RFCModel = RandomForestClassifier(n_estimators=100)
RFCModel.fit(X_train, y_train)
ys_predict = RFCModel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

#### Logistic Regression

In [None]:
# Fit logistic regression (C=2, up to 1000 iterations) and predict the test split.
LRmodel = LogisticRegression(C=2, max_iter=1000, n_jobs=-1)
LRmodel.fit(X_train, y_train)
ys_predict = LRmodel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

#### Support Vector Classification

In [None]:
# Fit a linear support-vector classifier and predict the test split.
SVCmodel = LinearSVC()
SVCmodel.fit(X_train, y_train)
ys_predict = SVCmodel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

## Emotion Analysis

### Reading the Emotion CSV data from RoBERTa

In [None]:
# Load the raw emotion scores; only '\n' is treated as a line terminator.
emotion_df = pd.read_csv("../input/emotion/Tweets_Emotion_Analysis_RoBERTa_Emotion_Raw_Values.csv", lineterminator='\n')

### Making a new CSV with an emotion label instead of the raw emotion scores

In [None]:
# Remove the sentiment-related and language columns from the emotion frame.
_unused_columns = ['negative', 'positive', 'neutral', 'polarity', 'subjectivity', 'language']
emotion_df.drop(columns=_unused_columns, inplace=True)

In [None]:
# Add an empty 'Emotion' column at position 7; it is filled in by the next cell.
# NOTE(review): position 7 assumes at least 7 columns remain after the drop above - confirm.
emotion_df.insert(7, "Emotion", '')

In [None]:
# Label every tweet with its strictly highest-scoring emotion, testing anger,
# joy and optimism in that order; everything else (including ties) falls
# through to 'sadness'.  One vectorised assignment replaces the row loop,
# whose chained `df[col][i] = ...` writes are unreliable (they may hit a
# temporary copy) and assume a 0..n-1 index.
_anger, _joy = emotion_df['anger'], emotion_df['joy']
_optimism, _sadness = emotion_df['optimism'], emotion_df['sadness']
emotion_df['Emotion'] = np.select(
    [
        (_anger > _joy) & (_anger > _optimism) & (_anger > _sadness),
        (_joy > _anger) & (_joy > _optimism) & (_joy > _sadness),
        (_optimism > _anger) & (_optimism > _joy) & (_optimism > _sadness),
    ],
    ['anger', 'joy', 'optimism'],
    default='sadness',
)

In [None]:
# The raw scores are no longer needed once the label column is filled.
_score_columns = ['anger', 'joy', 'optimism', 'sadness']
emotion_df.drop(columns=_score_columns, inplace=True)

In [None]:
# Write the labelled frame to the working directory, without the index column.
emotion_df.to_csv("./Tweets_Emotion_Analysis_RoBERTa.csv", index=False)

### Plot for Emotions

In [None]:
# Count of tweets per value of the 'Emotion' column, on an 8x7 figure.
plt.figure(figsize = (8,7))
sns.countplot(x="Emotion", data=emotion_df, palette='magma')

### WordClouds for Anger, Joy, Optimism and Sadness Emotions

In [None]:
# One frame per emotion label, each renumbered from 0.
emotion_anger, emotion_joy, emotion_opt, emotion_sad = (
    emotion_df.loc[emotion_df['Emotion'] == _label].reset_index(drop=True)
    for _label in ('anger', 'joy', 'optimism', 'sadness')
)

#### Emotion - Anger

In [None]:
# Join the real tweet texts: str(Series) would give WordCloud only pandas'
# truncated console preview (clipped rows, index labels, dtype footer).
corpus = ' '.join(emotion_anger['text'].dropna().astype(str))

stopwords_set = set(STOPWORDS)
wordcloud = WordCloud(background_color='white',
                      stopwords=stopwords_set,
                      max_words=300,
                      max_font_size=40,
                      scale=2,
                      random_state=42
                      ).generate(corpus)

print(wordcloud)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

#### Emotion - Joy

In [None]:
# Join the real tweet texts: str(Series) would give WordCloud only pandas'
# truncated console preview (clipped rows, index labels, dtype footer).
corpus = ' '.join(emotion_joy['text'].dropna().astype(str))

stopwords_set = set(STOPWORDS)
wordcloud = WordCloud(background_color='white',
                      stopwords=stopwords_set,
                      max_words=300,
                      max_font_size=40,
                      scale=2,
                      random_state=42
                      ).generate(corpus)

print(wordcloud)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

#### Emotion - Optimism

In [None]:
# Join the real tweet texts: str(Series) would give WordCloud only pandas'
# truncated console preview (clipped rows, index labels, dtype footer).
corpus = ' '.join(emotion_opt['text'].dropna().astype(str))

stopwords_set = set(STOPWORDS)
wordcloud = WordCloud(background_color='white',
                      stopwords=stopwords_set,
                      max_words=300,
                      max_font_size=40,
                      scale=2,
                      random_state=42
                      ).generate(corpus)

print(wordcloud)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

#### Emotion - Sadness

In [None]:
# Join the real tweet texts: str(Series) would give WordCloud only pandas'
# truncated console preview (clipped rows, index labels, dtype footer).
corpus = ' '.join(emotion_sad['text'].dropna().astype(str))

stopwords_set = set(STOPWORDS)
wordcloud = WordCloud(background_color='white',
                      stopwords=stopwords_set,
                      max_words=300,
                      max_font_size=40,
                      scale=2,
                      random_state=42
                      ).generate(corpus)

print(wordcloud)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

### Pre-Processing the Emotion Dataframe

In [None]:
# Label and text columns of the emotion frame, exported as plain lists.
dataset = emotion_df[['Emotion','text']]
text, emotion = list(dataset['text']), list(dataset['Emotion'])

# Re-run the text normalisation on THIS frame's tweets.  Without it the split
# below pairs the emotion labels with `processedtext` left over from the
# sentiment run, which only lines up if both CSVs hold the same rows in the
# same order.
processedtext = preprocess(text)

### Splitting the Dataframe

In [None]:
# Hold out 5% of the processed tweets for testing; the seed is fixed.
X_train, X_test, y_train, y_test = train_test_split(
    processedtext,
    emotion,
    test_size=0.05,
    random_state=0,
)
print('Data Split done.')

In [None]:
# TF-IDF over unigrams and bigrams, capped at 500000 features, fitted on the
# training texts only.
vectoriser = TfidfVectorizer(ngram_range=(1,2), max_features=500000)
vectoriser.fit(X_train)
print('Vectoriser fitted.')
# get_feature_names() was removed in scikit-learn 1.2; the fitted
# vocabulary_ mapping has one entry per feature in every version.
print('No. of feature_words: ', len(vectoriser.vocabulary_))

In [None]:
# Replace both text splits with their TF-IDF matrices.
X_train, X_test = (vectoriser.transform(_split) for _split in (X_train, X_test))
print('Data Transformed.')

### Evaluation

#### Bernoulli

In [None]:
# Fit Bernoulli naive Bayes on the training split and predict the test split.
BNBmodel = BernoulliNB(alpha=2)
BNBmodel.fit(X_train, y_train)
ys_predict = BNBmodel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

#### K Nearest Neighbor

In [None]:
# Fit a 5-nearest-neighbours classifier and predict the test split.
KNNModel = KNeighborsClassifier(n_neighbors=5)
KNNModel.fit(X_train, y_train)
ys_predict = KNNModel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

#### Decision Tree Classifier

In [None]:
# Fit a decision tree (fixed seed) and predict the test split.
DTCModel = DecisionTreeClassifier(random_state=0)
DTCModel.fit(X_train, y_train)
ys_predict = DTCModel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

#### Random Forest Classifier

In [None]:
# Fit a 100-tree random forest and predict the test split.
RFCModel = RandomForestClassifier(n_estimators=100)
RFCModel.fit(X_train, y_train)
ys_predict = RFCModel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

#### Logistic Regression

In [None]:
# Fit logistic regression (C=2, up to 1000 iterations) and predict the test split.
LRmodel = LogisticRegression(C=2, max_iter=1000, n_jobs=-1)
LRmodel.fit(X_train, y_train)
ys_predict = LRmodel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))

#### Support Vector Machine

In [None]:
# Fit a linear support-vector classifier and predict the test split.
SVCmodel = LinearSVC()
SVCmodel.fit(X_train, y_train)
ys_predict = SVCmodel.predict(X_test)

# Display the outcome of classification
for _heading, _scorer in (
    ('Classification Report: \n', metrics.classification_report),
    ('Confusion Matrix: \n', metrics.confusion_matrix),
    ('Accuracy Score: \n', metrics.accuracy_score),
):
    print(_heading, _scorer(y_test, ys_predict))