In [121]:
# coding: utf-8

import pandas as pd
import os
import csv
import numpy as np
import re
import warnings
warnings.filterwarnings('ignore')

from sklearn.naive_bayes import MultinomialNB 
from sklearn.linear_model import LogisticRegression
from sklearn import svm # support vector machine
from sklearn import metrics # for accuracy/ precision
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier # Stochastic Gradient Descent
from sklearn.neighbors import KNeighborsClassifier # k-NN ensemble method
from sklearn.ensemble import RandomForestClassifier 

import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
# nltk.download('stopwords')
from nltk.stem import PorterStemmer
import sklearn # machine learning
from sklearn.model_selection import train_test_split # splitting up data
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

path = '/Users/andiedonovan/myProjects/Youtube_Python_Project/AndiesBranch/'

os.chdir(path) # change directory
train_data = pd.read_csv('data/OKGOcomments.csv', delimiter=";", skiprows=2, encoding='latin-1', engine='python') # read in the data
df = pd.read_csv('data/data.csv', delimiter="@@@", skiprows=2, encoding='utf-8', engine='python') # read in the user's data
df.columns = ["comment", "label"]

train_data.columns = [
  'label',
  'comment','a','b'
]
train_data = train_data.drop(['a', 'b'], 1).dropna()
for row in range(len(train_data)):
    line = train_data.iloc[row,1]
    train_data.iloc[row,1] = re.sub("[^a-zA-Z]", " ", line)

df2 = df

sw = stopwords.words('english')
ps = PorterStemmer()
lemmatizer = nltk.stem.WordNetLemmatizer()
tfidf = TfidfVectorizer()

df["comment"]= df["comment"].astype(str) 

for row in range(len(df)):
        line = df.loc[row, "comment"]
        #line = data.iloc[row,0]
        df.loc[row,"comment"] = re.sub("[^a-zA-Z]", " ", line)

labels = train_data['label']

def nlpFunction(a):
    a['com_token']=a['comment'].str.lower().str.split()
    a['com_remv']=a['com_token'].apply(lambda x: [y for y in x if y not in sw])
    a["com_lemma"] = a['com_remv']         .apply(lambda x : [lemmatizer.lemmatize(y) for y in x]) # lemmatization
    a['com_stem']=a['com_lemma']         .apply(lambda x : [ps.stem(y) for y in x]) # stemming
    return df

df = nlpFunction(df)

train_data = nlpFunction(train_data)
train_data['label'] = labels

df["com_stem_str"] = df["com_stem"].apply(', '.join)
train_data["com_stem_str"] = train_data["com_stem"].apply(', '.join)

data = train_data.loc[0:len(train_data),["label", "comment"]]
data = train_data.dropna()

X_train, X_test, Y_train, Y_test = train_test_split(
                                    data["com_stem_str"], data["label"], 
                                    test_size=0.25, 
                                    random_state=42)


tfidf = TfidfVectorizer()
xtrain = tfidf.fit_transform(X_train) # transform and fit training data
xtest = tfidf.transform(X_test) # transform test data from fitted transformer

data_trans= tfidf.transform(data["com_stem_str"]) # transform entire dataset for cross validation

df_trans = tfidf.transform(df["com_stem_str"])

rs = 10
lr = LogisticRegression(solver='sag', max_iter=100, random_state=rs, multi_class="multinomial")
mnb = MultinomialNB()
svm = svm.SVC()
rf = RandomForestClassifier(n_estimators=10, random_state=rs)
knn = KNeighborsClassifier()
models = ['lr', 'mnb', 'svm', 'rf', 'knn']

labels = ['label_' + str(models[i]) for i in range(0,len(models))]
predictions = [str(models[i])+"_predict" for i in range(0,len(models))]
d = {}
initModels = [lr, mnb, svm, rf, knn]
for i in range(0,5):
    initModels[i].fit(xtrain, Y_train)
    d[predictions[i]] = initModels[i].predict(df_trans)


Ratios = pd.DataFrame(columns=['label_lr', 'label_mnb', 'label_svm', 'label_rf', 'label_knn'], 
    index=range(0,3))

for i in range(0, len(models)):
    Table[labels[i]] = d[predictions[i]]

Table['comment'] = df2['comment']


def RatioFinder(model): 
    pos = Table[Table[model]== 1.0]
    neg = Table[Table[model]== -1.0]
    neu = Table[Table[model]== 0.0]

    pos_len = len(pos); neg_len = len(neg); neu_len = len(neu)

    total = pos_len + neg_len + neu_len
    
    neg_ratio = round(neg_len / float(total), 2) * 100
    pos_ratio = round(pos_len / float(total), 2) * 100
    neu_ratio = round(neu_len / float(total), 2) * 100
    
    ratios = [pos_ratio, neu_ratio, neg_ratio]
    
    return ratios

for i in range(0,3):
        for j in range(0,5):
            Ratios.iloc[i,j] = RatioFinder(labels[j])[i]


In [122]:
Ratios

Unnamed: 0,label_lr,label_mnb,label_svm,label_rf,label_knn
0,48,49,100,47,32
1,39,42,0,34,30
2,13,9,0,19,38


In [67]:
for i in range(0,3):
        for j in range(0,5):
            Ratios.iloc[i,j] = RatioFinder(labels[j])[i]

In [46]:
Ratios

Unnamed: 0,label_lr,label_mnb,label_svm,label_rf,label_knn
Positive,48,49,100,47,32
Neutral,39,42,0,34,30
Negative,13,9,0,19,38


In [47]:
Model = list(Ratios)

In [48]:
#for i in range(1, len(Model)):
#    df[df[i] == Model]

In [68]:
import plotly.graph_objs as go

In [69]:
def update_graph(Model):
    if Model == "All Models":
        df_plot = df.copy()
    else:
        df_plot = df[df['Model'] == Model]

    trace1 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_lr'], name='LR')
    trace2 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_mnb'], name='MNB')
    trace3 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_svm'], name='SVM')
    trace4 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_rf'], name='RF')
    trace5 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_knn'], name='KNN')

    return {
        'data': [trace1, trace2, trace3, trace4, trace5],
        'layout':
        go.Layout(
            title='Sentiment Ratios as Predicted by {}'.format(Model))
    }

In [50]:
Ratios[[label_lr]]

NameError: name 'label_lr' is not defined

In [71]:
df = Ratios
model_options = list(df)
model_options
#Model = df[df[str(model_options[1])]] 

#trace1 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df[str(Model)], name=str(Model))

['label_lr', 'label_mnb', 'label_svm', 'label_rf', 'label_knn']

In [73]:
df[[str(model_options[1])]]

Unnamed: 0,label_mnb
0,49
1,42
2,9


In [54]:
#model_options[1]

j = 1
for i in range(0,3):
    print(Ratios.iloc[i,j])

    
Ratios.iloc[,1]
#Model = df[df[str(model_options[1])]] 

SyntaxError: invalid syntax (<ipython-input-54-ea5aad304608>, line 8)

In [37]:
df.index

Index(['Positive', 'Neutral', 'Negative'], dtype='object')

In [95]:
Model = model_options[1]

In [96]:
df_plot = df[str(Model)]

In [97]:
list(df_plot)

[49, 42, 9]

In [98]:
str(Model)

'label_mnb'

In [75]:
pv = pd.pivot_table(
    df_plot,
    index=['Name'],
    columns=["Status"],
    values=['Quantity'],
    aggfunc=sum,
    fill_value=0)

KeyError: 'Quantity'

In [83]:
def update_graph(Model):
    if Model == "All Models":
        df_plot = df.copy()
    else:
        df_plot = df[[str(Model[1])]]


    trace1 = go.Pie(labels=["Pos", "Neu", "Neg"], values=list(df_plot), name=str(Model))
    #trace2 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_mnb'], name='MNB')
    #trace3 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_svm'], name='SVM')
    #trace4 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_rf'], name='RF')
    #trace5 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_knn'], name='KNN')

    return {
        'data': trace1,
        'layout':
        go.Layout(
            title='Sentiment Ratios as Predicted by {}'.format(Model))
    }

In [86]:
update_graph(model_options[1])

KeyError: "['a'] not in index"

In [88]:
Model = model_options[1]

In [93]:
df_plot = df[[str(Model)]]

In [94]:
df_plot

['label_mnb']

In [None]:
def update_graph(Model):
    if Model == "All Models":
        df_plot = df.copy()
    else:
        df_plot = df[str(Model)]


    trace1 = go.Pie(labels=["Pos", "Neu", "Neg"], values=list(df_plot), name=str(Model))
    #trace2 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_mnb'], name='MNB')
    #trace3 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_svm'], name='SVM')
    #trace4 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_rf'], name='RF')
    #trace5 = go.Pie(labels=["Pos", "Neu", "Neg"], values=df['label_knn'], name='KNN')

    return {
        'data': trace1,
        'layout':
        go.Layout(
            title='Sentiment Ratios as Predicted by {}'.format(Model))
    }

In [112]:
model_options

['label_lr', 'label_mnb', 'label_svm', 'label_rf', 'label_knn']

In [113]:
Model = model_options[1]

In [105]:


Model = model_options[1]
df_plot = df[str(Model)]
values = list(df_plot)
sent_labs = ["Pos", "Neu", "Neg"]


import plotly.plotly as py
import plotly.graph_objs as go

trace = go.Pie(labels=sent_labs, values=values, name=str(Model))


In [111]:
py.iplot([trace], filename='basic_pie_chart')

In [117]:
def update_graph(Model):

    if Model == "All Models":
        values = [0,0,0]
    else: 
        #Model = model_options[str(Model)]
        df_plot = df[str(Model)]
        values = list(df_plot)
        sent_labs = ["Pos", "Neu", "Neg"]

    trace = go.Pie(labels=sent_labs, values=values, name=str(Model))
    return trace
    #py.iplot([trace], filename='basic_pie_chart')

#py.iplot([trace], filename='basic_pie_chart')

update_graph('label_mnb')

In [119]:
py.iplot(update_graph('label_mnb'), filename='basic_pie_chart')

PlotlyError: The `figure_or_data` positional argument must be either `dict`-like or `list`-like.

In [120]:
Model = 'label_mnb'
df_plot = df[str(Model)]
values = list(df_plot)
sent_labs = ["Pos", "Neu", "Neg"]
trace = go.Pie(labels=sent_labs, values=values, name=str(Model))
py.iplot([trace], filename='basic_pie_chart')