In [15]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
from statistics import mean
import nltk
from pprint import pprint
from pymongo import MongoClient


In [16]:
# Loads the episode script from CSV and renames its columns to snake_case
path = 'Game_of_Thrones_Script.csv'

script_df = pd.read_csv(path).rename(
    columns={
        'Release Date': 'release_date',
        'Season': 'season',
        'Episode': 'episode',
        'Episode Title': 'episode_title',
        'Name': 'name',
        'Sentence': 'sentence',
    }
)

In [17]:
# Runs VADER over every row of the `sentence` column.
# sent_analysis ends up as a list with one polarity-score result per row,
# in the same order as script_df.
analyzer = SentimentIntensityAnalyzer()

sent_analysis = list(map(analyzer.polarity_scores, script_df['sentence']))

In [18]:
# one new df column per polarity score (pos, compound, neu, neg),
# each read out of the per-sentence score dicts in sent_analysis
for column_name, score_key in (
    ('positive_score', 'pos'),
    ('compound_score', 'compound'),
    ('neutral_score', 'neu'),
    ('negative_score', 'neg'),
):
    script_df[column_name] = [scores.get(score_key) for scores in sent_analysis]

# nltk splits each sentence into a list of word and punctuation tokens
script_df['tokenized_words'] = script_df['sentence'].map(nltk.word_tokenize)

# same token lists, keeping only tokens that are purely alphanumeric
# (any token containing punctuation, e.g. "'re", is dropped)
script_df['alphanumeric_words'] = script_df['tokenized_words'].map(
    lambda tokens: list(filter(str.isalnum, tokens))
)

# number of tokens left in each stripped-down sentence
script_df['word_count'] = script_df['alphanumeric_words'].map(len)
script_df

Unnamed: 0,release_date,season,episode,episode_title,name,sentence,positive_score,compound_score,neutral_score,negative_score
0,2011-04-17,Season 1,Episode 1,Winter is Coming,waymar royce,What do you expect? They're savages. One lot s...,0.000,-0.7717,0.774,0.226
1,2011-04-17,Season 1,Episode 1,Winter is Coming,will,I've never seen wildlings do a thing like this...,0.208,0.6124,0.792,0.000
2,2011-04-17,Season 1,Episode 1,Winter is Coming,waymar royce,How close did you get?,0.000,0.0000,1.000,0.000
3,2011-04-17,Season 1,Episode 1,Winter is Coming,will,Close as any man would.,0.000,0.0000,1.000,0.000
4,2011-04-17,Season 1,Episode 1,Winter is Coming,gared,We should head back to the wall.,0.000,0.0000,1.000,0.000
...,...,...,...,...,...,...,...,...,...,...
23906,2019-05-19,Season 8,Episode 6,The Iron Throne,brienne,I think we can all agree that ships take prece...,0.185,0.3612,0.815,0.000
23907,2019-05-19,Season 8,Episode 6,The Iron Throne,bronn,I think that's a very presumptuous statement.,0.000,0.0000,1.000,0.000
23908,2019-05-19,Season 8,Episode 6,The Iron Throne,tyrion lannister,I once brought a jackass and a honeycomb into ...,0.000,-0.4215,0.781,0.219
23909,2019-05-19,Season 8,Episode 6,The Iron Throne,man,The Queen in the North!,0.000,0.0000,1.000,0.000


In [24]:
# turns the df into a list of dicts, one per row, keyed by column name
# (the shape insert_many expects); nothing is written to mongodb in this cell

script_dict = script_df.to_dict(orient='records')
script_dict

[{'release_date': '2011-04-17',
  'season': 'Season 1',
  'episode': 'Episode 1',
  'episode_title': 'Winter is Coming',
  'name': 'waymar royce',
  'sentence': "What do you expect? They're savages. One lot steals a goat from another lot and before you know it, they're ripping each other to pieces.",
  'positive_score': 0.0,
  'compound_score': -0.7717,
  'neutral_score': 0.774,
  'negative_score': 0.226,
  'tokenized_words': ['What',
   'do',
   'you',
   'expect',
   '?',
   'They',
   "'re",
   'savages',
   '.',
   'One',
   'lot',
   'steals',
   'a',
   'goat',
   'from',
   'another',
   'lot',
   'and',
   'before',
   'you',
   'know',
   'it',
   ',',
   'they',
   "'re",
   'ripping',
   'each',
   'other',
   'to',
   'pieces',
   '.'],
  'alphanumeric_words': ['What',
   'do',
   'you',
   'expect',
   'They',
   'savages',
   'One',
   'lot',
   'steals',
   'a',
   'goat',
   'from',
   'another',
   'lot',
   'and',
   'before',
   'you',
   'know',
   'it',
   'they',


In [25]:
# client for the MongoDB server at localhost:27017
client = MongoClient('mongodb://localhost:27017/')

# handle on the `sent_analysisdb` database
db = client['sent_analysisdb']

# handle on the `got_scripts` collection inside that database
col = db['got_scripts']

In [26]:
# inserts the script records into the collection, but only when it is still empty.
# insert_many appends unconditionally, so every re-run of this cell would store
# another full copy of the script; it also adds an `_id` to each dict in place,
# so a second run in the same kernel would fail with duplicate-key errors.
if col.find_one() is None:
    col.insert_many(script_dict)


# verifies successful import by fetching one stored document
col.find_one()

<pymongo.results.InsertManyResult at 0x7fddbe060a80>