# Avatar: The Last Airbender
## *Sentiment Analysis of Characters, plot over time*

## Part Three: Process Sentiment Data to Create Data for Charts

We'll need the `json` module and a few helper functions for opening and saving JSON files.

In [32]:
import json

def open_json_as_dict(path):
    """Load the JSON document stored at ``path`` and return the parsed value.

    Args:
        path: Location of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's top-level value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; be explicit so parsing does not
    # depend on the platform's default locale encoding.
    with open(path, encoding='utf-8') as f:
        return json.load(f)

def save_json(pyDict, path):
    """Serialize ``pyDict`` as JSON into the file at ``path``.

    The file is opened in write mode, so any existing content is replaced.
    """
    with open(path, mode='w') as out_file:
        json.dump(pyDict, out_file)

Make a function to get episode plots from the sentiment data

In [90]:
import pprint

def get_plots_from_sentiment_data(nrcEp, vaderEp):
    """Build the three chart series for one set of dialogue lines.

    Args:
        nrcEp: Dict with a ``'title'`` and a ``'lines'`` list; element ``[1]``
            of each line entry is read as that line's emotion label.
        vaderEp: Dict with a ``'vaderLines'`` list; element ``[1]`` of each
            entry is read as a dict holding a ``'compound'`` score.

    Returns:
        A list of three ``{"name", "x", "y"}`` dicts:
          1. line numbers x Vader compound scores
          2. line numbers x NRC emotion labels
          3. emotion names x number of lines carrying that emotion

    Raises:
        KeyError: If a label is neither ``'none'`` nor one of the ten
            emotions tallied below.
        IndexError: If ``vaderEp`` has fewer entries than ``nrcEp``.

    Side effect: the resulting list is pretty-printed to stdout.
    """
    episode_title = nrcEp['title']
    line_count = len(nrcEp['lines'])
    line_numbers = list(range(line_count))

    # Both series are driven by the NRC line count.
    compound_scores = []
    for idx in line_numbers:
        compound_scores.append(vaderEp['vaderLines'][idx][1]['compound'])

    line_emotions = []
    for idx in line_numbers:
        line_emotions.append(nrcEp['lines'][idx][1])

    # Tally how often each emotion occurs; 'none' labels are not counted.
    tally = dict.fromkeys(
        (
            'anger',
            'anticipation',
            'disgust',
            'fear',
            'joy',
            'negative',
            'positive',
            'sadness',
            'surprise',
            'trust',
        ),
        0,
    )
    for label in line_emotions:
        if label == 'none':
            continue
        tally[label] += 1

    emotion_names = list(tally)
    emotion_counts = [tally[name] for name in emotion_names]

    plots = [
        {
            "name": episode_title + ' Vader Compound Scores By Line',
            "x": line_numbers,
            "y": compound_scores,
        },
        {
            "name": episode_title + ' NRC Emotions By Line',
            "x": line_numbers,
            "y": line_emotions,
        },
        {
            "name": episode_title + ' NRC Emotion Totals',
            "x": emotion_names,
            "y": emotion_counts,
        },
    ]

    pprint.pprint(plots)
    # TODO: get the top five characters and build the same charts for each,
    # where "lines" refers only to lines with that character speaking.
    # 18 charts x (64 episodes + 3 books + 1 series) = 1224 charts
    return plots

In [67]:
# one plot of form {"name", "x", "y"}

Now, we need a function to get the sentiment data for a book/season summary.

In [79]:
def get_end_of_book_page(index, title, nrc, vader):
    """Build the summary page for one book (season) of the series.

    The book is chosen from ``index``: 22 selects episodes 0-21 (book 1),
    42 selects episodes 22-41 (book 2), and any other value selects
    episodes 42 through the end of ``nrc`` (book 3).

    Args:
        index: Episode index used to pick which book to summarize.
        title: Book title; used for the plot names and the page's
            ``bookTitle``.
        nrc: Per-episode NRC records, each with a ``'lines'`` list.
        vader: Per-episode Vader records, each with a ``'vaderLines'`` list.

    Returns:
        A page dict ``{"title": 'end-book', "bookTitle", "plots"}``.
    """
    if index == 22:
        first, stop = 0, 22
    elif index == 42:
        first, stop = 22, 42
    else:
        first, stop = 42, len(nrc)

    # Concatenate every line of every episode in the selected book.
    book_nrc_lines = []
    book_vader_lines = []
    for ep in range(first, stop):
        book_nrc_lines.extend(nrc[ep]['lines'])
        book_vader_lines.extend(vader[ep]['vaderLines'])

    plots = get_plots_from_sentiment_data(
        {"title": title, "lines": book_nrc_lines},
        {"title": title, "vaderLines": book_vader_lines},
    )

    return {"title": 'end-book', "bookTitle": title, "plots": plots}

And a function to get the sentiment data for the series as a whole.

In [84]:
def get_end_of_series_page(nrc, vader):
    """Build the summary page covering every episode of the series.

    Args:
        nrc: Per-episode NRC records, each with a ``'lines'`` list.
        vader: Per-episode Vader records, each with a ``'vaderLines'`` list.

    Returns:
        A page dict titled ``'end-series'`` with an empty ``bookTitle`` and
        the plots computed over all lines of all episodes.
    """
    page_title = 'end-series'
    all_nrc_lines = []
    all_vader_lines = []

    # Walk the episodes in order, appending each one's lines to the totals.
    for ep in range(len(nrc)):
        all_nrc_lines += nrc[ep]['lines']
        all_vader_lines += vader[ep]['vaderLines']

    series_plots = get_plots_from_sentiment_data(
        {"title": page_title, "lines": all_nrc_lines},
        {"title": page_title, "vaderLines": all_vader_lines},
    )

    return {"title": page_title, "bookTitle": '', "plots": series_plots}

Define the final function needed: the one that calls all the others. It loops through all the episodes and
generates chart data for each.

In [89]:
def generate_chart_data(nrcData, vaderData):
    """Generate the chart pages for every episode, each book and the series.

    Episodes are processed in order. Each yields one page. When the first
    episode of the next book is reached, an end-of-book page for the book
    that just finished is inserted before that episode's page. After the
    last episode, the final book's end-of-book page and the end-of-series
    page are appended.

    Args:
        nrcData: List of per-episode NRC records (``'title'``, ``'lines'``).
        vaderData: List of per-episode Vader records (``'vaderLines'``),
            parallel to ``nrcData``.

    Returns:
        A list of page dicts ``{"title", "bookTitle", "plots"}``; empty when
        there are no episodes.
    """
    bookTitles = ['Book One: Water', 'Book Two: Earth', 'Book Three: Fire']
    # Titles of the first episode of the second and of the third book, in order.
    bookStarts = ['The Avatar State', 'Escape from the Spirit World']
    pages = []
    totalEps = len(nrcData)
    if totalEps == 0:
        # Nothing to chart; also avoids touching loop variables never bound.
        return pages

    book = 0
    for i, nrcEp in enumerate(nrcData):
        title = nrcEp['title']
        # A new book starts only when we hit the *next* expected start title.
        # Comparing against bookStarts[book] ignores repeats of an earlier
        # start title and can never push `book` past the end of bookTitles.
        if book < len(bookStarts) and title == bookStarts[book]:
            # Show where each book boundary falls in the data.
            print(i)
            print(title)
            # Summarize the book that just ended under its own title, so
            # only advance `book` afterwards.
            pages.append(get_end_of_book_page(i, bookTitles[book], nrcData, vaderData))
            book += 1
        plots = get_plots_from_sentiment_data(nrcEp, vaderData[i])
        pages.append({"title": title, "bookTitle": bookTitles[book], "plots": plots})

    # Close out the last book and the series. The full episode lists are
    # passed because get_end_of_book_page indexes them by episode number.
    pages.append(get_end_of_book_page(totalEps - 1, bookTitles[book], nrcData, vaderData))
    pages.append(get_end_of_series_page(nrcData, vaderData))

    return pages

Okay, now it's time to do the work.

Generate the pages and save chart data as a json file.

In [91]:
# Load the NRC emotion and Vader score data by episode; in both files the
# per-episode records are read from the top-level "episodes" key.
nrcEmotionData = open_json_as_dict('data/avatar-episodes-nrc-emotion.json')['episodes']
vaderScoreData = open_json_as_dict('data/avatar-episodes-vader-scores.json')['episodes']

# Generate page data and wrap it under a single "pages" key.
# NOTE(review): only the first episode of each data set is passed here, so the
# saved file is built from that one episode -- presumably a leftover from
# testing; confirm whether the full lists were intended.
pages = { "pages": generate_chart_data([ nrcEmotionData[0] ], [ vaderScoreData[0] ]) }

# Save the chart data as a JSON file.
save_json(pages, 'data/avatar-chart-data.json')

[{'name': 'The Boy in the Iceberg Vader Compound Scores By Line',
  'x': [0,
        1,
        2,
        3,
        4,
        5,
        6,
        7,
        8,
        9,
        10,
        11,
        12,
        13,
        14,
        15,
        16,
        17,
        18,
        19,
        20,
        21,
        22,
        23,
        24,
        25,
        26,
        27,
        28,
        29,
        30,
        31,
        32,
        33,
        34,
        35,
        36,
        37,
        38,
        39,
        40,
        41,
        42,
        43,
        44,
        45,
        46,
        47,
        48,
        49,
        50,
        51,
        52,
        53,
        54,
        55,
        56,
        57,
        58,
        59,
        60,
        61,
        62,
        63,
        64,
        65,
        66,
        67,
        68,
        69,
        70,
        71,
        72,
        73,
        74,
        75,
        76,
        77,
        

        150,
        151,
        152,
        153],
  'y': [-0.6874,
        0.6124,
        0.0,
        0.3164,
        0.0,
        0.0,
        0.34,
        0.0,
        0.4215,
        -0.4926,
        0.0,
        -0.4019,
        -0.0026,
        -0.4522,
        -0.8011,
        -0.2942,
        -0.4753,
        0.0,
        -0.734,
        0.0,
        0.0,
        0.2263,
        0.0,
        0.7269,
        0.7081,
        0.0,
        0.0,
        0.4767,
        0.0,
        0.5686,
        0.8735,
        -0.4826,
        -0.2481,
        -0.3595,
        -0.4404,
        0.0,
        0.3182,
        0.0,
        0.8625,
        0.3182,
        0.0,
        0.351,
        -0.2411,
        0.0,
        0.7418,
        0.0,
        0.0,
        -0.3182,
        0.6814,
        -0.5826,
        -0.25,
        0.3595,
        0.0,
        0.0,
        0.0,
        0.3802,
        -0.6249,
        0.0258,
        0.8264,
        -0.4184,
        0.4515,
        0.3578,
     