# Film Script Analyzer

## Graphs

In [None]:
import networkx as nx
import plotly.plotly     as py
from   plotly.graph_objs import *

# Random geometric graph with 200 nodes and connection radius 0.125; the
# per-node 'pos' attribute is pulled out into a {node: (x, y)} mapping.
G = nx.random_geometric_graph(200, 0.125)
pos = nx.get_node_attributes(G, 'pos')

# Find the node closest to the point (0.5, 0.5), comparing squared distances.
# Starting values mirror the defaults: node 0 and a squared distance of 1.
ncenter, dmin = 0, 1
for node_id, (node_x, node_y) in pos.items():
    sq_dist = (node_x - 0.5) ** 2 + (node_y - 0.5) ** 2
    if sq_dist < dmin:
        ncenter, dmin = node_id, sq_dist

# Shortest-path lengths from the central node to the nodes it can reach.
p = nx.single_source_shortest_path_length(G, ncenter)

In [None]:
# Collect the coordinates of every edge as [start, end, None] triples.
# NOTE(review): the None entries presumably act as gaps so that consecutive
# edges are not joined into one continuous line — confirm against Plotly docs.
edge_x, edge_y = [], []
for src, dst in G.edges():
    x0, y0 = G.node[src]['pos']
    x1, y1 = G.node[dst]['pos']
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]

# Thin grey line trace for the edges; hover is disabled on it.
edge_trace = Scatter(
    x=edge_x,
    y=edge_y,
    line=Line(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

# Marker trace for the graph nodes. `text` and `marker.color` start empty and
# are filled in once the per-node connection counts are known.
node_trace = Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers',
    hoverinfo='text',
    marker=Marker(
        showscale=True,
        # colorscale options
        # 'Greys' | 'Greens' | 'Bluered' | 'Hot' | 'Picnic' | 'Portland' |
        # 'Jet' | 'RdBu' | 'Blackbody' | 'Earth' | 'Electric' | 'YlOrRd' | 'YlGnBu'
        # Fixed: the scale name is 'YlGnBu' (lower-case L), not 'YIGnBu'.
        colorscale='YlGnBu',
        reversescale=True,
        color=[],
        size=10,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line=dict(width=2)))

# One marker per node, placed at the node's stored 'pos' coordinates.
for node in G.nodes():
    x, y = G.node[node]['pos']
    node_trace['x'].append(x)
    node_trace['y'].append(y)

In [None]:
# Colour every node by its number of neighbours and show the same number as
# hover text. The lists are rebuilt and assigned (rather than appended to), so
# re-running this cell no longer duplicates entries in the trace.
# assumes G.adjacency_list() follows the same node order as G.nodes() — TODO confirm
degrees = [len(adjacencies) for adjacencies in G.adjacency_list()]
node_trace['marker']['color'] = degrees
node_trace['text'] = ['# of connections: ' + str(degree) for degree in degrees]

In [None]:
import os

import plotly.plotly as py

# SECURITY: credentials are read from the environment instead of being
# hard-coded in the notebook. The API key that was previously embedded in this
# cell has been exposed and should be regenerated in the Plotly account.
# A KeyError is raised if either variable is not set.
py.sign_in(os.environ['PLOTLY_USERNAME'], os.environ['PLOTLY_API_KEY'])

# Combine the edge and node traces into one figure with hidden axes/grid and a
# small attribution annotation in the bottom-left corner of the paper.
fig = Figure(data=Data([edge_trace, node_trace]),
             layout=Layout(
                title='<br>Network graph made with Python',
                titlefont=dict(size=16),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    text="Python code: <a href='https://plot.ly/ipython-notebooks/network-graphs/'> https://plot.ly/ipython-notebooks/network-graphs/</a>",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=XAxis(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=YAxis(showgrid=False, zeroline=False, showticklabels=False)))

py.iplot(fig, filename='networkx')

## Bokeh

In [1]:
import bokeh

In [2]:
#bokeh.models
#bokeh.interface
from bokeh.charts import Scatter, output_file, show

In [3]:
import pandas as pd 

# Load the film dataset, using the first CSV column as the index.
df = pd.read_csv('data/dfreps0.csv',index_col=0)

# Display the available column names.
df.columns

Index([u'Dutifulness', u'Cooperation', u'Self-consciousness', u'Orderliness',
       u'Achievement striving', u'Self-efficacy', u'Activity level',
       u'Self-discipline', u'Excitement-seeking', u'Cautiousness', u'Morality',
       u'Anxiety', u'Emotionality', u'Vulnerability', u'Immoderation',
       u'Sympathy', u'Friendliness', u'Modesty', u'Altruism', u'Assertiveness',
       u'Adventurousness', u'Gregariousness', u'Intellect', u'Imagination',
       u'Artistic interests', u'Depression', u'Anger', u'Trust',
       u'Cheerfulness', u'Liberalism', u'WORDS', u'DIVERSITY', u'LENGTH',
       u'VERB', u'NOUN', u'ADP', u'.', u'ADJ', u'ADV', u'CONJ', u'PRON',
       u'PRT', u'NUM', u'X', u'Plot', u'Rated', u'Title', u'Metascore',
       u'imdbRating', u'Director', u'Released', u'Year', u'Awards', u'Runtime',
       u'Poster', u'imdbVotes', u'imdbID', u'Language1', u'Language2',
       u'Actor1', u'Actor2', u'Writer1', u'Writer2', u'Country1', u'Country2',
       u'Genre1', u'Genre2'],
  

In [None]:
# Scatter chart of the 'WORDS' column against 'imdbRating'.
p = Scatter(
    df,
    x='WORDS',
    y='imdbRating',
    title='Words vs. Rating',
    xlabel='Words',
    ylabel='Rating',
)

# Send the chart to a standalone HTML file and open it.
output_file('wvsr.html')
show(p)

In [4]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook

In [5]:
# Empty 500x400 bokeh.plotting figure; rebinds `p`.
p=figure(plot_width=500,plot_height=400)

In [6]:
# Render inline in the notebook.
output_notebook()

# Red circle glyphs: 'WORDS' on the x axis, 'imdbRating' on the y axis.
word_counts = df['WORDS']
ratings = df['imdbRating']
p.circle(word_counts, ratings, size=5, color='red')

show(p)

In [7]:
import numpy as np

from bokeh.io             import curdoc
from bokeh.layouts        import row, widgetbox
from bokeh.models         import ColumnDataSource
from bokeh.models.widgets import Slider, TextInput
from bokeh.plotting       import figure

In [8]:
# 400x400 figure with a fixed viewport (x in [0, 10], y in [-4, 6]) and a
# crosshair/pan/reset/save/wheel-zoom toolbar.
plot = figure(plot_height=400, plot_width=400, title="Awesome Plot",
              tools="crosshair,pan,reset,save,wheel_zoom",
              x_range=[0,10], y_range=[-4,6])

In [10]:
# Data source backing the line glyph: 'WORDS' as x, 'imdbRating' as y.
# NOTE(review): `plot` was created with x_range=[0,10] and y_range=[-4,6];
# confirm these columns actually fall inside that viewport.
source = ColumnDataSource(data={'x': df['WORDS'], 'y': df['imdbRating']})
plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

In [11]:
# Text box whose value is used as the plot title.
text = TextInput(
    title="title",
    value='my sine wave',
)

# Sliders for the four sine-curve parameters.
offset = Slider(
    title="offset",
    value=0.0,
    start=-5.0,
    end=5.0,
    step=0.1,
)
amplitude = Slider(
    title="amplitude",
    value=1.0,
    start=-5.0,
    end=5.0,
)
phase = Slider(
    title="phase",
    value=0.0,
    start=0.0,
    end=2*np.pi,
)
freq = Slider(
    title="frequency",
    value=1.0,
    start=0.1,
    end=5.1,
)

In [12]:
# Set up callbacks
def update_title(attrname, old, new):
    """Copy the current value of the `text` input into the plot title.

    The callback arguments are not used; the value is read from the
    `text` widget directly.
    """
    plot.title.text = text.value

# Run update_title whenever the text input's value changes.
text.on_change('value', update_title)

# Number of points sampled along the curve. The callback below referenced N
# without it ever being defined, so any slider change raised NameError.
N = 200


def update_data(attrname, old, new):
    """Recompute the sine curve from the slider values and push it to `source`.

    The callback arguments are not used; the values are read from the
    `amplitude`, `offset`, `phase` and `freq` sliders directly.
    """
    # Get the current slider values
    a = amplitude.value
    b = offset.value
    w = phase.value
    k = freq.value

    # Generate the new curve: N evenly spaced x values over [0, 4*pi]
    x = np.linspace(0, 4*np.pi, N)
    y = a*np.sin(k*x + w) + b

    # Replace the whole data dict in one assignment.
    source.data = dict(x=x, y=y)

# Every slider triggers update_data when its value changes.
for slider in (offset, amplitude, phase, freq):
    slider.on_change('value', update_data)


# Group the title box and the sliders into one widget box.
inputs = widgetbox(text, offset, amplitude, phase, freq)


In [13]:
# Attach the controls and the plot, side by side, to the current document.
doc = curdoc()
doc.add_root(row(inputs, plot, width=800))
doc.title = "Sliders"

In [17]:
# Send output to a standalone HTML file and display the figure.
output_file('plot.html')
show(plot)

In [31]:
import pandas as pd

from bokeh.layouts import row, widgetbox
from bokeh.models import Select
from bokeh.palettes import Spectral5
from bokeh.plotting import curdoc, figure
from bokeh.sampledata.autompg import autompg
from bokeh.models import CustomJS, Slider

# Work on a copy of the bundled sample data. Note that this rebinds `df`.
df = autompg.copy()

SIZES = list(range(6, 22, 3))
COLORS = Spectral5
ORIGINS = ['North America', 'Europe', 'Asia']

# data cleanup: stringify cylinders, map 1-based origin codes to region names
df['cyl'] = [str(n_cyl) for n_cyl in df.cyl]
df['origin'] = [ORIGINS[code - 1] for code in df.origin]

# 'yr' is replaced by a string-valued 'year' column
df['year'] = [str(model_year) for model_year in df.pop('yr')]

# The manufacturer is the first word of the car name; misspellings and
# abbreviations are mapped onto one canonical spelling. 'name' is dropped.
mfr_aliases = {
    'chevy': 'chevrolet',
    'chevroelt': 'chevrolet',
    'maxda': 'mazda',
    'mercedes-benz': 'mercedes',
    'toyouta': 'toyota',
    'vokswagen': 'volkswagen',
    'vw': 'volkswagen',
}
first_words = [car_name.split()[0] for car_name in df.pop('name')]
df['mfr'] = [mfr_aliases.get(word, word) for word in first_words]

# Classify columns: object dtype -> discrete, everything else -> continuous;
# continuous columns with more than 20 distinct values can be quantile-binned.
columns = sorted(df.columns)
discrete = [col for col in columns if df[col].dtype == object]
continuous = [col for col in columns if col not in discrete]
quantileable = [col for col in continuous if len(df[col].unique()) > 20]


def create_figure():
    """Build a scatter figure from the current widget selections.

    Reads the `x`, `y`, `size` and `color` Select widgets and the `df`
    frame. Discrete axis columns get a sorted categorical range; when a size
    or colour column is chosen its values are quantile-binned (pd.qcut) into
    len(SIZES) / len(COLORS) groups and mapped onto SIZES / COLORS.

    Returns:
        The newly created figure.
    """
    # math.pi replaces pd.np.pi: the pandas `np` alias was deprecated in
    # pandas 1.0 and removed in 2.0.
    import math

    xs = df[x.value].values
    ys = df[y.value].values
    x_title = x.value.title()
    y_title = y.value.title()

    kw = dict()
    if x.value in discrete:
        kw['x_range'] = sorted(set(xs))
    if y.value in discrete:
        kw['y_range'] = sorted(set(ys))
    kw['title'] = "%s vs %s" % (x_title, y_title)

    p = figure(plot_height=600, plot_width=800, tools='pan,box_zoom,reset', **kw)
    p.xaxis.axis_label = x_title
    p.yaxis.axis_label = y_title

    if x.value in discrete:
        # Tilt the category labels by 45 degrees.
        p.xaxis.major_label_orientation = math.pi / 4

    # Default marker size, unless a size column is selected.
    sz = 9
    if size.value != 'None':
        groups = pd.qcut(df[size.value].values, len(SIZES))
        sz = [SIZES[xx] for xx in groups.codes]

    # Default marker colour, unless a colour column is selected.
    c = "#31AADE"
    if color.value != 'None':
        groups = pd.qcut(df[color.value].values, len(COLORS))
        c = [COLORS[xx] for xx in groups.codes]
    p.circle(x=xs, y=ys, color=c, size=sz, line_color="white", alpha=0.6, hover_color='white', hover_alpha=0.5)

    return p


def update(attr, old, new):
    """Replace the second child of `layout` with a freshly built figure.

    The callback arguments are not used.
    """
    layout.children[1] = create_figure()


# Axis selectors offer every column; size/colour selectors offer 'None' plus
# the columns that can be quantile-binned.
x = Select(title='X-Axis', value='mpg', options=columns)
y = Select(title='Y-Axis', value='hp', options=columns)
size = Select(title='Size', value='None', options=['None'] + quantileable)
color = Select(title='Color', value='None', options=['None'] + quantileable)

# Any selection change rebuilds the figure.
for selector in (x, y, size, color):
    selector.on_change('value', update)

# Controls on the left, figure on the right.
controls = widgetbox([x, y, color, size], width=200)
layout = row(controls, create_figure())

doc = curdoc()
doc.add_root(layout)
doc.title = "Crossfilter"

In [32]:
# Build a figure from the current widget selections and direct output to a
# standalone HTML file.
p = create_figure()
output_file('figure.html')
#show(plot)

In [33]:
# Display the figure currently bound to `p`.
show(p)

## Keras


In [39]:
import pandas as pd 

# Load the film dataset again (first CSV column as index); `df` was rebound to
# the autompg sample data earlier in the notebook.
df = pd.read_csv('data/dfreps0.csv',index_col=0)