# Network Visualization of Company Acquisitions by Google

### Acquiring the Dataset from Wikipedia

The data is extracted using the wikipedia package and pandas. I only care about the table that lists all the acquisitions by google, so only extracting that.

In [1]:
import pandas as pd
import wikipedia as wp
 
#Get the html source
html = wp.page("List_of_mergers_and_acquisitions_by_Alphabet").html().encode("UTF-8")

#read the html
df = pd.read_html(html)[0]

#extract only desired columns
df_final=df[['Company','Used as or integrated with']]

In [3]:
#change column names

##Acquired_Company: the company that Alphabet bought
##New Operating Entity: the Alphabet entity under which the acquired company will operate
df_final.rename(columns={'Company':'Acquired_Company',
                    'Used as or integrated with':'New_Operating_Entity'}, inplace=True)

In [4]:
#drop NA's
df_final=df_final.dropna()

### Quick Look at the Dataset

In [5]:
#get all rows that contain , in their string for Acquired_Company
df_final[df_final['Acquired_Company'].str.contains(',')]

Unnamed: 0,Acquired_Company,New_Operating_Entity


In [6]:
#get all rows that contain , in their string for New_Operating_Entity
df_final[df_final['New_Operating_Entity'].str.contains(',')]

Unnamed: 0,Acquired_Company,New_Operating_Entity
3,Neotonic Software,"Google Groups, Gmail"
4,Applied Semantics,"AdSense, AdWords"
6,Sprinks,"AdSense, AdWords"
9,Picasa,"Picasa, Blogger"
12,Keyhole,"Google Maps, Google Earth"
28,Neven Vision Germany,"Picasa, Google Goggles"
36,Marratech,"Google Talk, Google Hangouts"
50,On2,"WebM, YouTube"
52,AdMob,"DoubleClick, Invite Media"
53,Gizmo5,"Google Talk, Google Hangouts"


A quick investigation of the dataset reveals that some of the acquired companies will fall under more than one operating unit within Alphabet. For example, Applied Semantic will be under Adsense and Adwords. For this reason, we need to split these multiple entities into their own rows so everything is atomized.

In [7]:
# Split entity and expand row-wise. Store in a temporary dataframe
temp_df = df_final.New_Operating_Entity.str.split(',').apply(pd.Series, 1).stack()

# Drop index level to match main dataframe
temp_df.index = temp_df.index.droplevel(-1)

# Name new column
temp_df.name = 'New_Operating_Entity'

# Delete the original entity column
del df_final['New_Operating_Entity']

# Join the original dataframe with the temp df
df_final = df_final.join(temp_df)

In [8]:
df_final.shape

(268, 2)

In [45]:
df_final.to_csv('alphabet_acquisitions.csv')

### Building a Directed Network Graph

Start by loading all the necessary Dash and Networkx libraries.
Resources utilized: 
* https://plot.ly/python/network-graphs/
* https://python-graph-gallery.com/322-network-layout-possibilities/
* https://plot.ly/python/reference/#scatter

In [31]:
import networkx as nx
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import plotly.graph_objs as gobj
import matplotlib.pyplot as plt
import plotly as plotly
from plotly import *

In [34]:
Company_List= list(df_final["Acquired_Company"].unique())
Entity_List = list(df_final["New_Operating_Entity"].unique())
node_list = list(set(Company_List+Entity_List))

In [17]:
#create graph and add each node from nodelist to graph
G = nx.Graph()
for i in node_list:
    G.add_node(i)

In [15]:
print("The number of nodes is: {}".format(len(G.nodes())))

The number of nodes is: 324


In [19]:
#get edges from the dataset pairs
for row, company in df_final.iterrows():
    G.add_edges_from([(company["Acquired_Company"],company["New_Operating_Entity"])])

In [20]:
print("The number of edges is: {}".format(len(G.edges())))

The number of edges is: 268


In [21]:
#get x and y coordinates of nodes
pos = nx.spring_layout(G)

#add node positions to the graph
for i, j in pos.items():
    G.node[i]['pos'] = j

In [22]:
#create edges
edge_trace = gobj.Scatter(
    x=[],
    y=[],
    line=dict(width=0.5,color='#888'),
    hoverinfo='none',
    mode='lines')

for edge in G.edges():
    x0, y0 = G.node[edge[0]]['pos']
    x1, y1 = G.node[edge[1]]['pos']
    edge_trace['x'] += tuple([x0, x1, None])
    edge_trace['y'] += tuple([y0, y1, None])

In [23]:
#create nodes
node_trace = gobj.Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='Portland',
        reversescale=True,
        color=[],
        size=15,
        colorbar=dict(
            thickness=10,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line=dict(width=0)))

for node in G.nodes():
    x, y = G.node[node]['pos']
    node_trace['x'] += tuple([x])
    node_trace['y'] += tuple([y])

In [24]:
#color the nodes
for node, adjacencies in enumerate(G.adjacency()):
    node_trace['marker']['color']+=tuple([len(adjacencies[1])])
    node_info = adjacencies[0] +' # of connections: '+str(len(adjacencies[1]))
    node_trace['text']+=tuple([node_info])

### Dash Application for Network Graph

In [41]:
app = dash.Dash()
app.css.append_css({
    'external_url': 'https://codepen.io/chriddyp/pen/bWLwgP.css'})


In [42]:
fig = gobj.Figure(data=[edge_trace, node_trace],
             layout=gobj.Layout(
                title='<br>Alphabet Acquisitions Network Graph',
                titlefont_size=16,
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                )

In [43]:
app.layout = html.Div([
                html.Div(dcc.Graph(id='Graph',figure=fig)),
                html.Div(className='row', children=[
                    html.Div([html.H2('Overall Data'),
                              html.P('Num of nodes: ' + str(len(G.nodes))),
                              html.P('Num of edges: ' + str(len(G.edges)))],
                              className='three columns'),
                    html.Div([
                            html.H2('Selected Data'),
                            html.Div(id='selected-data'),
                        ], className='six columns')
                    ])
                ])

In [44]:
@app.callback(
    Output('selected-data', 'children'),
    [Input('Graph','selectedData')])
def display_selected_data(selectedData):
    num_of_nodes = len(selectedData['points'])
    text = [html.P('Num of nodes selected: '+str(num_of_nodes))]
    for x in selectedData['points']:
#        print(x['text'])
        material = x['text'].split('<br>')[0][7:]
        text.append(html.P(str(material)))
    return text

if __name__ == '__main__':
    app.run_server(debug=True)

Running on http://127.0.0.1:8050/
Running on http://127.0.0.1:8050/
Debugger PIN: 621-207-777
Debugger PIN: 621-207-777
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


SystemExit: 1


To exit: use 'exit', 'quit', or Ctrl-D.

