## Presentation of the results

Here are our main findings

In [5]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import numpy as np
import pandas as pd
import csv
%matplotlib inline
plt.rcParams['figure.figsize'] = [12, 8]

from IPython.display import display 
import ipywidgets as widgets 
from ipywidgets import interact, Layout, interactive, fixed, interact_manual, IntSlider

import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=True)

import cufflinks as cf
cf.go_offline(connected=True)
cf.set_config_file(colorscale='plotly', world_readable=True)

# Extra options
pd.options.display.max_rows = 30
pd.options.display.max_columns = 25

df_eng = pd.read_csv('Dominant_Topics_ENG_2.csv')
df_eng.rename(columns={ df_eng.columns[0]: "Articles" }, inplace = True)
df_eng.rename(columns={ df_eng.columns[1]: "Topic_0" }, inplace = True)
df_eng.rename(columns={ df_eng.columns[2]: "Topic_1" }, inplace = True)
df_eng.rename(columns={ df_eng.columns[3]: "Topic_2" }, inplace = True)
df_eng.rename(columns={ df_eng.columns[4]: "Topic_3" }, inplace = True)
df_eng.rename(columns={ df_eng.columns[5]: "Topic_4" }, inplace = True)
# Second step
df_eng.drop('Dominant_Topic_NMF', axis=1, inplace=True)
# Third step
df_eng['Year'] = df_eng['Articles']
df_eng['Newspaper'] = df_eng['Articles']
# Fourth step
df_eng['Year']= df_eng['Year'].map(lambda x: str(x)[0:4])
df_eng['Year'].astype(int)
# Fifth step
df_eng['Newspaper']= df_eng['Newspaper'].map(lambda x: str(x)[11:14])
# Sixth step
df_eng2 = df_eng.sort_values(by='Year',ascending=True)

## Filter dataset by topic indice

In [6]:
@interact
def show_articles(column=['Topic_0', 'Topic_1', 'Topic_2', 'Topic_3', 'Topic_4'], value=(0, 1, 0.05)):
    return df_eng.loc[df_eng[column] > value]

interactive(children=(Dropdown(description='column', options=('Topic_0', 'Topic_1', 'Topic_2', 'Topic_3', 'Top…

## Compare topic distribution over time

In [7]:
opts = list(df_eng2[["Topic_0", "Topic_1", "Topic_2", "Topic_3", "Topic_4"]])

sel_mult = widgets.SelectMultiple(description="Variables",options=opts,value=[opts[1]],disabled=False)

@interact(variables=sel_mult)
def plot_multiple(variables):
    df_eng3 = df_eng.set_index("Year").sort_index(axis=0)
    df_eng3[list(variables)].plot(style=".", ms=20, alpha=0.5)
    plt.title("Topics in time")
    plt.xlabel("Years")
    plt.ylabel("Topics' weight")
    plt.legend(loc=2, prop={'size':12}, bbox_to_anchor=(1,1),ncol=1)

interactive(children=(SelectMultiple(description='Variables', index=(1,), options=('Topic_0', 'Topic_1', 'Topi…

## Compare the contribution of newspapers to topics

In [8]:
import plotly.express as px
# Define color discrete map for the newspapers' categories (hex code for colors)
color_discrete_map = {'ajA': '#002b36', 'NYT': '#586e75', 'Maj': '#657b83', 'Tor': '#839496', 'Ind': '#93a1a1', 'Tim': '#fdf6e3', 'Dai': '#b58900', 'Gua': '#cb4b16', 'Bus': '#d30102', 'ail': '#d33682', 'CDa': '#268bd2'}

@interact
# with year only
def scatter_plot(x=list(df_eng2.select_dtypes('number').columns[5:]), 
                 y=list(df_eng2.select_dtypes('number').columns[0:4]),
                 theme=list(cf.themes.THEMES.keys()), 
                 colorscale=list(cf.colors._scales_names.keys())):
    
    fig=px.scatter(df_eng2, x=x, y=y, color="Newspaper", 
             opacity=0.5,
             #trendline="rolling",
             #trendline_options=dict(window=2),
             color_discrete_map=color_discrete_map)
    fig.update_traces(marker=dict(size=15))
    fig.show()

interactive(children=(Dropdown(description='x', options=(), value=None), Dropdown(description='y', options=('T…