# **Plotly Choropleth Maps Showing Geographic Distribution of News Sentiment**

In [1]:
# Setup dependencies
import pandas as pd 
import plotly_express as px
import plotly.graph_objects as go
from PIL import Image
import glob

path = "static/data/choropleth_locations4_sentiment.csv"
df = pd.read_csv(path)
df

Unnamed: 0,nid,country,country_ISO_code,state,US_state_code,year,month,weekday,headline_score,article_score
0,2,United States,USA,Louisiana,LA,2015,January,Thursday,0.0000,0.128000
1,3,United States,USA,,,2015,January,Thursday,0.2960,-0.051600
2,5,United States,USA,Massachusetts,MA,2015,January,Thursday,0.0000,-0.291450
3,6,United States,USA,Texas,TX,2015,January,Thursday,0.0000,-0.670500
4,11,United States,USA,New York,NY,2015,January,Thursday,0.0000,0.585900
...,...,...,...,...,...,...,...,...,...,...
15094,50547,United States,USA,South Carolina,SC,2017,December,Saturday,0.5423,0.831600
15095,50563,United States,USA,Colorado,CO,2017,December,Sunday,-0.6369,-0.630833
15096,50570,United States,USA,California,CA,2017,December,Sunday,0.0000,0.338400
15097,50571,United States,USA,New York,NY,2017,December,Sunday,0.0000,0.000000


In [2]:
# Get mean scores by country (2015-2017)  
df_country = df[['country', 'country_ISO_code', 'article_score']]
grouped_by_country = df_country.groupby(['country', 'country_ISO_code'],  as_index=False)['article_score'].mean()
df_country_final = pd.DataFrame({'country':grouped_by_country.country, 'country_ISO_code':grouped_by_country.country_ISO_code, 'article_score':grouped_by_country.article_score})

for i in range(len(df_country_final)):
    df_country_final['article_score'][i] = "{:.3f}".format(df_country_final['article_score'][i])
df_country_final

Unnamed: 0,country,country_ISO_code,article_score
0,Afghanistan,AFG,-0.342
1,Algeria,DZA,-0.691
2,American Samoa,ASM,0.421
3,Andorra,AND,-0.101
4,Antarctica,ATA,0.028
...,...,...,...
126,Vatican City,VAT,0.101
127,Venezuela,VEN,-0.022
128,Vietnam,VNM,0.116
129,Yemen,YEM,-0.595


In [3]:
# Create choropleth map of sentiment scores by country (2015-2017)

fig_global_allyears = go.Figure(data=go.Choropleth(
    locations = df_country_final['country_ISO_code'],
    z = df_country_final['article_score'],
    text = df_country_final['country'],
    colorscale = 'RdBu',
    marker_line_color='darkgray',
    marker_line_width=1,
    colorbar_title = 'Sentiment Score',
))

fig_global_allyears.update_layout(
    title_text='Global News Sentiment by Country (2015-2017)',
    width=1150,
    height=1150,
    margin=dict(l=20, r=20, b=300, t=100, pad=10),
    font=dict(size=20),
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations = [dict(
        x=0.55,
        y=0.35,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://www.nytimes.com/"> The New York Times</a>',
        showarrow = False
    )]
)

fig_global_allyears.show()

In [9]:
# Get mean scores by country for each year
df_country_year = df[['country', 'country_ISO_code', 'year', 'article_score']]
grouped_by_country_year = df_country_year.groupby(['country', 'country_ISO_code', 'year'], as_index=False)['article_score'].mean()
df_country_year = pd.DataFrame({'country':grouped_by_country_year.country, 'country_ISO_code':grouped_by_country_year.country_ISO_code, 'year':grouped_by_country_year.year, 'article_score':grouped_by_country_year.article_score})
df_country_year = df_country_year.loc[df_country_year['country_ISO_code'] != " "]

df_country_year.reset_index(drop=True, inplace=True)
for i in range(len(df_country_year)):
    df_country_year['article_score'][i] = "{:.3f}".format(df_country_year['article_score'][i])
df_country_year   

Unnamed: 0,country,country_ISO_code,year,article_score
0,Afghanistan,AFG,2015,-0.268
1,Afghanistan,AFG,2016,-0.429
2,Algeria,DZA,2016,-0.691
3,American Samoa,ASM,2016,0.421
4,Andorra,AND,2015,-0.101
...,...,...,...,...
277,Yemen,YEM,2015,-0.665
278,Yemen,YEM,2016,-0.332
279,Yemen,YEM,2017,-0.651
280,Zimbabwe,ZWE,2015,-0.622


In [17]:
# Create country wise dataframes for 2015, 2016, and 2017

df_year_2015 = df_country_year[df_country_year['year'] == 2015]
df_year_2016 = df_country_year[df_country_year['year'] == 2016]
df_year_2017 = df_country_year[df_country_year['year'] == 2017]

df_year_2017

Unnamed: 0,country,country_ISO_code,year,article_score
7,Antarctica,ATA,2017,0.011
12,Australia,AUS,2017,-0.012
15,Azerbaijan,AZE,2017,-0.542
18,Bangladesh,BGD,2017,-0.067
22,Belgium,BEL,2017,-0.011
...,...,...,...,...
265,United States,USA,2017,-0.055
273,Venezuela,VEN,2017,0.096
276,Vietnam,VNM,2017,-0.459
279,Yemen,YEM,2017,-0.651


In [44]:
# Create choropleth map of sentiment scores by country (year 2015 only)

fig_global_2015 = go.Figure(data=go.Choropleth(
    locations = df_year_2015['country_ISO_code'],
    z = df_year_2015['article_score'],
    text = df_year_2015['country'],
    colorscale = 'RdBu',
    marker_line_color='darkgray',
    marker_line_width=1,
    colorbar_title = 'Sentiment Score',
))

fig_global_2015.update_layout(
    title_text='Global News Sentiment by Country (2015)',
    width=1150,
    height=1150,
    margin=dict(l=20, r=20, b=300, t=100, pad=10),
    font=dict(size=20),
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations = [dict(
        x=0.55,
        y=0.35,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://www.nytimes.com/"> The New York Times</a>',
        showarrow = False
    )]
)

fig_global_2015.show()

In [20]:
# Create choropleth map of sentiment scores by country (year 2016 only)

fig_global_2016 = go.Figure(data=go.Choropleth(
    locations = df_year_2016['country_ISO_code'],
    z = df_year_2016['article_score'],
    text = df_year_2016['country'],
    colorscale = 'RdBu',
    marker_line_color='darkgray',
    marker_line_width=1,
    colorbar_title = 'Sentiment Score',
))

fig_global_2016.update_layout(
    title_text='Global News Sentiment by Country (2016)',
    width=1150,
    height=1150,
    margin=dict(l=20, r=20, b=300, t=100, pad=10),
    font=dict(size=20),
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations = [dict(
        x=0.55,
        y=0.35,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://www.nytimes.com/"> The New York Times</a>',
        showarrow = False
    )]
)

fig_global_2016.show()

In [22]:
# Create choropleth map of sentiment scores by country (year 2017 only)

fig_global_2017 = go.Figure(data=go.Choropleth(
    locations = df_year_2017['country_ISO_code'],
    z = df_year_2017['article_score'],
    text = df_year_2017['country'],
    colorscale = 'RdBu',
    marker_line_color='darkgray',
    marker_line_width=1,
    colorbar_title = 'Sentiment Score',
))

fig_global_2017.update_layout(
    title_text='Global News Sentiment by Country (2017)',
    width=1150,
    height=1150,
    margin=dict(l=20, r=20, b=300, t=100, pad=10),
    font=dict(size=20),
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations = [dict(
        x=0.55,
        y=0.35,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://www.nytimes.com/"> The New York Times</a>',
        showarrow = False
    )]
)

fig_global_2017.show()

In [42]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as py

# min year in your dataset
year = 2015
# years = ['2015', '2016', '2017']
# your color-scale
colorscale = 'RdBu'
# scl = [[0.0, '#ffffff'],[0.2, '#b4a8ce'],[0.4, '#8573a9'],
#        [0.6, '#7159a3'],[0.8, '#5732a1'],[1.0, '#2c0579']] # purples


data_slider = []
for year in df['years'].unique():
    df_segmented =  df[(df['years']== year)]

    for col in df_segmented.columns:
        df_segmented[col] = df_segmented[col].astype(str)

    data_each_yr = dict(
                        type='choropleth',
                        locations = df_segmented['state'],
                        z=df_segmented['sightings'].astype(float),
                        locationmode='USA-states',
                        colorscale = scl,
                        colorbar= {'title':'# Sightings'})

    data_slider.append(data_each_yr)

steps = []
for i in range(len(data_slider)):
    step = dict(method='restyle',
                args=['visible', [False] * len(data_slider)],
                label='Year {}'.format(i + 1998))
    step['args'][1][i] = True
    steps.append(step)

sliders = [dict(active=0, pad={"t": 1}, steps=steps)]

layout = dict(title ='UFO Sightings by State Since 1998', geo=dict(scope='usa',
                       projection={'type': 'albers usa'}),
              sliders=sliders)

fig = dict(data=data_slider, layout=layout)
periscope.plotly(fig)

ImportError: 
The plotly.plotly module is deprecated,
please install the chart-studio package and use the
chart_studio.plotly module instead. 


In [31]:
# Create a GIF for the yearly global plots:
years = ['2015', '2016', '2017']

# for year in years:
#     fig(year)
fig(2015) = './news_app/static/img/fig_global_2015.png'
fig(2016) = './news_app/static/img/fig_global_2016.png'
fig(2017) = './news_app/static/img/fig_global_2017.png'


images = [fig(2015), fig(2016), fig(2017)]
# looping over the images and saving them into a list
for i in range(images):
  images.append(images)

# creating the GIF
images[0].save('./news_app/static/img/fig_global_2015-16-17.gif',
               save_all=True, append_images=images[1:], optimize=True, duration=800, loop=0)

SyntaxError: can't assign to function call (<ipython-input-31-7a643e909fc2>, line 6)

In [40]:
# Get mean scores by state (2015-2017)
df_state = df[['state', 'US_state_code', 'article_score']]
grouped_by_state = df_state.groupby(['state', 'US_state_code'], as_index=False)['article_score'].mean()
df_state_final = pd.DataFrame({'state':grouped_by_state.state, 'US_state_code':grouped_by_state.US_state_code, 'article_score':grouped_by_state.article_score})
df_state_final = df_state_final.loc[df_state_final['US_state_code'] != " "]

df_state_final.reset_index(drop=True, inplace=True)
for i in range(len(df_state_final)):
    df_state_final['article_score'][i] = "{:.3f}".format(df_state_final['article_score'][i])
df_state_final   

Unnamed: 0,state,US_state_code,article_score
0,Alabama,AL,-0.015
1,Alaska,AK,0.09
2,Arizona,AZ,-0.13
3,Arkansas,AR,-0.089
4,California,CA,-0.007
5,Colorado,CO,-0.11
6,Connecticut,CT,-0.081
7,Delaware,DE,-0.021
8,District of Columbia,DC,0.031
9,Florida,FL,-0.059


In [5]:
# # Create list of US states
# state_list = []
# for i in range(len(df_state_final)):
#     state_list.append(df_state_final['US_state_code'][i])
# print(state_list)

In [6]:
# # Create list of article scores by state 
# state_a_scores_list = []
# for i in range(1, len(df_country_final)):
#     df_state_final['article_score'][i] = "{:.3f}".format(df_state_final['article_score'][i])
#     state_a_scores_list.append(df_state_final['article_score'][i])
# print(state_a_scores_list)

In [41]:
# Create choropleth map of US sentiment scores by states (2015-2017)

fig_state_allyears = go.Figure(data=go.Choropleth(
    locations=df_state_final['US_state_code'],
    z=df_state_final['article_score'].astype(str),
    locationmode='USA-states',
    colorscale='RdBu',
    autocolorscale=False,
    # text=df_state_final['US_state_code'], # hover text
    marker_line_color='white', # line markers between states
    colorbar_title="Sentiment Score"
))

fig_state_allyears.update_layout(
    title_text='US News Sentiment by State (2015-2017)',
    width=1150,
    height=1150,
    margin=dict(l=20, r=20, b=300, t=100, pad=10),
    font=dict(size=20),
    geo = dict(
        scope='usa',
        projection=go.layout.geo.Projection(type = 'albers usa'),
        showlakes=True, # lakes
        lakecolor='rgb(255, 255, 255)'),
    annotations = [dict(
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://www.nytimes.com/"> The New York Times</a>',
        showarrow = False
    )]
)

fig_state_allyears.show()

In [36]:
locations_list = []
for i in range(len(country_mean_all)):
    locations_list.append.country_mean_all['country_ISO_code']
print(locations_list)

NameError: name 'country_mean_all' is not defined

In [6]:
# Get mean scores by month
df_state_month = df[['state', 'US_state_code', 'month', 'article_score']]
grouped_by_state = df_state_month.groupby(['state', 'US_state_code', 'month'], as_index=False)['article_score'].mean()
# state_mean_month = state_mean_month.loc['month': 'January']
grouped_by_state
# df_state = df[['state', 'US_state_code', 'article_score']]
# grouped_by_state = df_state.groupby(['state', 'US_state_code'], as_index=False)['article_score'].mean()
# df_state_final = pd.DataFrame({'state':grouped_by_state.state, 'US_state_code':grouped_by_state.US_state_code, 'article_score':grouped_by_state.article_score})
# df_state_final = df_state_final.loc[df_state_final['US_state_code'] != " "]

Unnamed: 0,state,US_state_code,month,article_score
0,,,April,0.014757
1,,,August,-0.025686
2,,,February,-0.000091
3,,,January,0.034859
4,,,July,-0.052278
...,...,...,...,...
900,Île-de-France,,May,-0.250267
901,Île-de-France,,November,0.674000
902,Île-de-France,,October,0.014950
903,Île-de-France,,September,0.306200
