# **Plotly Choropleth Maps - By US States**

In [15]:
# Setup dependencies
import pandas as pd
import numpy as np
import plotly_express as px
import plotly.graph_objects as go

# Relative location of the sentiment CSV that every later cell reads from.
path = "static/data/choropleth_locations3_all_sentiment.csv"

# Load the CSV into the master frame; the bare name on the last line makes
# the notebook render it.
df = pd.read_csv(filepath_or_buffer=path)
df

Unnamed: 0,nid,country,country_ISO_code,state,US_state_code,latitude,longitude,year,month,weekday,headline_score,article_score
0,2,United States,USA,Louisiana,LA,30.984298,-91.962333,2015,January,Thursday,0.0000,0.128000
1,3,United States,USA,,,37.090240,-95.712891,2015,January,Thursday,0.2960,-0.051600
2,5,United States,USA,Massachusetts,MA,42.360083,-71.058880,2015,January,Thursday,0.0000,-0.291450
3,6,United States,USA,Texas,TX,31.117119,-97.727796,2015,January,Thursday,0.0000,-0.670500
4,11,United States,USA,New York,NY,40.726477,-73.981534,2015,January,Thursday,0.0000,0.585900
...,...,...,...,...,...,...,...,...,...,...,...,...
15625,50563,United States,USA,Colorado,CO,39.739236,-104.990251,2017,December,Sunday,-0.6369,-0.630833
15626,50564,United States,USA,Maine,ME,44.338556,-68.273335,2017,December,Sunday,-0.5023,0.700300
15627,50570,United States,USA,California,CA,37.774929,-122.419415,2017,December,Sunday,0.0000,0.338400
15628,50571,United States,USA,New York,NY,43.299428,-74.217933,2017,December,Sunday,0.0000,0.000000


## US States by Year

In [18]:
# Build a lookup table with one row per US state (name + state code)

# Keep the first row seen for each state, retain only the two identifying
# columns, discard rows without a state code, and order alphabetically
# with a fresh 0..n-1 index.
df_states_year = (
    df.drop_duplicates(subset=['state'])
      [['state', 'US_state_code']]
      .dropna(subset=['US_state_code'])
      .sort_values('state')
      .reset_index(drop=True)
)

# Placeholder columns: a single-space string marks "not filled in yet".
for placeholder_col in ('article_score', 'year'):
    df_states_year[placeholder_col] = " "
del placeholder_col

df_states_year = df_states_year[['state', 'US_state_code', 'article_score', 'year']]
df_states_year

Unnamed: 0,state,US_state_code,article_score,year
0,Alabama,AL,,
1,Alaska,AK,,
2,Arizona,AZ,,
3,Arkansas,AR,,
4,California,CA,,
5,Colorado,CO,,
6,Connecticut,CT,,
7,Delaware,DE,,
8,District of Columbia,DC,,
9,Florida,FL,,


In [19]:
# Get mean scores by state by year

# Average the article sentiment for every (year, state, state code)
# combination. groupby excludes rows whose key columns are NaN by default,
# so articles without a state do not contribute.
group_by_year = (
    df[['state', 'US_state_code', 'article_score', 'year']]
    .groupby(['year', 'state', 'US_state_code'], as_index=False)['article_score']
    .mean()
)

# Reorder the columns; .copy() gives an independent frame so the column
# assignment below is unambiguous.
df_st_year = group_by_year[['state', 'US_state_code', 'article_score', 'year']].copy()

# Format every mean as a 3-decimal string in one vectorised step.
# The previous per-row `df['article_score'][i] = ...` was chained
# assignment: it raises SettingWithCopyWarning and does not write back
# to the frame when pandas Copy-on-Write is enabled.
df_st_year['article_score'] = df_st_year['article_score'].map("{:.3f}".format)
df_st_year

Unnamed: 0,state,US_state_code,article_score,year
0,Alabama,AL,-0.079,2015
1,Alaska,AK,0.133,2015
2,Arizona,AZ,-0.111,2015
3,Arkansas,AR,0.132,2015
4,California,CA,-0.060,2015
...,...,...,...,...
148,Virginia,VA,-0.242,2017
149,Washington,WA,-0.169,2017
150,West Virginia,WV,-0.032,2017
151,Wisconsin,WI,-0.121,2017


In [20]:
# Create state dataframes for each year and combine to get a single master dataset

def _state_scores_for_year(states, scores, year):
    """Return one row per state with that state's mean score for ``year``.

    Args:
        states: frame with at least ``state`` and ``US_state_code`` columns,
            one row per state.
        scores: frame with ``state``, ``article_score`` and ``year`` columns.
        year: the year to extract from ``scores``.

    Returns:
        A new frame with columns ``state``, ``US_state_code``,
        ``article_score`` and ``year``, in the row order of ``states``.
        States with no score for ``year`` get NaN in ``article_score``.
    """
    yearly = scores.loc[scores['year'] == year, ['state', 'article_score']]
    # Should a state appear more than once for the year, keep the last row
    # so the merge cannot duplicate states.
    yearly = yearly.drop_duplicates(subset='state', keep='last')
    merged = states[['state', 'US_state_code']].merge(yearly, on='state', how='left')
    merged['year'] = year
    return merged[['state', 'US_state_code', 'article_score', 'year']]


# Each yearly frame is built as an independent object. Previously all three
# names aliased df_states_year, so a state with no score in a later year
# silently kept the score written for the previous year.
df_state_2015 = _state_scores_for_year(df_states_year, df_st_year, 2015)
df_state_2016 = _state_scores_for_year(df_states_year, df_st_year, 2016)
df_state_2017 = _state_scores_for_year(df_states_year, df_st_year, 2017)

# Stack the years and drop state/year combinations that have no score.
df_state_year = pd.concat(
    [df_state_2015, df_state_2016, df_state_2017], ignore_index=True
).dropna()
df_state_year

Unnamed: 0,state,US_state_code,article_score,year
0,Alabama,AL,-0.079,2015
1,Alaska,AK,0.133,2015
2,Arizona,AZ,-0.111,2015
3,Arkansas,AR,0.132,2015
4,California,CA,-0.06,2015
...,...,...,...,...
148,Virginia,VA,-0.242,2017
149,Washington,WA,-0.169,2017
150,West Virginia,WV,-0.032,2017
151,Wisconsin,WI,-0.121,2017


## States - final data by year

In [32]:
# Split the combined table into one frame per year via a boolean mask.
state_2015, state_2016, state_2017 = (
    df_state_year[df_state_year['year'] == yr] for yr in (2015, 2016, 2017)
)

In [33]:
# Create choropleth map of US sentiment scores by states (2015)

# Single choropleth trace: states are matched by their code and shaded by
# the stringified article score on a fixed red-blue colour scale.
fig_state_2015 = go.Figure(
    go.Choropleth(
        locationmode='USA-states',
        locations=state_2015['US_state_code'],
        z=state_2015['article_score'].astype(str),
        autocolorscale=False,
        colorscale='RdBu',
        marker_line_color='white',  # line markers between states
        colorbar_title="Sentiment Score",
    )
)

# Square canvas with a large bottom margin; the source credit is placed as
# a paper-relative annotation.
fig_state_2015.update_layout(
    title_text='US News Sentiment by State (2015)',
    font={'size': 20},
    width=1150,
    height=1150,
    margin={'l': 20, 'r': 20, 'b': 300, 't': 100, 'pad': 10},
    geo={
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,  # lakes
        'lakecolor': 'rgb(255, 255, 255)',
    },
    annotations=[
        {
            'x': 0.55,
            'y': 0.1,
            'xref': 'paper',
            'yref': 'paper',
            'text': 'Source: <a href="https://www.nytimes.com/"> The New York Times</a>',
            'showarrow': False,
        }
    ],
)

fig_state_2015.show()

In [34]:
# Create choropleth map of US sentiment scores by states (2016)

# Single choropleth trace: states are matched by their code and shaded by
# the stringified article score on a fixed red-blue colour scale.
fig_state_2016 = go.Figure(
    go.Choropleth(
        locationmode='USA-states',
        locations=state_2016['US_state_code'],
        z=state_2016['article_score'].astype(str),
        autocolorscale=False,
        colorscale='RdBu',
        marker_line_color='white',  # line markers between states
        colorbar_title="Sentiment Score",
    )
)

# Square canvas with a large bottom margin; the source credit is placed as
# a paper-relative annotation.
fig_state_2016.update_layout(
    title_text='US News Sentiment by State (2016)',
    font={'size': 20},
    width=1150,
    height=1150,
    margin={'l': 20, 'r': 20, 'b': 300, 't': 100, 'pad': 10},
    geo={
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,  # lakes
        'lakecolor': 'rgb(255, 255, 255)',
    },
    annotations=[
        {
            'x': 0.55,
            'y': 0.1,
            'xref': 'paper',
            'yref': 'paper',
            'text': 'Source: <a href="https://www.nytimes.com/"> The New York Times</a>',
            'showarrow': False,
        }
    ],
)

fig_state_2016.show()

In [35]:
# Create choropleth map of US sentiment scores by states (2017)

# Single choropleth trace: states are matched by their code and shaded by
# the stringified article score on a fixed red-blue colour scale.
fig_state_2017 = go.Figure(
    go.Choropleth(
        locationmode='USA-states',
        locations=state_2017['US_state_code'],
        z=state_2017['article_score'].astype(str),
        autocolorscale=False,
        colorscale='RdBu',
        marker_line_color='white',  # line markers between states
        colorbar_title="Sentiment Score",
    )
)

# Square canvas with a large bottom margin; the source credit is placed as
# a paper-relative annotation.
fig_state_2017.update_layout(
    title_text='US News Sentiment by State (2017)',
    font={'size': 20},
    width=1150,
    height=1150,
    margin={'l': 20, 'r': 20, 'b': 300, 't': 100, 'pad': 10},
    geo={
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,  # lakes
        'lakecolor': 'rgb(255, 255, 255)',
    },
    annotations=[
        {
            'x': 0.55,
            'y': 0.1,
            'xref': 'paper',
            'yref': 'paper',
            'text': 'Source: <a href="https://www.nytimes.com/"> The New York Times</a>',
            'showarrow': False,
        }
    ],
)

fig_state_2017.show()

## US States by Weekday

In [21]:
# Rebuild the one-row-per-state lookup table (name + state code)

# Keep the first row seen for each state, retain only the two identifying
# columns, discard rows without a state code, and order alphabetically
# with a fresh 0..n-1 index.
df_states_weekday = (
    df.drop_duplicates(subset=['state'])
      [['state', 'US_state_code']]
      .dropna(subset=['US_state_code'])
      .sort_values('state')
      .reset_index(drop=True)
)

# Placeholder columns: a single-space string marks "not filled in yet".
for placeholder_col in ('article_score', 'weekday'):
    df_states_weekday[placeholder_col] = " "
del placeholder_col

df_states_weekday = df_states_weekday[['state', 'US_state_code', 'article_score', 'weekday']]
df_states_weekday

Unnamed: 0,state,US_state_code,article_score,weekday
0,Alabama,AL,,
1,Alaska,AK,,
2,Arizona,AZ,,
3,Arkansas,AR,,
4,California,CA,,
5,Colorado,CO,,
6,Connecticut,CT,,
7,Delaware,DE,,
8,District of Columbia,DC,,
9,Florida,FL,,


In [22]:
# Get mean scores by state by day of the week

# Average the article sentiment for every (weekday, state, state code)
# combination. groupby excludes rows whose key columns are NaN by default,
# so articles without a state do not contribute.
group_by_weekday = (
    df[['state', 'US_state_code', 'article_score', 'weekday']]
    .groupby(['weekday', 'state', 'US_state_code'], as_index=False)['article_score']
    .mean()
)

# Reorder the columns; .copy() gives an independent frame so the column
# assignment below is unambiguous.
df_st_weekday = group_by_weekday[['state', 'US_state_code', 'article_score', 'weekday']].copy()

# Format every mean as a 3-decimal string in one vectorised step.
# The previous per-row `df['article_score'][i] = ...` was chained
# assignment: it raises SettingWithCopyWarning and does not write back
# to the frame when pandas Copy-on-Write is enabled.
df_st_weekday['article_score'] = df_st_weekday['article_score'].map("{:.3f}".format)
df_st_weekday

Unnamed: 0,state,US_state_code,article_score,weekday
0,Alabama,AL,-0.097,Friday
1,Alaska,AK,0.081,Friday
2,Arizona,AZ,-0.213,Friday
3,Arkansas,AR,-0.044,Friday
4,California,CA,-0.015,Friday
...,...,...,...,...
347,Virginia,VA,-0.053,Wednesday
348,Washington,WA,-0.080,Wednesday
349,West Virginia,WV,0.007,Wednesday
350,Wisconsin,WI,-0.182,Wednesday


In [23]:
# Create state dataframes for each weekday and combine to get a single master dataset

def _state_scores_for_weekday(states, scores, weekday):
    """Return one row per state with that state's mean score for ``weekday``.

    Args:
        states: frame with at least ``state`` and ``US_state_code`` columns,
            one row per state.
        scores: frame with ``state``, ``article_score`` and ``weekday`` columns.
        weekday: weekday name to extract from ``scores`` (e.g. ``"Monday"``).

    Returns:
        A new frame with columns ``state``, ``US_state_code``,
        ``article_score`` and ``weekday``, in the row order of ``states``.
        States with no score for ``weekday`` get NaN in ``article_score``.
    """
    daily = scores.loc[scores['weekday'] == weekday, ['state', 'article_score']]
    # Should a state appear more than once for the weekday, keep the last
    # row so the merge cannot duplicate states.
    daily = daily.drop_duplicates(subset='state', keep='last')
    merged = states[['state', 'US_state_code']].merge(daily, on='state', how='left')
    merged['weekday'] = weekday
    return merged[['state', 'US_state_code', 'article_score', 'weekday']]


# Each weekday frame is built as an independent object. Previously all seven
# names aliased df_states_weekday, so a state with no score on a later
# weekday silently kept the score written for the previous weekday.
df_state_mon = _state_scores_for_weekday(df_states_weekday, df_st_weekday, "Monday")
df_state_tues = _state_scores_for_weekday(df_states_weekday, df_st_weekday, "Tuesday")
df_state_wed = _state_scores_for_weekday(df_states_weekday, df_st_weekday, "Wednesday")
df_state_thur = _state_scores_for_weekday(df_states_weekday, df_st_weekday, "Thursday")
df_state_fri = _state_scores_for_weekday(df_states_weekday, df_st_weekday, "Friday")
df_state_sat = _state_scores_for_weekday(df_states_weekday, df_st_weekday, "Saturday")
df_state_sun = _state_scores_for_weekday(df_states_weekday, df_st_weekday, "Sunday")

# Stack Monday through Sunday; rows without a score are kept as NaN, as before.
df_state_weekday = pd.concat(
    [
        df_state_mon,
        df_state_tues,
        df_state_wed,
        df_state_thur,
        df_state_fri,
        df_state_sat,
        df_state_sun,
    ],
    ignore_index=True,
)


df_state_weekday

Unnamed: 0,state,US_state_code,article_score,weekday
0,Alabama,AL,0.031,Monday
1,Alaska,AK,0.184,Monday
2,Arizona,AZ,0.042,Monday
3,Arkansas,AR,-0.075,Monday
4,California,CA,0.031,Monday
...,...,...,...,...
352,Virginia,VA,-0.286,Sunday
353,Washington,WA,-0.16,Sunday
354,West Virginia,WV,-0,Sunday
355,Wisconsin,WI,-0.309,Sunday


## States - final data by weekday

In [31]:
# Split the combined table into one frame per weekday via a boolean mask.
(
    state_monday,
    state_tuesday,
    state_wednesday,
    state_thursday,
    state_friday,
    state_saturday,
    state_sunday,
) = (
    df_state_weekday[df_state_weekday['weekday'] == day_name]
    for day_name in (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )
)

## State Animations

In [16]:
# Create animation

# Animate the per-state scores with one frame per year.
# The previous call pointed at "country_ISO_code" and "country", columns
# that df_states_year does not have, so it is now keyed on the US state
# code and restricted to the USA. The score is converted to a number so
# the continuous colour scale applies (non-numeric colour values are
# treated as discrete categories).
px.choropleth(
    df_state_year.assign(
        article_score=pd.to_numeric(df_state_year["article_score"], errors="coerce")
    ),
    locations="US_state_code",
    locationmode="USA-states",
    scope="usa",
    color="article_score",
    hover_name="state",
    animation_frame="year",
    color_continuous_scale='Plasma',
    height=600,
)

In [24]:
# # Create a GIF for the yearly global plots:
# years = ['2015', '2016', '2017']

# # for year in years:
# #     fig(year)
# fig(2015) = './news_app/static/img/fig_global_2015.png'
# fig(2016) = './news_app/static/img/fig_global_2016.png'
# fig(2017) = './news_app/static/img/fig_global_2017.png'


# images = [fig(2015), fig(2016), fig(2017)]
# # looping over the images and saving them into a list
# for i in range(images):
#   images.append(images)

# # creating the GIF
# images[0].save('./news_app/static/img/fig_global_2015-16-17.gif',
#                save_all=True, append_images=images[1:], optimize=True, duration=800, loop=0)

In [42]:
# import numpy as np
# import pandas as pd
# import plotly.graph_objs as go
# import plotly.plotly as py

# # min year in your dataset
# year = 2015
# # years = ['2015', '2016', '2017']
# # your color-scale
# colorscale = 'RdBu'
# # scl = [[0.0, '#ffffff'],[0.2, '#b4a8ce'],[0.4, '#8573a9'],
# #        [0.6, '#7159a3'],[0.8, '#5732a1'],[1.0, '#2c0579']] # purples


# data_slider = []
# for year in df['years'].unique():
#     df_segmented =  df[(df['years']== year)]

#     for col in df_segmented.columns:
#         df_segmented[col] = df_segmented[col].astype(str)

#     data_each_yr = dict(
#                         type='choropleth',
#                         locations = df_segmented['state'],
#                         z=df_segmented['sightings'].astype(float),
#                         locationmode='USA-states',
#                         colorscale = scl,
#                         colorbar= {'title':'# Sightings'})

#     data_slider.append(data_each_yr)

# steps = []
# for i in range(len(data_slider)):
#     step = dict(method='restyle',
#                 args=['visible', [False] * len(data_slider)],
#                 label='Year {}'.format(i + 1998))
#     step['args'][1][i] = True
#     steps.append(step)

# sliders = [dict(active=0, pad={"t": 1}, steps=steps)]

# layout = dict(title ='UFO Sightings by State Since 1998', geo=dict(scope='usa',
#                        projection={'type': 'albers usa'}),
#               sliders=sliders)

# fig = dict(data=data_slider, layout=layout)
# periscope.plotly(fig)

ImportError: 
The plotly.plotly module is deprecated,
please install the chart-studio package and use the
chart_studio.plotly module instead. 
