In [41]:
import pandas as pd
import datetime
import config
import plotly.plotly as py
import plotly.graph_objs as go
import cufflinks as cf

In [2]:
# Directory prefixes taken from the project-local `config` module.
# Later cells build file names by string concatenation (prefix + 'name.csv').

# input / output data locations
path_source_data = config.PATH_SOURCE_DATA
path_output_data = config.PATH_OUTPUT_DATA

# scratch files and exports for visualisations
path_temp = config.PATH_TEMP_DATA
path_visuals = config.PATH_VIZ

In [3]:
# load all elections from the source CSV and show how many rows were read

elections_csv = path_source_data + 'view_election.csv'
elections = pd.read_csv(elections_csv)
len(elections)

8146

In [4]:
# remove ep elections: keep only rows whose election_type is 'parliament'
# (every other election_type value is dropped)

is_parliament = elections['election_type'] == 'parliament'
elections = elections[is_parliament]
len(elections)

6736

In [5]:
# extract election year
#
# `elections` was produced by boolean filtering, so assigning a column into it
# directly can raise pandas' SettingWithCopyWarning. assign() returns a new
# frame with the extra column instead, and re-running the cell gives the same
# result.

elections = elections.assign(
    election_year=pd.to_datetime(elections['election_date']).dt.year
)

In [6]:
# select necessary columns (everything else in the elections table is dropped)

election_columns = [
    'country_name',
    'country_name_short',
    'vote_share',
    'seats',
    'party_id',
    'party_name',
    'party_name_english',
    'election_year',
]
elections = elections[election_columns]

In [7]:
# merge with party info to get party family names
#
# Left join on party_id: every election row is kept, party attributes are NaN
# where no matching party row exists. Exact duplicate rows are removed afterwards.

party_info_columns = [
    'party_id',
    'family_id',
    'family_name',
    'left_right',
    'liberty_authority',
    'eu_anti_pro',
]
party = pd.read_csv(path_source_data + 'view_party.csv')
party = party[party_info_columns]

df = elections.merge(party, how='left', on='party_id')
df = df.drop_duplicates()
len(df)

6733

In [8]:
# remove non_eu members: drop every row whose country_name is in the list below

non_eu_members = [
    'Norway', 'Canada', 'Australia', 'Switzerland', 'New Zealand',
    'Iceland', 'Japan', 'Israel', 'Turkey',
]

is_non_eu = df['country_name'].isin(non_eu_members)
df = df[~is_non_eu]
len(df)

4725

In [9]:
# reshape data so that every year has values, not only election years
#
# Pivoting gives one row per election_year and one column per
# (measure, country_name, party_id, family_name); duplicates are averaged
# (pivot_table's default aggfunc). stack() discards rows that are entirely NaN,
# so stacking the raw pivot would throw the non-election years away again.
# The gaps are therefore filled while the data is still wide, where each
# column is the time series of a single party, and only then stacked.

measure_columns = ['seats',
                   'vote_share',
                   'left_right',
                   'liberty_authority',
                   'eu_anti_pro']
series_keys = ['country_name',
               'party_id',
               'family_name']

yearly = df.pivot_table(index=['election_year'],
                        columns=series_keys,
                        values=measure_columns)

# add rows for calendar years in which none of the countries held an election
if len(yearly):
    yearly = yearly.reindex(pd.RangeIndex(int(yearly.index.min()),
                                          int(yearly.index.max()) + 1,
                                          name='election_year'))

# carry the last observation forward; bfill() is NaN only after a column's
# final observation, so the mask stops a series from being extended past it
yearly = yearly.ffill().where(yearly.bfill().notna())

# back to long format: index (election_year, country_name, party_id, family_name),
# one column per measure; rows without any value are dropped explicitly
df2 = yearly.stack([1, 2, 3]).dropna(how='all')


In [36]:
# fill the NaNs
#
# df2 is indexed by (election_year, country_name, party_id, family_name), so
# neighbouring rows belong to different parties. A plain column-wise ffill would
# copy one party's value into the next party's row; filling is therefore done
# within each (country, party, family) group only.
# assumes rows are in chronological order within a group (the index leads with
# election_year) - TODO confirm if df2 is ever re-sorted upstream

by_party = df2.groupby(level=['country_name', 'party_id', 'family_name'])

# forward-fill, but leave NaN after a group's last observation of a column
# (bfill() is NaN exactly there), mirroring the former last_valid_index() cut-off
df2 = by_party.ffill().where(by_party.bfill().notna())

In [11]:
# try to find changes in number of parties per country
# step 1: mean vote_share for every (year, country, party) combination

party_keys = ['election_year', 'country_name', 'party_id']
df3 = df2.groupby(party_keys)['vote_share'].mean()

In [12]:
# export the per-party vote shares for the visualisation (header row included)
vis_v3_csv = path_visuals + 'vis_v3.csv'
df3.to_csv(vis_v3_csv, header=True)

In [13]:
# number of non-NaN party entries per (year, country)
country_year_keys = ['election_year', 'country_name']
df4 = df3.groupby(country_year_keys).count()

In [14]:
# export the party counts for the visualisation (header row included)
vis_v4_csv = path_visuals + 'vis_v4.csv'
df4.to_csv(vis_v4_csv, header=True)

In [22]:
# has the voteshare of families decreased?
# vote_share summed over all rows sharing the same (year, party family)

family_keys = ['election_year', 'family_name']
df5 = df2.groupby(family_keys)['vote_share'].sum()



In [38]:
# dump the family totals to the temp directory for inspection (header row included)
d5_csv = path_temp + 'd5.csv'
df5.to_csv(d5_csv, header=True)





In [52]:

# Turn the grouped Series into a flat frame with the columns
# election_year, family_name and vote_share.
# Guarded so that re-running the cell does not reset the index a second time,
# which would add a spurious `index` column.
if isinstance(df5, pd.Series):
    df5 = df5.reset_index()

In [55]:

# interactive bar chart: vote_share against election_year
# (`iplot` is presumably the DataFrame method added by the cufflinks import)
# NOTE(review): df5 also has a `family_name` column that is not passed here, so
# the chart does not distinguish party families - confirm this is intended.
df5.iplot(x='election_year', y='vote_share', kind='bar')

In [None]:
# create a trace
# The cell was left unfinished (unclosed call, empty column key); y now uses the
# `vote_share` column, the same one the bar chart cell plots.
trace = go.Scatter(
    x=df5['election_year'],
    y=df5['vote_share'],
)

In [40]:
# Two hard-coded sample line traces (placeholder data, not election results),
# sent to plotly under the file name 'basic-line'.
trace0 = go.Scatter(x=[1, 2, 3, 4], y=[10, 15, 13, 17])
trace1 = go.Scatter(x=[1, 2, 3, 4], y=[16, 5, 11, 9])

data = [trace0, trace1]

py.iplot(data, filename='basic-line')


Consider using IPython.display.IFrame instead

