 1.1.1	Example: Linking policy agendas data to party manifestos
 
    Each observation in the output data set will describe a political party in a particular country, during a particular year. From the CMP data, we will include a measurement of the party’s expressed policy preferences, and from the CAP data we will include one summary measure of the national legislative policy agenda, the percentage of legislative attention directed to the economy (major topic 1).

In [1]:
import pandas as pd
import numpy as np

from src.get_manifesto import *
from config import manifesto_key

import dateutil.parser as dparser

pd.options.mode.chained_assignment = None

      1.1.1.1	 Reshape and combine the two CAP data sets
      1.1.1.2	 Harmonize country reference variable

In [2]:
france = pd.read_csv('data/raw/CAP/bills_5.csv')
france.head(2)

Unnamed: 0.1,Unnamed: 0,monthyear,yearweek,year,id,date,title,text,text2,pm,president,original_subtopic,original_majortopic,codegeo1,codegeo2,type,subtopic,majortopic
0,26.0,197401.0,197405.0,1974.0,746000371.0,1/30/1974,CONSEIL DU 30 JANVIER 1974,COMMUNIQUE OFFICIEL:SUR LE RAPPORT DU MINISTRE...,"M. GISCARD D'ESTAING A FAIT APPROUVER, CONJOIN...",Messmer 2,Georges Pompidou,107.0,1.0,,,P,107.0,1.0
1,36.0,197402.0,197407.0,1974.0,746000444.0,2/13/1974,CONSEIL DU 13 FEVRIER 1974,COMMUNIQUE OFFICIEL:LE MINISTRE DE L'ECONOMIE ...,D'AUTRE_PART M. GISCARD D'ESTAING A FAIT APPRO...,Messmer 2,Georges Pompidou,1501.0,15.0,,,P,1501.0,15.0


In [3]:
denmark = pd.read_csv('data/raw/CAP/Love_01092019_-_Sheet1.csv')
denmark.head(2)

Unnamed: 0,id,year,majortopic,subtopic,description,filter_eu,var2,var3,var4,var5,...,var8_16,var8_17,var8_21,var13,var16,var18,var19,var23,calendar_year,var17
0,2619,1973,13,1300,Forslag til lov om social bistand,0.0,3,5,33,18.0,...,999.0,,,1,,1300,1300.0,,1973.0,10/9/1973
1,3206,1973,1,107,Lov om ændring af lov om registreringsafgift,0.0,3,3,7,3.0,...,999.0,,,26,,107,107.0,,1973.0,10/10/1973


In [4]:
def reshape_annual_level(df, majortopic, country):
    df['n'] = 1
    df['economy'] = np.where(df[majortopic] == 1.0, 1, 0)
    df_yearly = df.groupby('year').sum()
    df_yearly['CAP_per_economy'] = ( df_yearly['economy'] / df_yearly['n'] ) * 100
    df_yearly['countryname'] = country
    return df_yearly[['n', 'economy', 'CAP_per_economy', 'countryname']]

In [30]:
france_yr = reshape_annual_level(france, 'original_majortopic', 'France')
denmark_yr = reshape_annual_level(denmark, 'majortopic', 'Denmark')

In [32]:
cap = pd.concat([france_yr, denmark_yr])
cap.head()

Unnamed: 0_level_0,n,economy,CAP_per_economy,countryname
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1974.0,76,8,10.526316,France
1975.0,107,11,10.280374,France
1976.0,105,8,7.619048,France
1977.0,94,12,12.765957,France
1978.0,54,10,18.518519,France


In [33]:
cap.groupby('countryname').count()

Unnamed: 0_level_0,n,economy,CAP_per_economy
countryname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Denmark,64,64,64
France,40,40,40


    1.1.1.3	Prepare the CMP data for joining

For example, one option is to assume that parties are tied to their expressed policy preferences from the previous election until they produce a new manifesto

In [7]:
VERSION = "MPDS2020b"
a = Manifesto_data(VERSION, manifesto_key)
meta_data = a.get_meta_data()
meta_data.head()

INFO:root:Available version numbers: ['MPDS2012a', 'MPDS2012b', 'MPDS2013a', 'MPDS2013b', 'MPDS2014a', 'MPDS2014b', 'MPDS2015a', 'MPDS2016a', 'MPDS2016b', 'MPDS2017a', 'MPDS2017b', 'MPDS2018a', 'MPDS2018b', 'MPDS2019a', 'MPDS2019b', 'MPDS2020a', 'MPDS2020b']
INFO:root:The most recent version number is: MPDS2020b.

INFO:root:The most recent available version number (MPDS2020b) is the same as the version you've initialized (MPDS2020b)
INFO:root:Available countries:

['Sweden', 'Norway', 'Denmark', 'Finland', 'Iceland', 'Belgium', 'Netherlands', 'Luxembourg', 'France', 'Italy', 'Spain', 'Greece', 'Portugal', 'Germany', 'Austria', 'Switzerland', 'United Kingdom', 'Northern Ireland', 'Ireland', 'Malta', 'Cyprus', 'United States', 'Canada', 'Australia', 'New Zealand', 'Japan', 'Israel', 'Sri Lanka', 'Turkey', 'Albania', 'Armenia', 'Azerbaijan', 'Belarus', 'Bosnia-Herzegovina', 'Bulgaria', 'Croatia', 'Czech Republic', 'Estonia', 'Georgia', 'German Democratic Republic', 'Hungary', 'Latvia', 'L

Unnamed: 0,country,countryname,oecdmember,eumember,edate,date,party,partyname,partyabbrev,parfam,...,per608_3,per703_1,per703_2,rile,planeco,markeco,welfare,intpeace,datasetversion,id_perm
0,11,Sweden,0,0,17/09/1944,194409,11220,Communist Party of Sweden,SKP,20,...,,,,9.6,1.9,1.9,0.0,1.9,2020b,JN1LZH
1,11,Sweden,0,0,17/09/1944,194409,11320,Social Democratic Labour Party,SAP,30,...,,,,-37.8,3.3,2.2,33.4,5.600000000000001,2020b,CMR7F6
2,11,Sweden,0,0,17/09/1944,194409,11420,People’s Party,FP,40,...,,,,9.5,3.2,6.4,14.3,1.6,2020b,Z6OL6C
3,11,Sweden,0,0,17/09/1944,194409,11620,Right Party,,60,...,,,,28.0,1.8,22.8,10.6,0.0,2020b,YMKVN2
4,11,Sweden,0,0,17/09/1944,194409,11810,Agrarian Party,,80,...,,,,23.81,0.0,19.048,0.0,4.762,2020b,U4SCRD


In [10]:
class Expand_cmp_data():
    
    def __init__(self, dataset, startdate):

        dataset['year'] = dataset['edate'].apply(lambda x: self.myparser(x).year)
        self.data = dataset
        self.startdate = startdate
        self.imputed_data = self.expanding_data()

    def myparser(self, x):
        try:
            return dparser.parse(x,fuzzy=True)
        except:
            return None

    def expanding_data(self):

        years_france = sorted(set( [y for y in set(self.data['year'])]  ))
        all_years = list(range(self.startdate, 2020, 1))

        results = []

        for conseq_year in all_years:

            logging.info('-------')
            logging.info(f'{conseq_year}')
            logging.info('-------')

            if conseq_year in years_france:

                logging.info(f"{conseq_year}: Election year. Therefore, we can simple use the cmp data of {conseq_year}")

                df = self.data [ self.data['year'] == conseq_year ]
                df['imputed_year'] = conseq_year
                df['election_year'] = 1

                results.append( df )

            elif conseq_year not in years_france: 

                previous_election = [e for e in years_france if e < conseq_year][-1]

                logging.info(f"{conseq_year} was not an election year. There, we impute the data of the year {previous_election}")

                df = self.data [ self.data['year'] == previous_election ]
                df['imputed_year'] = conseq_year
                df['election_year'] = 0

                results.append( df )

        d = pd.concat(results)
        #d.set_index('year', inplace=True)
        
        return d

In [18]:
a = Expand_cmp_data(meta_data[ meta_data['countryname'] == 'France' ], 1951)
france_cmp = a.imputed_data
france_cmp.head()

INFO:root:-------
INFO:root:1951
INFO:root:-------
INFO:root:1951: Election year. Therefore, we can simple use the cmp data of 1951
INFO:root:-------
INFO:root:1952
INFO:root:-------
INFO:root:1952 was not an election year. There, we impute the data of the year 1951
INFO:root:-------
INFO:root:1953
INFO:root:-------
INFO:root:1953 was not an election year. There, we impute the data of the year 1951
INFO:root:-------
INFO:root:1954
INFO:root:-------
INFO:root:1954 was not an election year. There, we impute the data of the year 1951
INFO:root:-------
INFO:root:1955
INFO:root:-------
INFO:root:1955 was not an election year. There, we impute the data of the year 1951
INFO:root:-------
INFO:root:1956
INFO:root:-------
INFO:root:1956: Election year. Therefore, we can simple use the cmp data of 1956
INFO:root:-------
INFO:root:1957
INFO:root:-------
INFO:root:1957 was not an election year. There, we impute the data of the year 1956
INFO:root:-------
INFO:root:1958
INFO:root:-------
INFO:root:

INFO:root:2012
INFO:root:-------
INFO:root:2012: Election year. Therefore, we can simple use the cmp data of 2012
INFO:root:-------
INFO:root:2013
INFO:root:-------
INFO:root:2013 was not an election year. There, we impute the data of the year 2012
INFO:root:-------
INFO:root:2014
INFO:root:-------
INFO:root:2014 was not an election year. There, we impute the data of the year 2012
INFO:root:-------
INFO:root:2015
INFO:root:-------
INFO:root:2015 was not an election year. There, we impute the data of the year 2012
INFO:root:-------
INFO:root:2016
INFO:root:-------
INFO:root:2016 was not an election year. There, we impute the data of the year 2012
INFO:root:-------
INFO:root:2017
INFO:root:-------
INFO:root:2017: Election year. Therefore, we can simple use the cmp data of 2017
INFO:root:-------
INFO:root:2018
INFO:root:-------
INFO:root:2018 was not an election year. There, we impute the data of the year 2017
INFO:root:-------
INFO:root:2019
INFO:root:-------
INFO:root:2019 was not an el

Unnamed: 0,country,countryname,oecdmember,eumember,edate,date,party,partyname,partyabbrev,parfam,...,rile,planeco,markeco,welfare,intpeace,datasetversion,id_perm,year,imputed_year,election_year
1234,31,France,0,0,17/06/1951,195106,31220,French Communist Party,PCF,20,...,0.325,1.623,0.0,10.39,14.935,2020b,2XOHOR,1951,1951,1
1235,31,France,0,0,17/06/1951,195106,31320,French Section of the Workers' International,SIFO,30,...,-15.607,2.89,0.0,7.514,7.514,2020b,L17ORA,1951,1951,1
1236,31,France,0,0,17/06/1951,195106,31421,Radical Socialist Party,RRRS,40,...,15.383,2.403,5.611,6.271,3.337,2020b,H1SYMR,1951,1951,1
1237,31,France,0,0,17/06/1951,195106,31521,Popular Republican Movement,MRP,50,...,25.928,2.397,2.397,4.464,1.712,2020b,6QTLOZ,1951,1951,1
1238,31,France,0,0,17/06/1951,195106,31621,Rally for the French People - Gaullists,RPF,60,...,25.0,1.724,1.724,0.862,1.724,2020b,WJC4RE,1951,1951,1


In [19]:
a = Expand_cmp_data(meta_data[ meta_data['countryname'] == 'Denmark' ], 1950)
denmark_cmp = a.imputed_data
denmark_cmp.head()

INFO:root:-------
INFO:root:1950
INFO:root:-------
INFO:root:1950: Election year. Therefore, we can simple use the cmp data of 1950
INFO:root:-------
INFO:root:1951
INFO:root:-------
INFO:root:1951 was not an election year. There, we impute the data of the year 1950
INFO:root:-------
INFO:root:1952
INFO:root:-------
INFO:root:1952 was not an election year. There, we impute the data of the year 1950
INFO:root:-------
INFO:root:1953
INFO:root:-------
INFO:root:1953: Election year. Therefore, we can simple use the cmp data of 1953
INFO:root:-------
INFO:root:1954
INFO:root:-------
INFO:root:1954 was not an election year. There, we impute the data of the year 1953
INFO:root:-------
INFO:root:1955
INFO:root:-------
INFO:root:1955 was not an election year. There, we impute the data of the year 1953
INFO:root:-------
INFO:root:1956
INFO:root:-------
INFO:root:1956 was not an election year. There, we impute the data of the year 1953
INFO:root:-------
INFO:root:1957
INFO:root:-------
INFO:root:

INFO:root:-------
INFO:root:2011: Election year. Therefore, we can simple use the cmp data of 2011
INFO:root:-------
INFO:root:2012
INFO:root:-------
INFO:root:2012 was not an election year. There, we impute the data of the year 2011
INFO:root:-------
INFO:root:2013
INFO:root:-------
INFO:root:2013 was not an election year. There, we impute the data of the year 2011
INFO:root:-------
INFO:root:2014
INFO:root:-------
INFO:root:2014 was not an election year. There, we impute the data of the year 2011
INFO:root:-------
INFO:root:2015
INFO:root:-------
INFO:root:2015: Election year. Therefore, we can simple use the cmp data of 2015
INFO:root:-------
INFO:root:2016
INFO:root:-------
INFO:root:2016 was not an election year. There, we impute the data of the year 2015
INFO:root:-------
INFO:root:2017
INFO:root:-------
INFO:root:2017 was not an election year. There, we impute the data of the year 2015
INFO:root:-------
INFO:root:2018
INFO:root:-------
INFO:root:2018 was not an election year. Th

Unnamed: 0,country,countryname,oecdmember,eumember,edate,date,party,partyname,partyabbrev,parfam,...,rile,planeco,markeco,welfare,intpeace,datasetversion,id_perm,year,imputed_year,election_year
280,13,Denmark,0,0,05/09/1950,195009,13220,Danish Communist Party,DKP,20,...,-26.9,1.8,3.6,12.5,25.1,2020b,4XH2IG,1950,1950,1
281,13,Denmark,0,0,05/09/1950,195009,13320,Social Democratic Party,SD,30,...,-9.2,4.600000000000001,1.5,13.8,4.600000000000001,2020b,E232ME,1950,1950,1
282,13,Denmark,0,0,05/09/1950,195009,13410,Danish Social-Liberal Party,RV,40,...,0.0,9.6,4.8,0.0,4.8,2020b,MJZGXQ,1950,1950,1
283,13,Denmark,0,0,05/09/1950,195009,13420,Liberals,V,40,...,20.0,0.0,17.5,2.5,7.5,2020b,1W5317,1950,1950,1
284,13,Denmark,0,0,05/09/1950,195009,13620,Conservative People’s Party,KF,60,...,38.2,0.0,14.3,4.8,0.0,2020b,WI1QFD,1950,1950,1


In [22]:
cmp = pd.concat([denmark_cmp, france_cmp])
cmp.head()

Unnamed: 0,country,countryname,oecdmember,eumember,edate,date,party,partyname,partyabbrev,parfam,...,rile,planeco,markeco,welfare,intpeace,datasetversion,id_perm,year,imputed_year,election_year
280,13,Denmark,0,0,05/09/1950,195009,13220,Danish Communist Party,DKP,20,...,-26.9,1.8,3.6,12.5,25.1,2020b,4XH2IG,1950,1950,1
281,13,Denmark,0,0,05/09/1950,195009,13320,Social Democratic Party,SD,30,...,-9.2,4.600000000000001,1.5,13.8,4.600000000000001,2020b,E232ME,1950,1950,1
282,13,Denmark,0,0,05/09/1950,195009,13410,Danish Social-Liberal Party,RV,40,...,0.0,9.6,4.8,0.0,4.8,2020b,MJZGXQ,1950,1950,1
283,13,Denmark,0,0,05/09/1950,195009,13420,Liberals,V,40,...,20.0,0.0,17.5,2.5,7.5,2020b,1W5317,1950,1950,1
284,13,Denmark,0,0,05/09/1950,195009,13620,Conservative People’s Party,KF,60,...,38.2,0.0,14.3,4.8,0.0,2020b,WI1QFD,1950,1950,1


    1.1.1.4	Final merge

In [24]:
result = pd.merge(cap, cmp, on=['year', 'countryname'])

In [29]:
pd.set_option('display.max_rows', None)
result.head(100)

Unnamed: 0,year,n,economy,CAP_per_economy,countryname,country,oecdmember,eumember,edate,date,...,per703_2,rile,planeco,markeco,welfare,intpeace,datasetversion,id_perm,imputed_year,election_year
0,1978.0,54,10,18.518519,France,31,10,10,12/03/1978,197803,...,,-39.9,16.5,0.0,13.4,3.9,2020b,OPHBEQ,1978,1
1,1978.0,54,10,18.518519,France,31,10,10,12/03/1978,197803,...,,-39.2,13.6,2.0,11.6,5.600000000000001,2020b,COQLPG,1978,1
2,1978.0,54,10,18.518519,France,31,10,10,12/03/1978,197803,...,,19.1,5.4,6.8,9.6,2.7,2020b,ACCFCA,1978,1
3,1978.0,54,10,18.518519,France,31,10,10,12/03/1978,197803,...,,11.4,2.6,9.6,7.100000000000001,2.6,2020b,S1OJPB,1978,1
4,1978.0,54,10,18.518519,France,31,10,10,12/03/1978,197803,...,,10.909,0.0,21.818,3.636,0.0,2020b,2NQ9TT,1978,1
5,1978.0,54,10,18.518519,France,31,10,10,12/03/1978,197803,...,,-39.9,16.5,0.0,13.4,3.9,2020b,OPHBEQ,1979,0
6,1978.0,54,10,18.518519,France,31,10,10,12/03/1978,197803,...,,-39.2,13.6,2.0,11.6,5.600000000000001,2020b,COQLPG,1979,0
7,1978.0,54,10,18.518519,France,31,10,10,12/03/1978,197803,...,,19.1,5.4,6.8,9.6,2.7,2020b,ACCFCA,1979,0
8,1978.0,54,10,18.518519,France,31,10,10,12/03/1978,197803,...,,11.4,2.6,9.6,7.100000000000001,2.6,2020b,S1OJPB,1979,0
9,1978.0,54,10,18.518519,France,31,10,10,12/03/1978,197803,...,,10.909,0.0,21.818,3.636,0.0,2020b,2NQ9TT,1979,0
