In [2]:
import os
import pandas as pd

## Loading the data

In [16]:
# Representatives table; its "Party" column is read later in the notebook.
# The file is available upon request.
representatives_csv = '../../../data/00_raw_data/twitter_representatives_handles_final.csv'
representatives = pd.read_csv(representatives_csv)

In [12]:
basepath = "../../../data/05_swing_voters"

# One file per transition window (files available upon request):
#   bd = before -> during, da = during -> after, ba = before -> after
swing_voters_bd, swing_voters_da, swing_voters_ba = (
    pd.read_csv(os.path.join(basepath, filename))
    for filename in ('swing_voters_bd.csv', 'swing_voters_da.csv', 'swing_voters_ba.csv')
)

# Report how many rows each window contributes.
for window, frame in (("BD", swing_voters_bd), ("DA", swing_voters_da), ("BA", swing_voters_ba)):
    print(f"Number of {window} swing voters: {len(frame)}")

Number of BD swing voters: 5324
Number of DA swing voters: 8303
Number of BA swing voters: 3699


In [13]:
# Tag every row with the transition window of the file it came from, so the
# windows can still be told apart once the frames are stacked together.
for frame, window in (
    (swing_voters_bd, 'BD'),
    (swing_voters_da, 'DA'),
    (swing_voters_ba, 'BA'),
):
    frame['swing_vote_time'] = window

In [14]:
# Stack the three windows into one frame. ignore_index=True gives the result a
# fresh 0..n-1 index; without it each input keeps its own labels, so the same
# label would appear up to three times and any later label-based lookup or
# index alignment on `swing_voters` would be ambiguous.
swing_voters = pd.concat(
    [swing_voters_bd, swing_voters_da, swing_voters_ba],
    ignore_index=True,
)

## Classify swing voters

Adjust non-political communities to "noparty"

In [17]:
# A missing party label marks a non-political community: replace it with the
# "noparty" sentinel and make sure both columns hold plain strings.
for column in ("party1", "party2"):
    swing_voters[column] = swing_voters[column].fillna("noparty").astype(str)

Get unique parties

In [18]:
# Distinct party names, in order of first appearance in the representatives table.
all_parties = pd.unique(representatives["Party"])
all_parties

array(['Alleanza Verdi Sinistra', 'Azione - Italia Viva', 'Forza Italia',
       "Fratelli d'Italia", 'Lega', 'Movimento 5s', 'Noi moderati',
       'Partito Democratico'], dtype=object)

Specify party abbreviations and coalition groups

In [19]:
# Short labels for the parties.
# NOTE(review): presumably listed in the same order as `all_parties` - confirm.
all_parties_abbr = [
    "AVS",
    "Az-Iv",
    "FI",
    "FdI",
    "L",
    "M5s",
    "NM",
    "PD",
]

In [20]:
# Coalition membership by party abbreviation. "Az-Iv" and "M5s" appear in
# neither list, so they are not treated as part of either coalition.
right_wing_coalition = [
    "FI",
    "FdI",
    "L",
    "NM",
]
left_wing_coalition = [
    "AVS",
    "PD",
]

Populate swing type lists

In [23]:
def classify_swing_vote(party1, party2, right_wing, left_wing):
    """Return the swing category of one community transition.

    Parameters
    ----------
    party1, party2 : str
        Party label of the origin / destination community: either the
        sentinel "noparty" or one or more party abbreviations joined by ";".
    right_wing, left_wing : iterable of str
        Party abbreviations that make up each coalition.

    Returns
    -------
    str
        One of "nan_to_party", "party_to_nan", "no_real_swing_vote",
        "softswing" or "hardswing".
    """
    # The origin is checked first, so noparty -> noparty counts as "nan_to_party".
    if party1 == "noparty":
        return "nan_to_party"
    if party2 == "noparty":
        return "party_to_nan"

    origin = set(party1.split(";"))
    destination = set(party2.split(";"))

    # Both communities share at least one party: spurious swing vote.
    # (This also covers the case where one label is a single party that
    # appears inside the other, multi-party label.)
    if origin & destination:
        return "no_real_swing_vote"

    # Soft swing: a politically coherent transition, i.e. both communities
    # contain at least one party of the same coalition.
    for coalition in (right_wing, left_wing):
        if origin.intersection(coalition) and destination.intersection(coalition):
            return "softswing"

    return "hardswing"


# (party1, party2) pairs collected per category, in row order.
pairs_by_type = {
    "no_real_swing_vote": [],
    "nan_to_party": [],
    "party_to_nan": [],
    "softswing": [],
    "hardswing": [],
}

# One label per row of `swing_voters`, in row order.
swing_vote_type = []

# Iterate over the two columns directly; building a Series per row with
# iterrows() is unnecessary here.
for party1, party2 in zip(swing_voters["party1"], swing_voters["party2"]):
    label = classify_swing_vote(party1, party2, right_wing_coalition, left_wing_coalition)
    swing_vote_type.append(label)
    pairs_by_type[label].append([party1, party2])

# Keep the per-category lists under the names used by the rest of the notebook.
no_real_swing_vote = pairs_by_type["no_real_swing_vote"]
nan_to_party = pairs_by_type["nan_to_party"]
party_to_nan = pairs_by_type["party_to_nan"]
soft_swing_voters = pairs_by_type["softswing"]
hard_swing_voters = pairs_by_type["hardswing"]

In [24]:
# Attach the labels; `swing_vote_type` holds one entry per row, in row order.
swing_voters["swing_vote_type"] = swing_vote_type

# Save the labelled table to csv (file available upon request).
swingers_full_path = os.path.join(basepath, 'swingers_full.csv')
swing_voters.to_csv(swingers_full_path, index=False)

In [35]:
# Number of rows per swing category within each transition window.
type_counts_by_window = (
    swing_voters
    .groupby("swing_vote_time")["swing_vote_type"]
    .value_counts()
)
type_counts_by_window

swing_vote_time  swing_vote_type   
BA               no_real_swing_vote    1957
                 nan_to_party          1170
                 hardswing              510
                 party_to_nan            50
                 softswing               12
BD               no_real_swing_vote    4151
                 hardswing              576
                 party_to_nan           382
                 nan_to_party           210
                 softswing                5
DA               no_real_swing_vote    5524
                 nan_to_party          2110
                 hardswing              622
                 party_to_nan            47
Name: swing_vote_type, dtype: int64

In [37]:
# The BA (before -> after) window should not be considered when looking at
# the total numbers, so it is filtered out before counting.
outside_ba = swing_voters["swing_vote_time"] != "BA"
swing_voters.loc[outside_ba, "swing_vote_type"].value_counts()

no_real_swing_vote    9675
nan_to_party          2320
hardswing             1198
party_to_nan           429
softswing                5
Name: swing_vote_type, dtype: int64

# Examining the most common party/coalition user migrations

##### Before electoral campaign -> during campaign

In [47]:
# Hard swing voters whose change happened from before to during the
# electoral campaign, counted per (party1, party2) pair.
is_hardswing = swing_voters["swing_vote_type"] == "hardswing"
in_bd = swing_voters["swing_vote_time"] == "BD"
swing_voters.loc[is_hardswing & in_bd, ['party1', 'party2']].value_counts().reset_index()

Unnamed: 0,party1,party2,0
0,FdI,PD,108
1,L;Az-Iv,AVS;PD,100
2,AVS;PD,Az-Iv,76
3,AVS;PD,M5s;FdI,64
4,M5s,AVS;PD,61
5,FdI,AVS;PD,39
6,M5s,PD,34
7,FdI,Az-Iv,32
8,L;FdI,Az-Iv,20
9,L;FdI,PD,15


In [48]:
# Swing voters moving from a political to a non-political community between
# before and during the electoral campaign, counted per (party1, party2) pair.
is_party_to_nan = swing_voters["swing_vote_type"] == "party_to_nan"
in_bd = swing_voters["swing_vote_time"] == "BD"
swing_voters.loc[is_party_to_nan & in_bd, ['party1', 'party2']].value_counts().reset_index()

Unnamed: 0,party1,party2,0
0,L;FdI,noparty,191
1,M5s,noparty,81
2,AVS;PD,noparty,35
3,L;Az-Iv,noparty,35
4,FdI,noparty,34
5,FI,noparty,6


In [49]:
# Swing voters moving from a non-political to a political community between
# before and during the electoral campaign, counted per (party1, party2) pair.
is_nan_to_party = swing_voters["swing_vote_type"] == "nan_to_party"
in_bd = swing_voters["swing_vote_time"] == "BD"
swing_voters.loc[is_nan_to_party & in_bd, ['party1', 'party2']].value_counts().reset_index()

Unnamed: 0,party1,party2,0
0,noparty,FI;L;PD;FdI;NM,161
1,noparty,M5s;FdI,26
2,noparty,AVS;PD,12
3,noparty,PD,9
4,noparty,Az-Iv,2


In [None]:
# Swing voters moving from a non-political to a political community between
# before and during the electoral campaign, counted per (party1, party2) pair.
# NOTE(review): this cell was never executed and repeats the selection of the
# preceding cell ("nan_to_party" in window "BD"). Remove it, or check whether
# a different swing type was intended here.
is_nan_to_party = swing_voters["swing_vote_type"] == "nan_to_party"
in_bd = swing_voters["swing_vote_time"] == "BD"
swing_voters.loc[is_nan_to_party & in_bd, ['party1', 'party2']].value_counts().reset_index()

##### During electoral campaign -> After elections

In [50]:
# Swing voters moving from a non-political to a political community between
# the electoral campaign and after the elections, counted per (party1, party2) pair.
is_nan_to_party = swing_voters["swing_vote_type"] == "nan_to_party"
in_da = swing_voters["swing_vote_time"] == "DA"
swing_voters.loc[is_nan_to_party & in_da, ['party1', 'party2']].value_counts().reset_index()

Unnamed: 0,party1,party2,0
0,noparty,L;FdI,1757
1,noparty,Az-Iv;FdI,110
2,noparty,M5s,90
3,noparty,AVS;PD,87
4,noparty,FI;FdI,46
5,noparty,PD;Az-Iv,19
6,noparty,NM;FdI,1


In [51]:
# Hard swing voters whose change happened between the electoral campaign and
# after the elections, counted per (party1, party2) pair.
is_hardswing = swing_voters["swing_vote_type"] == "hardswing"
in_da = swing_voters["swing_vote_time"] == "DA"
swing_voters.loc[is_hardswing & in_da, ['party1', 'party2']].value_counts().reset_index()

Unnamed: 0,party1,party2,0
0,PD,Az-Iv;FdI,200
1,Az-Iv,AVS;PD,128
2,AVS;PD,Az-Iv;FdI,94
3,M5s;FdI,AVS;PD,88
4,AVS;PD,M5s,50
5,Az-Iv,FI;FdI,10
6,Az-Iv,L;FdI,9
7,PD,M5s,9
8,PD,L;FdI,8
9,PD,FI;FdI,7


In [52]:
# Swing voters moving from a political to a non-political community between
# the electoral campaign and after the elections, counted per (party1, party2) pair.
is_party_to_nan = swing_voters["swing_vote_type"] == "party_to_nan"
in_da = swing_voters["swing_vote_time"] == "DA"
swing_voters.loc[is_party_to_nan & in_da, ['party1', 'party2']].value_counts().reset_index()

Unnamed: 0,party1,party2,0
0,AVS;PD,noparty,22
1,M5s;FdI,noparty,8
2,PD,noparty,8
3,FI;L;PD;FdI;NM,noparty,6
4,Az-Iv,noparty,3


#### Party/coalition migrations (Before, During) & (During, After)

In [56]:
# Hard swing voters in either the BD or the DA window (BA is left out),
# counted per (party1, party2) pair.
is_hardswing = swing_voters["swing_vote_type"] == "hardswing"
in_bd_or_da = swing_voters["swing_vote_time"].isin(["BD", "DA"])
swing_voters.loc[is_hardswing & in_bd_or_da, ['party1', 'party2']].value_counts().reset_index()

Unnamed: 0,party1,party2,0
0,PD,Az-Iv;FdI,200
1,Az-Iv,AVS;PD,128
2,FdI,PD,108
3,L;Az-Iv,AVS;PD,100
4,AVS;PD,Az-Iv;FdI,94
5,M5s;FdI,AVS;PD,88
6,AVS;PD,Az-Iv,76
7,AVS;PD,M5s;FdI,64
8,M5s,AVS;PD,61
9,AVS;PD,M5s,50


## Store swing voter categories to csv

In [26]:
# Turn the collected pairs into a table of distinct (party1, party2)
# combinations and write it to the categories folder.
no_real_swing_vote = (
    pd.DataFrame(no_real_swing_vote, columns=["party1", "party2"])
    .drop_duplicates()
)
no_real_swing_vote_path = os.path.join(basepath, 'categories', 'no_real_swing_vote.csv')
no_real_swing_vote.to_csv(no_real_swing_vote_path, index=False)

In [27]:
# Turn the collected pairs into a table of distinct (party1, party2)
# combinations and write it to the categories folder.
nan_to_party = (
    pd.DataFrame(nan_to_party, columns=["party1", "party2"])
    .drop_duplicates()
)
nan_to_party_path = os.path.join(basepath, 'categories', 'nan_to_party.csv')
nan_to_party.to_csv(nan_to_party_path, index=False)

In [28]:
# Turn the collected pairs into a table of distinct (party1, party2)
# combinations and write it to the categories folder.
party_to_nan = (
    pd.DataFrame(party_to_nan, columns=["party1", "party2"])
    .drop_duplicates()
)
party_to_nan_path = os.path.join(basepath, 'categories', 'party_to_nan.csv')
party_to_nan.to_csv(party_to_nan_path, index=False)

In [29]:
# Turn the collected pairs into a table of distinct (party1, party2)
# combinations and write it to the categories folder.
soft_swing_voters = (
    pd.DataFrame(soft_swing_voters, columns=["party1", "party2"])
    .drop_duplicates()
)
soft_swing_voters_path = os.path.join(basepath, 'categories', 'soft_swing_voters.csv')
soft_swing_voters.to_csv(soft_swing_voters_path, index=False)

In [31]:
# Turn the collected pairs into a table of distinct (party1, party2)
# combinations and write it to the categories folder.
hard_swing_voters = (
    pd.DataFrame(hard_swing_voters, columns=["party1", "party2"])
    .drop_duplicates()
)
hard_swing_voters_path = os.path.join(basepath, 'categories', 'hard_swing_voters.csv')
hard_swing_voters.to_csv(hard_swing_voters_path, index=False)