# Libraries

In [197]:
import pandas as pd
import numpy as np
import os
import plotly.express as px
from tqdm import tqdm


# The data

In [109]:
data_folder = "data"
# os.listdir returns entries in arbitrary order; sorting makes the row order of
# data_df reproducible from one run (and one machine) to the next.
data_files = sorted(os.listdir(data_folder))

# The date is taken from the second "_"-separated field of the file name and the
# hour from the part of the third field before the first "-" (presumably names
# look like "<prefix>_<date>_<hour>-..." -- confirm against the data folder).
# Both are stored as strings.
_frames = []
for data_file in data_files:
    date_hour = data_file.split("_")
    date = date_hour[1]
    hour = date_hour[2].split("-")[0]
    _df = pd.read_csv(os.path.join(data_folder, data_file))
    _df['date'] = date
    _df['hour'] = hour
    _frames.append(_df)

# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every
# iteration; collect the pieces and concatenate them once instead.
data_df = pd.concat(_frames) if _frames else pd.DataFrame()
data_df.head(3)

Unnamed: 0,Judet,UAT,Localitate,Siruta,Nr sectie de votare,Nume sectie de votare,Mediu,Votanti pe lista permanenta,Votanti pe lista complementara,LP,...,Femei 113,Femei 114,Femei 115,Femei 116,Femei 117,Femei 118,Femei 119,Femei 120,date,hour
0,AB,MUNICIPIUL ALBA IULIA,ALBA IULIA,1017,1,CENTRUL DE ZI PENTRU PERSOANE VÂRSTNICE,U,1642,11,0,...,0,0,0,0,0,0,0,0,2020-09-27,7
1,AB,MUNICIPIUL ALBA IULIA,ALBA IULIA,1017,2,ŞCOALA GIMNAZIALĂ „VASILE GOLDIŞ”,U,1023,7,0,...,0,0,0,0,0,0,0,0,2020-09-27,7
2,AB,MUNICIPIUL ALBA IULIA,ALBA IULIA,1017,3,ŞCOALA GIMNAZIALĂ „VASILE GOLDIŞ”,U,1368,9,0,...,0,0,0,0,0,0,0,0,2020-09-27,7


# Function that sums the number of votes per age group for a given county and sex

In [115]:
def county_sex_age(data, county, sex, hour="21"):
    """Sum the votes per age bracket for one sex in one county.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with "Judet" and "hour" columns plus the age-bracket columns
        "Barbati 18-24" ... "Barbati 65+" and "Femei 18-24" ... "Femei 65+".
    county : str
        Value matched against the "Judet" column.
    sex : str
        "men" selects the "Barbati ..." columns, "women" the "Femei ..." ones.
    hour : str, optional
        Value matched against the "hour" column. Defaults to "21", the
        snapshot this function previously had hard-coded.

    Returns
    -------
    pandas.DataFrame
        A single "Value" column holding the summed votes, indexed by the
        names of the five age-bracket columns that were summed.

    Raises
    ------
    ValueError
        If ``sex`` is neither "men" nor "women".
    """
    prefixes = {"men": "Barbati", "women": "Femei"}
    if sex not in prefixes:
        # Previously an unknown value fell through and returned None, which
        # only surfaced later as an unrelated error in the caller.
        raise ValueError('sex must be "men" or "women", got {!r}'.format(sex))

    # Select the bracket columns by name rather than by position, so the index
    # of the result always matches the columns that were actually summed.
    brackets = ("18-24", "25-34", "35-44", "45-64", "65+")
    columns = ["{} {}".format(prefixes[sex], bracket) for bracket in brackets]

    # Build the row filter once instead of once per bracket.
    mask = (data["Judet"] == county) & (data["hour"] == hour)
    return data.loc[mask, columns].sum().to_frame(name="Value")

# Example of this function

In [132]:
# Total votes for the chosen county: the men's bracket totals plus the women's.
County, Sex = "B", "men"
county_sex_age(data_df, County, Sex).sum().add(county_sex_age(data_df, County, "women").sum())

Value    671743
dtype: int64

# Function that creates pie charts by sex, age and county

In [117]:
def county_sex_age_pie(county, sex, data=None):
    """Show a pie chart of votes per age bracket for one county and sex.

    Parameters
    ----------
    county : str
        County code passed to ``county_sex_age`` and shown in the title.
    sex : str
        "men" or "women", passed to ``county_sex_age``.
    data : pandas.DataFrame, optional
        Table to aggregate. Defaults to the notebook-level ``data_df``.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    if data is None:
        data = data_df
    # Use the function's own arguments. The previous body read the notebook
    # globals ``County`` and ``Sex``, so the chart could show a different
    # county than the one named in the title.
    df = county_sex_age(data, county, sex)

    fig = px.pie(df, values='Value', names=df.index, color_discrete_sequence=px.colors.sequential.RdBu, title= "County of " +county)
    fig.update_traces(textposition='inside', textinfo='percent+label')
    fig.show()
    

# Example of this function

In [129]:
# Draw the age-bracket pie chart for the chosen county and sex.
County, Sex = "B", "men"
county_sex_age_pie(county=County, sex=Sex)

# A function that measures participation: the total votes cast ("LT") as a fraction of the voters registered on the permanent and complementary lists

In [213]:
def percrnt_part(County, UAT, data=None, hour="21"):
    """Return the participation ratio of one UAT in one county.

    The ratio is the sum of "LT" divided by the sum of
    "Votanti pe lista permanenta" plus "Votanti pe lista complementara",
    taken over the rows matching the county, the UAT and the hour.

    Parameters
    ----------
    County : str
        Value matched against the "Judet" column.
    UAT : str
        Value matched against the "UAT" column.
    data : pandas.DataFrame, optional
        Table to aggregate. Defaults to the notebook-level ``data_df``.
    hour : str, optional
        Value matched against the "hour" column. Defaults to "21", the
        snapshot this function previously had hard-coded.

    Returns
    -------
    float
        Votes divided by registered voters, as a fraction (not multiplied by
        100). NOTE(review): when no row matches, or the registered total is 0,
        the division is by zero and presumably yields nan/inf -- confirm.
    """
    if data is None:
        data = data_df

    # Filter once and reuse the subset for all three sums; the same mask used
    # to be rebuilt three times on every call.
    selected = data.loc[
        (data["Judet"] == County) & (data["UAT"] == UAT) & (data["hour"] == hour)
    ]

    total_Votes = selected["LT"].sum()
    total_VPLP = selected["Votanti pe lista permanenta"].sum()
    total_VPLC = selected["Votanti pe lista complementara"].sum()
    total = total_VPLP + total_VPLC
    return total_Votes / total


# Example: participation ratio for one UAT, with the arguments passed by keyword.
percrnt_part(County="AB", UAT="MUNICIPIUL ALBA IULIA")


0.3597375092256843

# We calculate this ratio for every distinct (county, UAT) pair in the data

In [220]:
# One participation ratio per distinct (Judet, UAT) pair, in order of first
# appearance in data_df, stored in a single "per" column with a fresh 0..n-1 index.
Per_part = (
    data_df[["Judet", "UAT"]]
    .drop_duplicates()
    .apply(lambda row: percrnt_part(row["Judet"], row["UAT"]), axis=1)
    .to_frame(name="per")
    .reset_index(drop=True)
)

In [221]:
# Box plot of the participation ratios, with every individual point drawn.
fig = px.box(data_frame=Per_part, y="per", points="all")
fig.show()

##  <span style="color:red">In the following places the participation ratio is greater than 1, which does not make sense; these could be irregularities or outliers.</span>

In [235]:
# Put the (Judet, UAT) labels back next to their ratios. Both pieces come from the
# same drop_duplicates() order and carry a fresh 0..n-1 index, so they line up by row.
X = pd.concat(
    [data_df[["Judet", "UAT"]].drop_duplicates().reset_index(drop=True), Per_part],
    axis=1,
)
# Keep only the places whose ratio exceeds 1.
X.loc[X["per"] > 1]

Unnamed: 0,Judet,UAT,per
23,AB,CERU-BĂCĂINŢI,1.139535
893,CL,GURBĂNEŞTI,1.025028
999,CS,ZORLENŢU MARE,1.193814
1219,DJ,BOTOŞEŞTI-PAIA,1.094545
1261,DJ,GOGOŞU,1.089912
1457,GR,BULBUCATA,1.068015
1524,HD,BĂTRÂNA,1.530612
1532,HD,BULZEŞTII DE SUS,1.441964
1707,IL,DRĂGOEŞTI,1.004032
1850,MH,BALTA,1.323424
