# Pandas introduction

In [1]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
# Small built-in containers used as raw material for the pandas examples:
# a list, two dicts, and two (values, labels) pairs sharing the same labels.
a = ['alfio', 'ferrara', 28]
b = dict(nome='alfio', cognome='ferrara', eta=28)
c = dict(a=24, b=28, c=42)

idx = ['_0', 'c1', 'a2', 's3', 'a4']
values = [0, 1, 2, 3, 4]

idx2 = ['_0', 'c1', 'a2', 's3', 'a4']
values2 = [4, 5, 1, 6, 4]

In [3]:
# Two labelled Series built from the (values, labels) pairs defined earlier.
s = pd.Series(values, index=idx)
s2 = pd.Series(values2, index=idx2)

In [4]:
# Dict of dicts: outer keys 's1'/'s2', inner keys only partly overlapping.
D = dict(
    s1=dict(a=3, b=4, c=1),
    s2=dict(a=2, c=1, d=6),
)

# List of records with differing keys and mixed value types.
X = [
    dict(a=6, c='t'),
    dict(a=8, d=2),
]

In [5]:
k = 'abcdef'
# 12 x 6 frame of zeros: row labels step by 4 (0, 4, ..., 44) and there is
# one column per character of k.
df = pd.DataFrame(
    data=np.zeros(shape=(12, 6)),
    index=range(0, 12 * 4, 4),
    columns=list(k),
)

In [6]:
# Add a column 'y' holding 0..11, one value per row of the 12-row frame.
# This mutates df in place, so re-running earlier display cells shows the new column.
df['y'] = range(12)

In [7]:
# Column labels from position 2 onward (the Index is sliced positionally).
df.columns[2:]

Index(['c', 'd', 'e', 'f', 'y'], dtype='object')

In [8]:
# First three rows and every column from position 2 onward, selected with a
# single positional indexer rather than chaining .iloc[...] with a label lookup.
df.iloc[:3, 2:]

Unnamed: 0,c,d,e,f,y
0,0.0,0.0,0.0,0.0,0
4,0.0,0.0,0.0,0.0,1
8,0.0,0.0,0.0,0.0,2


## Get data from files

In [10]:
# Path of the CSV file read by the pd.read_csv calls below.
f = 'data/us_elections.csv'

In [11]:
# Load the CSV with default parsing. Note: this rebinds `df`, replacing the
# zeros frame built earlier in the notebook.
df = pd.read_csv(f)

In [12]:
# Preview the first five rows of the freshly loaded frame.
df.head()

Unnamed: 0,State,Source,Official/Unofficial,Total Ballots Counted (Estimate),Vote for Highest Office (President),VEP Turnout Rate,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,Overseas Eligible,State Abv
0,United States,,,158835004,,66.4%,239247182,257605088,7.8%,1461074,1962811,616440,3294457,4971025.0,
1,Alabama,https://www2.alabamavotes.gov/electionnight/st...,Unofficial,2306587,2297295.0,62.6%,3683055,3837540,2.3%,25898,50997,10266,67782,,AL
2,Alaska,https://www.elections.alaska.gov/results/20GEN...,,367000,,69.8%,525568,551117,3.4%,4293,2074,1348,6927,,AK
3,Arizona,https://results.arizona.vote/#/featured/18/0,,3400000,,65.5%,5189000,5798473,8.9%,38520,76844,7536,93699,,AZ
4,Arkansas,https://results.enr.clarityelections.com/AR/10...,Unofficial,1212030,1206697.0,55.5%,2182375,2331171,3.6%,17510,36719,24698,64974,,AR


In [13]:
# Inspect the dtype of each column as parsed by the default read_csv call.
df.dtypes

State                                  object
Source                                 object
Official/Unofficial                    object
Total Ballots Counted (Estimate)       object
Vote for Highest Office (President)    object
VEP Turnout Rate                       object
Voting-Eligible Population (VEP)       object
Voting-Age Population (VAP)            object
% Non-citizen                          object
Prison                                 object
Probation                              object
Parole                                 object
Total Ineligible Felon                 object
Overseas Eligible                      object
State Abv                              object
dtype: object

In [20]:
def thousands(x):
    """Parse a number written with comma thousands separators into a float.

    Parameters
    ----------
    x : str or scalar
        Raw cell value, e.g. ``"1,234,567"``. Numeric scalars are accepted
        as well and are simply converted with ``float``.

    Returns
    -------
    float or the original null
        The parsed value; ``np.nan`` when the input cannot be interpreted as
        a number (for example an empty string). Null inputs (``None``,
        ``NaN``) are returned unchanged.
    """
    if pd.isnull(x):
        return x
    # Only strings carry separators; other scalars (int, float, ...) have no
    # .replace method and go straight to float().
    cleaned = x.replace(',', '') if isinstance(x, str) else x
    try:
        return float(cleaned)
    except (TypeError, ValueError):
        # Unparseable text or an unsupported type: report as missing.
        return np.nan

In [21]:
# Map 0-based column positions to the parser read_csv should apply to them;
# every listed column is parsed with thousands().
# NOTE(review): position 4 is not listed — confirm that is intentional.
converters = dict.fromkeys((3, 6, 7, 9, 10, 11, 12, 13), thousands)

In [22]:
# Re-read the same CSV, this time passing the per-column converters so the
# listed columns are parsed by thousands() while loading.
df2 = pd.read_csv(f, converters=converters)

In [23]:
# Preview the first five rows of the frame loaded with converters.
df2.head()

Unnamed: 0,State,Source,Official/Unofficial,Total Ballots Counted (Estimate),Vote for Highest Office (President),VEP Turnout Rate,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,Overseas Eligible,State Abv
0,United States,,,158835004.0,,66.4%,239247182.0,257605088.0,7.8%,1461074.0,1962811.0,616440.0,3294457.0,4971025.0,
1,Alabama,https://www2.alabamavotes.gov/electionnight/st...,Unofficial,2306587.0,2297295.0,62.6%,3683055.0,3837540.0,2.3%,25898.0,50997.0,10266.0,67782.0,,AL
2,Alaska,https://www.elections.alaska.gov/results/20GEN...,,367000.0,,69.8%,525568.0,551117.0,3.4%,4293.0,2074.0,1348.0,6927.0,,AK
3,Arizona,https://results.arizona.vote/#/featured/18/0,,3400000.0,,65.5%,5189000.0,5798473.0,8.9%,38520.0,76844.0,7536.0,93699.0,,AZ
4,Arkansas,https://results.enr.clarityelections.com/AR/10...,Unofficial,1212030.0,1206697.0,55.5%,2182375.0,2331171.0,3.6%,17510.0,36719.0,24698.0,64974.0,,AR


In [25]:
# Inspect the dtype of each column after loading with the converters.
df2.dtypes

State                                   object
Source                                  object
Official/Unofficial                     object
Total Ballots Counted (Estimate)       float64
Vote for Highest Office (President)     object
VEP Turnout Rate                        object
Voting-Eligible Population (VEP)       float64
Voting-Age Population (VAP)            float64
% Non-citizen                           object
Prison                                 float64
Probation                              float64
Parole                                 float64
Total Ineligible Felon                 float64
Overseas Eligible                      float64
State Abv                               object
dtype: object