In [75]:
class Person:
    """One survey record holding four integer-coded fields.

    Built from a single comma-separated text line of the form
    ``gender,race,education,income``.
    """

    def __init__(self, line):
        """Parse ``line`` and store each field as an int.

        Raises ValueError if the line does not split into exactly four
        fields or if a field is not a valid integer literal.
        """
        pieces = line.split(',')
        raw_gender, raw_race, raw_education, raw_income = pieces
        self.gender = int(raw_gender)
        self.race = int(raw_race)
        self.education = int(raw_education)
        # int() tolerates surrounding whitespace, so a trailing newline on
        # the last field is harmless.
        self.income = int(raw_income)

    def __repr__(self):
        """Return a ``Person(gender, race, education, income)`` summary."""
        values = (self.gender, self.race, self.education, self.income)
        return "Person(%s, %s, %s, %s)" % values
    
class Group:
    """An ordered collection of people with simple aggregate statistics.

    The methods read the ``gender``, ``race``, ``education`` and ``income``
    attributes of each member.
    """

    def __init__(self):
        self.people = []

    def add_person(self, p):
        """Append person ``p`` to the group."""
        self.people.append(p)

    def mean_income(self):
        """Return the arithmetic mean of the members' income.

        Raises ZeroDivisionError if the group is empty.
        """
        total = sum(p.income for p in self.people)
        return total / self.size()

    def mean_education(self):
        """Return the arithmetic mean of the members' education value.

        Raises ZeroDivisionError if the group is empty.
        """
        total = sum(p.education for p in self.people)
        return total / self.size()

    def size(self):
        """Return the number of people in the group."""
        return len(self.people)

    def count_gender(self):
        """Return a pair ``(a, b)``: members with gender 1 vs. gender 2.

        Members with any other gender code are not counted.
        """
        a, b = 0, 0
        for p in self.people:
            if p.gender == 1:
                a += 1
            elif p.gender == 2:
                b += 1
        return (a, b)

    def count_race(self):
        """Return a list ``[a, b, c, d, e]`` with the member count for
        race codes 1 to 5, in that order.

        Members with a race code outside 1..5 are not counted, mirroring
        how ``count_gender`` treats unknown codes.
        """
        count = [0, 0, 0, 0, 0]
        for p in self.people:
            # Explicit range check: without it, code 0 (or a negative code)
            # would wrap around via negative indexing and be counted under
            # a different race.
            if 1 <= p.race <= len(count):
                count[p.race - 1] += 1
        return count

    def _filter(self, keep):
        """Return a new Group with the members for which ``keep(p)`` is true."""
        g = Group()
        for p in self.people:
            if keep(p):
                g.add_person(p)
        return g

    def filter_gender(self, gender):
        """Return a new group with every member whose gender equals ``gender``."""
        return self._filter(lambda p: p.gender == gender)

    def filter_race(self, race):
        """Return a new group with every member whose race equals ``race``."""
        return self._filter(lambda p: p.race == race)

    def filter_active(self):
        """Return a new group with every member whose income is non-zero."""
        return self._filter(lambda p: p.income != 0)

    def median_income(self):
        """Return the median of the income distribution.

        For an even number of members the two central values are averaged.
        Raises statistics.StatisticsError (a ValueError subclass) if the
        group is empty.
        """
        from statistics import median

        return median(p.income for p in self.people)

    # TODO: who earns the most?
    # TODO: who earns the most (per category)?
    # TODO: total income of the richest 10% vs. the other 90%
    # TODO: find out one's own percentile

In [32]:
# Load the survey data: every non-blank line of 'pnad.dat' is parsed by
# Person and added to the population group `g`.
with open('pnad.dat') as arquivo:  # the context manager closes the file
    lista_pessoas = arquivo.readlines()

g = Group()
for line in lista_pessoas:
    if not line.strip():
        # A blank line (e.g. at the end of the file) has no fields to parse.
        continue
    p = Person(line)
    g.add_person(p)

In [54]:
# Mean income of people with non-zero income, for gender 1 (printed under
# 'HOMENS') and gender 2 (printed under 'MULHERES'), one line per race
# code 1..5.
ativos = g.filter_active()  # loop-invariant: filter the population once

for titulo, genero in (('HOMENS', 1), ('MULHERES', 2)):
    print(titulo)
    por_genero = ativos.filter_gender(genero)
    for i in range(1, 6):
        # Raises ZeroDivisionError if a gender/race subgroup is empty.
        print(por_genero.filter_race(i).mean_income())

HOMENS
3333.6025917926568
1243.5945658479984
1273.557589058275
1260.5492227979275
2199.7619487804363
MULHERES
1967.6754563894524
852.6687953241693
867.3131907993002
905.8125
1472.7037005310947


In [57]:
# Mean education of people with non-zero income, for gender 2 (printed
# under 'MULHERES') and gender 1 (printed under 'HOMENS'), one line per
# race code 1..5.
ativos = g.filter_active()  # loop-invariant: filter the population once

for titulo, genero in (('MULHERES', 2), ('HOMENS', 1)):
    print(titulo)
    por_genero = ativos.filter_gender(genero)
    for i in range(1, 6):
        # Raises ZeroDivisionError if a gender/race subgroup is empty.
        print(por_genero.filter_race(i).mean_education())

MULHERES
9.782961460446247
7.248619980517372
7.31899458029275
6.707386363636363
8.881360287819085
HOMENS
10.136069114470843
7.014117192032489
6.8995375631931655
6.494818652849741
8.556692367486711


In [50]:
# Fraction of the whole group that falls under each race code (1..5).
[race_count / g.size() for race_count in g.count_race()]

[0.0042764401256997495,
 0.08310088812004933,
 0.4793282402310933,
 0.003959155858309123,
 0.42933527566484847]

In [72]:
# Small sample list; the expression below evaluates to its arithmetic mean.
lst = [
    1, 2, 4, 2, 45, 43, 5, 2,
    3434, 5, 346, 534, 534, 563, 5, 34,
]
sum(lst) / len(lst)

347.4375

In [73]:
# Middle element of the sorted sample. With an even number of items this
# picks the upper of the two central values instead of averaging them.
ordenada = list(lst)
ordenada.sort()
n = len(lst)
ordenada[n // 2]

34

In [74]:
# Display the sorted sample.
ordenada

[1, 2, 2, 2, 4, 5, 5, 5, 34, 43, 45, 346, 534, 534, 563, 3434]