In [1]:
import datetime
import csv

def get_file_rows(path, encoding=None):
    """Read a CSV file and return all of its rows.

    Args:
        path: Path of the CSV file to read.
        encoding: Text encoding handed to ``open``. ``None`` (the default)
            keeps the platform default encoding.

    Returns:
        A list with one list of strings per CSV row, in file order.
        No row is skipped, so a header row (if any) is the first element.
    """
    # newline='' leaves newline handling to the csv module, so line breaks
    # embedded in quoted fields are returned exactly as written in the file.
    with open(path, newline='', encoding=encoding) as file:
        return list(csv.reader(file))


def ageInYears( d, today=None ):
    """Return the number of full years elapsed between ``d`` and ``today``.

    Args:
        d: Birth date; any object exposing ``year``, ``month`` and ``day``
            (``datetime.date`` or ``datetime.datetime``).
        today: Reference date. Defaults to ``datetime.date.today()``.

    Returns:
        The age in completed years as an int.
    """
    if today is None:
        today = datetime.date.today()
    # Compare (month, day) tuples instead of building a date in the current
    # year: datetime.date(today.year, 2, 29) raises ValueError in non-leap
    # years. With this comparison a Feb 29 birthday counts from Mar 1 in
    # non-leap years.
    had_birthday_this_year = (today.month, today.day) >= (d.month, d.day)
    return (today.year - d.year) - (0 if had_birthday_this_year else 1)


# Column names of the registration CSV, listed in column order.
cadastro_keys = [
    'nome',
    'estado_civil',
    'nascimento',
    'lingua',
    'curso',
    'city',
    'bairro',
    'escola',
]
# Maps each column name to its positional index within a CSV row.
cadastro = {name: position for position, name in enumerate(cadastro_keys)}

In [2]:
"""
STUDENTS BY AGE

This cell manipulates the registration data
to generate the categories and series used by the age plots.
"""
csv_rows = get_file_rows('dados/2018_cadastro.csv')

# 1st objective - age.
def to_date(dt):
    """Parse a 'dd/mm/YYYY' string into a datetime.datetime."""
    return datetime.datetime.strptime(dt, '%d/%m/%Y')


# Birth-date column of every row except the first (header) one.
birthdays = [row[cadastro['nascimento']] for row in csv_rows[1:]]

ages = [ageInYears(to_date(birthday)) for birthday in birthdays]
print(f'ages = {ages}')

from collections import Counter
counter = Counter(ages)

# Only the ages that actually occur, in ascending order, with their counts.
keys = sorted(counter)
counts = [counter[age] for age in keys]

print(f'Categories {keys}\nSeries = {counts}')

# Every age from 18 to 60 inclusive; ages nobody has get a count of 0.
categories = list(range(18, 61))
series = [counter.get(age, 0) for age in categories]

print(f'Categories {categories}\nSeries = {series}')


ages = [25, 22, 23, 28, 28, 20, 24, 54, 18, 18, 19, 20, 18, 29, 23, 24, 24, 21, 56, 24, 19, 22, 21, 58, 21, 20, 19, 40, 21, 19, 28, 19, 34, 19, 24, 44, 28, 19, 20, 30, 22, 28, 36, 22, 26, 20, 23, 34, 43, 32, 37, 44, 20, 24, 24, 18, 18, 28, 19, 18, 20, 20, 32, 31, 18, 21, 33, 27, 27, 23, 24, 24, 24, 23, 22, 21, 21, 23, 54, 55, 20, 33, 26, 40, 23, 20, 20, 19, 24, 24, 22, 18, 19, 21, 25, 22, 20, 35, 21, 19]
Categories [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 43, 44, 54, 55, 56, 58]
Series = [8, 11, 12, 9, 7, 7, 12, 2, 2, 2, 6, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1]
Categories [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60]
Series = [8, 11, 12, 9, 7, 7, 12, 2, 2, 2, 6, 1, 1, 1, 2, 2, 2, 1, 1, 1, 0, 0, 2, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0]


In [3]:
"""
STUDENTS BY CITY
"""

from collections import Counter

def normalize(city):
    """Capitalize every whitespace-separated word and join them with single spaces.

    Leading, trailing and repeated whitespace is dropped by ``str.split()``;
    each word gets an upper-case first character and a lower-case remainder.
    """
    return ' '.join(word.capitalize() for word in city.split())


# Normalized city of every data row (the first row, the header, is skipped).
# Built as a list instead of a lazy map(): a map object is exhausted once
# Counter consumes it, which would leave an empty iterator behind in the
# kernel namespace for any later cell that reads cidades_dados.
cidades_dados = [normalize(row[cadastro['city']]) for row in csv_rows[1:]]
counter_cities = Counter(cidades_dados)

# [city, count] pairs in first-seen order.
print([list(item) for item in counter_cities.items()])

[['Campina Grande', 78], ['Mogeiro', 2], ['Boqueirão', 2], ['Queimadas', 3], ['Areial', 2], ['Lagoa De Roça', 2], ['Taperoa', 1], ['Santa Cecilia', 2], ['Alcantil', 1], ['Soledade', 1], ['Esperança', 4], ['Barauna', 1], ['Gurjão', 1]]


In [4]:
# Palette ordered from the darkest green to the lightest; colors are handed
# out in this order as new count values appear in most_common() order, so the
# highest counts receive the first (darkest) colors.
scheme = ['#67d33d','#84db5f', '#b2e897', '#d3f2c1', '#eaf9e1']
# Maps a student count to its assigned color, so cities that share a count
# also share a color.
scheme_categories = {}
# Rows of [city, count, color], highest count first.
out = []
for city, count in counter_cities.most_common():
    if count not in scheme_categories:
        # Take the next unused color. Once the palette is used up, keep
        # reusing the last color instead of failing with a KeyError on the
        # lookup below.
        color_index = min(len(scheme_categories), len(scheme) - 1)
        scheme_categories[count] = scheme[color_index]
    out.append([city, count, scheme_categories[count]])
print(out)

[['Campina Grande', 78, '#67d33d'], ['Esperança', 4, '#84db5f'], ['Queimadas', 3, '#b2e897'], ['Mogeiro', 2, '#d3f2c1'], ['Boqueirão', 2, '#d3f2c1'], ['Areial', 2, '#d3f2c1'], ['Lagoa De Roça', 2, '#d3f2c1'], ['Santa Cecilia', 2, '#d3f2c1'], ['Taperoa', 1, '#eaf9e1'], ['Alcantil', 1, '#eaf9e1'], ['Soledade', 1, '#eaf9e1'], ['Barauna', 1, '#eaf9e1'], ['Gurjão', 1, '#eaf9e1']]


In [5]:
"""
STUDENTS BY NEIGHBORHOOD
"""
from pprint import pprint 

# Normalized neighborhood of every data row whose city is Campina Grande.
# The city is compared after normalize() so that case and stray whitespace
# are ignored; a plain .lower() comparison would miss values with extra
# spaces. Built as a list because a lazy map() would be left exhausted once
# Counter consumes it.
bairros_dados = [
    normalize(row[cadastro['bairro']])
    for row in csv_rows[1:]
    if normalize(row[cadastro['city']]) == 'Campina Grande'
]
counter_bairros = Counter(bairros_dados)

# [neighborhood, count] pairs, highest count first.
pprint([ list(item) for item in counter_bairros.most_common() ])

[['Bodocongó', 12],
 ['Palmeira', 7],
 ['Monte Santo', 5],
 ['Liberdade', 5],
 ['Malvinas', 4],
 ['Cruzeiro', 4],
 ['Jardim Paulistano', 3],
 ['Sao Jose Da Mata', 3],
 ['Jose Pinheiro', 2],
 ['Ramadinha I', 2],
 ['Prata', 2],
 ['São Jose Da Mata', 2],
 ['Catole', 2],
 ['Castelo Branco', 2],
 ['Dinamerica', 2],
 ['Santa Cruz', 2],
 ['Catingueira', 1],
 ['Chico Mendes', 1],
 ['Bela Vista', 1],
 ['São José', 1],
 ['Novo Horizonte', 1],
 ['Rosa Cruz', 1],
 ['Vila Cabral', 1],
 ['Ressurreição', 1],
 ['Cinza', 1],
 ['Ligeiro', 1],
 ['Santa Rosa', 1],
 ['Presidente Medici', 1],
 ['Tambor', 1],
 ['Jardim Tavares', 1],
 ['Centro', 1],
 ['Centenario', 1],
 ['Monte Castelo', 1],
 ['Quarenta', 1],
 ['Mutirão', 1]]


In [11]:
import json 
# Parse the JSON document stored at dados/CG_GEO_JSON.json (GeoJSON, judging
# by the file name - TODO confirm).
# The with-block closes the file even if parsing raises. Reading bytes lets
# the json module detect the UTF-8/16/32 encoding itself, so the result does
# not depend on the platform's default text encoding.
with open('dados/CG_GEO_JSON.json', 'rb') as geo_file:
    geo_parsed = json.load(geo_file)