# Proyecto

## Lectura del archivo

In [1]:
%config IPCompleter.greedy=True
import json
import pandas as pd
import re

# Load the records from the JSON file into `d`.
# `d` is bound to an empty list first so that a failed load leaves the later cells
# iterating over nothing, instead of raising NameError on a fresh kernel or silently
# reusing a `d` left over from a previous run.
d = []
try:
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # default text encoding.
    with open('data/Personas.json', encoding='utf-8') as json_data:
        d = json.load(json_data)
except Exception as error:
    print('Archivo inválido.')
    print(error.args)


## Guardado de la información requerida y limpieza de datos

In [2]:
# Accumulate per-country totals and collect the distinct e-mail addresses.
# Names produced here and read by later cells:
#   total_persons_country  -> number of accepted records per country
#   total_salaries_country -> sum of salaries per country
#   total_IMCs_country     -> sum of weight / (height / 100) ** 2 per country
#   total_persons_domain   -> distinct e-mail values, in first-seen order
total_persons_country = {}
total_salaries_country = {}
total_persons_domain = []
total_IMCs_country = {}

for person in d:

    country = person['country']
    email = person['email']
    weight = person['weight']
    height = person['height']
    salary = person['salary']

    try:
        # A country name containing a digit is treated as invalid data.
        if bool(re.search(r'\d', country)):
            raise Exception(f'País {country} inválido')
        # Acceptance checks: weight and height must be convertible with int(),
        # salary with float().
        int(weight)
        int(height)
        # Keep the converted numbers and compute with them. Validating and then
        # using the raw values would let numeric strings through the checks and
        # fail (or concatenate text) in the arithmetic below.
        weight_num = float(weight)
        height_num = float(height)
        salary_num = float(salary)
        # A zero height would raise ZeroDivisionError outside the handler and
        # abort the whole loop; reject the record like any other invalid one.
        if height_num == 0:
            raise Exception(f'Altura {height} inválida')
        # height / 100 presumably converts centimetres to metres (assumption based
        # on the IMC formula; confirm against the data source).
        imc = weight_num / ((height_num / 100) ** 2)
    except Exception as error:
        print(error.args)
    else:
        # All three accumulators are updated together, only after every
        # calculation has succeeded, so a bad record cannot leave them inconsistent.
        total_persons_country[country] = total_persons_country.get(country, 0) + 1
        total_salaries_country[country] = total_salaries_country.get(country, 0.0) + salary_num
        total_IMCs_country[country] = total_IMCs_country.get(country, 0.0) + imc

    # E-mail values are collected for every record, including those rejected above.
    if email not in total_persons_domain:
        total_persons_domain.append(email)

## Cantidad de personas por país

In [3]:
# Table with one row per country and a single column holding the number of
# records counted for it, followed by the overall total.
df = (
    pd.DataFrame([total_persons_country])
    .rename(index={0: 'Cantidad de personas'})
    .T
)
total = df['Cantidad de personas'].sum()
print(df)
print(f'\nTotal de personas {total}')

                                  Cantidad de personas
Afghanistan                                          1
Albania                                              4
Angola                                               1
Argentina                                           13
Armenia                                              2
Australia                                            2
Azerbaijan                                           1
Bangladesh                                           4
Belarus                                              4
Benin                                                1
Bolivia                                              2
Bosnia and Herzegovina                               6
Botswana                                             3
Brazil                                              48
Bulgaria                                             2
Burkina Faso                                         2
Burundi                                              1
Cameroon  

## Salario promedio del trabajador por país

In [4]:
# Mean salary per country: the accumulated salary divided by the number of
# records counted for that country.
mean_salaries_country = {
    country: salary_sum / total_persons_country[country]
    for country, salary_sum in total_salaries_country.items()
}
# One row per country, single column named 'Salario promedio'.
df2 = (
    pd.DataFrame([mean_salaries_country])
    .rename(index={0: 'Salario promedio'})
    .T
)
print(df2)

                                  Salario promedio
Afghanistan                            4526.000000
Albania                                6414.750000
Angola                                 9696.000000
Argentina                              5810.230769
Armenia                                4591.500000
Australia                              3378.000000
Azerbaijan                             3765.000000
Bangladesh                             3430.250000
Belarus                                4981.500000
Benin                                  9168.000000
Bolivia                                1429.500000
Bosnia and Herzegovina                 4041.666667
Botswana                               6309.000000
Brazil                                 5128.270833
Bulgaria                               2793.000000
Burkina Faso                           6838.000000
Burundi                                5744.000000
Cameroon                               5790.000000
Canada                         

In [5]:
import re

# Count how many of the collected e-mail addresses fall under each domain.
domains = {}

# Compiled once for the whole loop. Group 2 captures the run of word characters,
# dots and hyphens that follows the '@'.
email_pattern = re.compile(r'([\w.-]+)@([\w.-]+)')

for email in total_persons_domain:
    # The collected values come straight from the JSON records, so an entry may be
    # null or otherwise not text. Searching such a value raises TypeError, so it is
    # treated the same way as an address that does not match the pattern.
    match = email_pattern.search(email) if isinstance(email, str) else None
    if match:
        domain = match.group(2)
        domains[domain] = domains.get(domain, 0) + 1
            

## Cantidad de personas por dominio de correo electrónico

In [6]:
# Table with one row per e-mail domain and a single column holding how many
# addresses use it, followed by the overall total.
df3 = (
    pd.DataFrame([domains])
    .rename(index={0: 'Cantidad'})
    .T
)
total = df3['Cantidad'].sum()
print(df3)
print(f'\nTotal de personas {total}')

                  Cantidad
123-reg.co.uk            5
1688.com                 1
1und1.de                 1
360.cn                   2
51.la                    1
a8.net                   1
abc.net.au               4
about.com                2
about.me                 3
aboutads.info            1
accuweather.com          3
addthis.com              3
addtoany.com             1
adobe.com                4
alexa.com                3
alibaba.com              1
altervista.org           1
amazon.co.jp             3
amazon.co.uk             2
amazon.com               4
amazon.de                2
amazonaws.com            1
ameblo.jp                1
angelfire.com            2
answers.com              1
aol.com                  1
apache.org               2
apple.com                2
archive.org              2
arizona.edu              2
...                    ...
whitehouse.gov           1
who.int                  1
wikia.com                2
wikimedia.org            2
wikipedia.org            1
w

## IMC promedio de las personas por país

In [9]:
# Mean IMC per country: the accumulated IMC divided by the number of records
# counted for that country.
mean_IMCs_country = {
    country: imc_sum / total_persons_country[country]
    for country, imc_sum in total_IMCs_country.items()
}
# One row per country, single column named 'Promedio de IMCs'.
df4 = (
    pd.DataFrame([mean_IMCs_country])
    .rename(index={0: 'Promedio de IMCs'})
    .T
)
print(df4)

                                  Promedio de IMCs
Afghanistan                              31.297374
Albania                                  23.288846
Angola                                   30.864198
Argentina                                32.799195
Armenia                                  22.147016
Australia                                34.671031
Azerbaijan                               17.541874
Bangladesh                               30.584265
Belarus                                  33.636773
Benin                                    47.086801
Bolivia                                  34.850280
Bosnia and Herzegovina                   21.995592
Botswana                                 29.561835
Brazil                                   28.063290
Bulgaria                                 22.544257
Burkina Faso                             32.545647
Burundi                                  15.059701
Cameroon                                 41.893810
Canada                         

## Reporte de información 

In [8]:
# Write each summary table to its own HTML file, in the same order as before,
# passing justify='center' to every export.
html_reports = (
    (df, 'Personas_por_pais.html'),
    (df2, 'Salarios_por_pais.html'),
    (df3, 'Personas_por_dominio.html'),
    (df4, 'IMCs_promedios_por_pais.html'),
)
for table, filename in html_reports:
    table.to_html(filename, justify='center')