# JSON files and Pygal

In [1]:
import pandas as pd
import pygal as pg
#pip install pygal

# Read the population records and store the 'Value' column as 64-bit integers.
df_pop = pd.read_json('population_data.json').astype({'Value': 'int64'})
print(df_pop.head())

  Country Code Country Name      Value  Year
0          ARB   Arab World   96388069  1960
1          ARB   Arab World   98882541  1961
2          ARB   Arab World  101474075  1962
3          ARB   Arab World  104169209  1963
4          ARB   Arab World  106978104  1964


### Plotting some countries

In [2]:
# World maps come from a separate plugin: pip install pygal_maps_world
# NOTE(review): the AttributeError recorded under this cell suggests the plugin
# was not available when pygal was imported -- confirm, then restart the kernel.
country_groups = {
    'F countries': ['fr', 'fi'],
    'J countries': ['jm', 'jp', 'jo'],
    'U countries': ['ua', 'ug', 'us', 'uy', 'uz'],
}

worldmap_chart = pg.maps.world.World()
worldmap_chart.title = 'Some countries'
# Each group becomes one series on the map, added in the order listed above.
for group_title, group_codes in country_groups.items():
    worldmap_chart.add(group_title, group_codes)
worldmap_chart.render_to_file('world_map.svg')

AttributeError: module 'pygal.maps' has no attribute 'world'

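### Converting country codes to two letters
pygal uses two-letter country codes (e.g. `us`, `br`), while the dataset uses three-letter codes (e.g. `ARB`), so the codes must be converted before plotting.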

In [5]:
from pygal.maps.world import COUNTRIES

# COUNTRIES maps a two-letter code to a country name; invert it so every row
# can be looked up by its 'Country Name'. Walking the codes in sorted order
# means the last code wins if two codes ever share a name.
name_to_code = {name: code for code, name in sorted(COUNTRIES.items())}

# Rows whose name is not in pygal's table (e.g. 'Arab World') map to NaN and
# keep their original code. Assigning the column back on df_pop replaces the
# chained `df_pop[col].replace(..., inplace=True)` call, which stops updating
# the frame under pandas Copy-on-Write, and it no longer treats a match at
# row label 0 as "not found".
two_letter_codes = df_pop['Country Name'].map(name_to_code)
df_pop['Country Code'] = two_letter_codes.fillna(df_pop['Country Code'])

print(df_pop[df_pop['Country Code']=='us'].iloc[0:5])
print(df_pop[df_pop['Country Code']=='br'].iloc[0:5])

      Country Code   Country Name      Value  Year
11876           us  United States  180671000  1960
11877           us  United States  183691000  1961
11878           us  United States  186538000  1962
11879           us  United States  189242000  1963
11880           us  United States  191889000  1964
     Country Code Country Name     Value  Year
2907           br       Brazil  72758801  1960
2908           br       Brazil  74975656  1961
2909           br       Brazil  77279855  1962
2910           br       Brazil  79642214  1963
2911           br       Brazil  82021855  1964


### Most populated countries in a given year

In [None]:
def sort_by_pop(g):
    """Return the rows of `g` ordered by their 'Value' column, largest first."""
    ordered = g.sort_values(by='Value', ascending=False)
    return ordered
    
# Rank every year's rows by population (largest first). The result is indexed
# by year first, so `.loc[year]` yields that year's ranking.
df_pop_year = df_pop.groupby('Year').apply(sort_by_pop)

year = 2000 # given year
n_pop = 5 # five most populated

# Look the ranking up once instead of on every loop iteration.
year_ranking = df_pop_year.loc[year]
# Not all codes are converted to 2 letters, so keep only the rows whose code
# is a valid two-letter one.
has_map_code = year_ranking['Country Code'].str.len() == 2
# head() returns fewer rows when fewer than n_pop qualify, whereas scanning by
# position would run past the end of the ranking and raise IndexError.
most_populated = year_ranking[has_map_code].head(n_pop)

worldmap_most_pop = pg.maps.world.World()
worldmap_most_pop.title = 'Most Populated in '+str(year)
# One series per country, titled with the country name and carrying its
# population under the two-letter code.
for name, code, value in zip(most_populated['Country Name'],
                             most_populated['Country Code'],
                             most_populated['Value']):
    worldmap_most_pop.add(name, {code: value})
worldmap_most_pop.render_to_file('world_most_popylated.svg')

***
# Querying Web Sites

In [None]:
import requests as rqs

url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
# Without a timeout a stalled connection would block the kernel indefinitely.
r = rqs.get(url, timeout=10)
print(r.status_code) # 200 means a successful request
# Show only the start of the body; the full response can be very long.
print(r.text[:500])

In [None]:
# Stop with a clear HTTP error if the request was rejected; presumably an
# error body would not contain the keys read below -- verify against the API.
r.raise_for_status()

# converting the json to a dictionary
resp=r.json()
print(resp.keys())
# 'total_count' is the number of repositories matching the query (language:python),
# while 'items' holds only the repositories returned in this response.
print('Number of Python repositories on GitHub: ', resp['total_count'])
print('Number of repositories returned: ', len(resp['items']))

In [None]:
repositories = resp['items']
first_repo = repositories[0] # the first repository in the result list
# Print the repository's name, star count and description, one per line.
for field in ('name', 'stargazers_count', 'description'):
    print(first_repo[field])

In [None]:
# Request the page over HTTPS and bound the wait so a stalled connection
# cannot hang the kernel.
r = rqs.get('https://en.wikipedia.org/wiki/Monty_Python', timeout=10)
#print(r.headers['content-type'])
# List the names of the response headers.
for header_name in r.headers:
    print(header_name)
# Show only the start of the page; the full body can be very long.
print(r.text[:500])

# PyGal: examples presented in class

In [2]:
import pandas as pd
import pygal as pg

# Benchmark scores per browser; each list holds one score per benchmark.
dt = dict(
    Chrome=[6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607],
    Firefox=[7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450],
    Opera=[3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669],
)
# One column per browser, one row per benchmark.
df = pd.DataFrame(data=dt)

In [None]:
# Dot chart: every DataFrame column is added as one series.
benchmark_names = ['Richards', 'DeltaBlue', 'Crypto', 'RayTrace', 'EarleyBoyer', 'RegExp', 'Splay', 'NavierStokes']

dot_chart = pg.Dot(x_label_rotation=30)
dot_chart.title = 'V8 benchmark results'
dot_chart.x_labels = benchmark_names
for browser, scores in df.items():
    dot_chart.add(browser, scores)
dot_chart.render_to_file('dot_chart.svg')

In [None]:
# StackedLine chart with filled areas; every DataFrame column is one series.
# NOTE(review): the chart is still bound to the name `dot_chart`, which rebinds
# the name used for the Dot chart -- consider a dedicated name.
dot_chart = pg.StackedLine(fill=True)
dot_chart.title = 'V8 benchmark results'
# Label the x axis with the row positions '0', '1', ...
dot_chart.x_labels = map(str, range(len(df)))
for browser, scores in df.items():
    dot_chart.add(browser, scores)
dot_chart.render_to_file('stacked_chart.svg')

In [None]:
# incorporating some customizations
from pygal.style import DarkStyle, DarkSolarizedStyle
from pygal import Config

# Alternative look: cibar_chart = pg.Bar(width=1000,spacing=50,style=DarkStyle)
cibar_chart = pg.Bar(width=800, spacing=20, style=DarkSolarizedStyle)
cibar_chart.title = 'Browsers'

# x-axis labels and how they are drawn
cibar_chart.x_labels = ['Richards', 'DeltaBlue', 'Crypto', 'RayTrace', 'EarleyBoyer', 'RegExp', 'Splay', 'NavierStokes']
cibar_chart.x_label_rotation = 30
cibar_chart.truncate_label = 8
cibar_chart.x_labels_major_count = 4
cibar_chart.human_readable = True

# Options left disabled, kept as examples:
#cibar_chart.x_labels_major = ['Crypto','RayTrace']
#cibar_chart.include_x_axis=False
#cibar_chart.x_labels = ({'label':'Richards','value':.0000000012},
#                        {'label':'DeltaBlue','value':.00024},
#                        {'label':'Crypto','value':.00036},
#                        {'label':'RayTrace','value':.00036},
#                        {'label':'EarleyBoyer','value':.00024},
#                        {'label':'RegExp','value':.00012},
#                        {'label':'Splay','value':.00012},
#                        {'label':'NavierStokes','value':.00012})

# One bar series per browser, taken straight from the raw dictionary.
for browser, scores in dt.items():
    cibar_chart.add(browser, scores)

cibar_chart.render_to_file('cibar_chart.svg')