# PYTHON

 By Marcelo Feighelstein
# Visualization

## Reading tables from a CSV file

In [1]:
# Import Pandas library
import pandas as pd

In [2]:
# Load the source table from a CSV file into a DataFrame.
# NOTE(review): the absolute '/content/...' path looks Colab-specific — adjust it when running elsewhere.
happy_df = pd.read_csv('/content/WHR2023.csv')

In [3]:
happy_df.head(5)

Unnamed: 0,Country name,Regional indicator,Ladder score,Standard error of ladder score,upperwhisker,lowerwhisker,Logged GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Ladder score in Dystopia,Explained by: Log GDP per capita,Explained by: Social support,Explained by: Healthy life expectancy,Explained by: Freedom to make life choices,Explained by: Generosity,Explained by: Perceptions of corruption,Dystopia + residual
0,Finland,Western Europe,7.804,0.036,7.875,7.733,10.792,0.969,71.15,0.961,-0.019,0.182,1.778,1.888,1.585,0.535,0.772,0.126,0.535,2.363
1,Denmark,Western Europe,7.586,0.041,7.667,7.506,10.962,0.954,71.25,0.934,0.134,0.196,1.778,1.949,1.548,0.537,0.734,0.208,0.525,2.084
2,Iceland,Western Europe,7.53,0.049,7.625,7.434,10.896,0.983,72.05,0.936,0.211,0.668,1.778,1.926,1.62,0.559,0.738,0.25,0.187,2.25
3,Israel,Middle East and North Africa,7.473,0.032,7.535,7.411,10.639,0.943,72.697,0.809,-0.023,0.708,1.778,1.833,1.521,0.577,0.569,0.124,0.158,2.691
4,Netherlands,Western Europe,7.403,0.029,7.46,7.346,10.942,0.93,71.55,0.887,0.213,0.379,1.778,1.942,1.488,0.545,0.672,0.251,0.394,2.11


In [4]:
happy_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137 entries, 0 to 136
Data columns (total 20 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   Country name                                137 non-null    object 
 1   Regional indicator                          137 non-null    object 
 2   Ladder score                                137 non-null    float64
 3   Standard error of ladder score              137 non-null    float64
 4   upperwhisker                                137 non-null    float64
 5   lowerwhisker                                137 non-null    float64
 6   Logged GDP per capita                       137 non-null    float64
 7   Social support                              137 non-null    float64
 8   Healthy life expectancy                     136 non-null    float64
 9   Freedom to make life choices                137 non-null    float64
 10  Generosity    

In [5]:
# Keep only the three columns used in the rest of the notebook.
# Selecting with [[...]] raises KeyError on a misspelled column name (passing
# `columns=` to the DataFrame constructor would silently create an all-NaN
# column instead), and .copy() yields an independent frame so that columns
# added to work_df later never touch happy_df.
work_df = happy_df[['Country name', 'Regional indicator', 'Ladder score']].copy()

In [6]:
work_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137 entries, 0 to 136
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Country name        137 non-null    object 
 1   Regional indicator  137 non-null    object 
 2   Ladder score        137 non-null    float64
dtypes: float64(1), object(2)
memory usage: 3.3+ KB


In [7]:
work_df.columns

Index(['Country name', 'Regional indicator', 'Ladder score'], dtype='object')

In [8]:
work_df.index

RangeIndex(start=0, stop=137, step=1)

In [9]:
work_df.head(10)

Unnamed: 0,Country name,Regional indicator,Ladder score
0,Finland,Western Europe,7.804
1,Denmark,Western Europe,7.586
2,Iceland,Western Europe,7.53
3,Israel,Middle East and North Africa,7.473
4,Netherlands,Western Europe,7.403
5,Sweden,Western Europe,7.395
6,Norway,Western Europe,7.315
7,Switzerland,Western Europe,7.24
8,Luxembourg,Western Europe,7.228
9,New Zealand,North America and ANZ,7.123


## Indexing a table

In [10]:
# Move 'Country name' out of the regular columns and use it as the row index.
# NOTE: this cell is not re-runnable on its own result — once the column has
# become the index, a second set_index('Country name') raises KeyError.
work_df = work_df.set_index('Country name')

In [11]:
work_df.head(5)

Unnamed: 0_level_0,Regional indicator,Ladder score
Country name,Unnamed: 1_level_1,Unnamed: 2_level_1
Finland,Western Europe,7.804
Denmark,Western Europe,7.586
Iceland,Western Europe,7.53
Israel,Middle East and North Africa,7.473
Netherlands,Western Europe,7.403


In [12]:
work_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 137 entries, Finland to Afghanistan
Data columns (total 2 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Regional indicator  137 non-null    object 
 1   Ladder score        137 non-null    float64
dtypes: float64(1), object(1)
memory usage: 3.2+ KB


In [13]:
 # Rename the 'Ladder score' column to 'Happiness score' (rebinding instead of mutating in place)
 work_df = work_df.rename(columns={"Ladder score": "Happiness score"})

In [14]:
work_df.head(10)

Unnamed: 0_level_0,Regional indicator,Happiness score
Country name,Unnamed: 1_level_1,Unnamed: 2_level_1
Finland,Western Europe,7.804
Denmark,Western Europe,7.586
Iceland,Western Europe,7.53
Israel,Middle East and North Africa,7.473
Netherlands,Western Europe,7.403
Sweden,Western Europe,7.395
Norway,Western Europe,7.315
Switzerland,Western Europe,7.24
Luxembourg,Western Europe,7.228
New Zealand,North America and ANZ,7.123


# Visualization

## Import library Plotly for visualization

In [15]:
import plotly.express as px

## Bar charts

In [16]:
# Horizontal bar chart of the first 15 rows of work_df
fig = px.bar(data_frame = work_df[0:15], # take first 15 rows of dataset
             y=work_df.index[0:15],      # y axis = country names (they now live in the index of the table)

             x="Happiness score",        # x axis = Happiness score
             orientation='h',            # horizontal layout
             color="Happiness score",    # color each bar according to its Happiness score
             text="Happiness score")     # print the Happiness score on each bar as a text label

In [17]:
fig.show() # show the graph

In [18]:
# Same top-15 bar chart as before, now with custom axis names, a title and a reversed y axis
fig = px.bar(data_frame = work_df[0:15],             # take first 15 rows of dataset
             y=work_df.index[0:15],                  # y axis = country names (they now live in the index of the table)
             x="Happiness score",                    # x axis = Happiness score
             labels={
                     "Happiness score": "How much HAPPY is the Country", # Set x axis name to How much HAPPY is the Country
                     "y": "Country",                                     # Set y axis name to Country (y is passed as an array, so it is addressed by the key "y")
                 },
             orientation='h',                        # horizontal layout
             color="Happiness score")                # set color scheme according to Happiness score

fig.update_layout(width=800,                         # set graph width to 800
                  yaxis=dict(autorange="reversed"),               # reverse the order of the y axis
                  title="Top 15 happiest countries")              # set title
fig.show() # show the graph

In [19]:
# Add a 1-based positional rank: the first row gets 1, the second 2, and so on.
# NOTE(review): this equals the happiness rank only while the rows are ordered
# by descending score — the table is assumed to arrive sorted that way; confirm.
work_df['Ranking'] = range(1, len(work_df) + 1)

In [32]:
work_df.loc [ 'Israel', "Ranking"]

4

In [21]:
work_df.head(5)

Unnamed: 0_level_0,Regional indicator,Happiness score,Ranking
Country name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Finland,Western Europe,7.804,1
Denmark,Western Europe,7.586,2
Iceland,Western Europe,7.53,3
Israel,Middle East and North Africa,7.473,4
Netherlands,Western Europe,7.403,5


In [22]:
# Vertical bar chart of the last 10 rows of work_df, bar height = Ranking
fig = px.bar(data_frame = work_df[-10:],              # take last 10 rows of dataset
             x=work_df.index[-10:],                   # x axis = country names (they now live in the index of the table)
             y="Ranking",                             # y axis = Ranking on Happiness index of the country
             labels={
                     "x": "Country",                     # Set x axis name to Country
                     "Ranking": "Ranking of the country" # Set y axis name to Ranking of the country
                 },
             orientation='v',                         # layout vertical
             color="Happiness score",                 # set color scheme according to Happiness score
             text="Ranking")                          # Display the Ranking as labels of the bar
fig.update_layout(width=800,                          # set width of graph
                  height=500,                         # set height of graph
                  title="Bottom 10 happiest countries") # set title
fig.show() # show the graph

In [23]:
work_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 137 entries, Finland to Afghanistan
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Regional indicator  137 non-null    object 
 1   Happiness score     137 non-null    float64
 2   Ranking             137 non-null    int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 8.4+ KB


In [24]:
# Horizontal bar chart of the first 20 rows of work_df, one color per region
fig = px.bar(data_frame = work_df[0:20], # take first 20 rows of dataset
             y=work_df.index[0:20], # y axis = country names (they now live in the index of the table)
             x="Happiness score",   # x axis = Happiness score
             orientation='h',       # layout horizontal
             color="Regional indicator", # color scheme according to Region
             text="Happiness score")     # bar label equal to Happiness Score
fig.update_layout(width=1000,                        # set graph width
                  yaxis=dict(autorange="reversed"),  # reverse y order
                  title="Top 20 happiest countries") # set title
fig.show() # show graph

## Pie Chart

In [25]:
work_df["Regional indicator"].value_counts().index

Index(['Sub-Saharan Africa', 'Western Europe', 'Latin America and Caribbean',
       'Central and Eastern Europe', 'Middle East and North Africa',
       'Southeast Asia', 'Commonwealth of Independent States', 'East Asia',
       'South Asia', 'North America and ANZ'],
      dtype='object', name='Regional indicator')

In [35]:
work_df["Regional indicator"].value_counts()

Index(['Sub-Saharan Africa', 'Western Europe', 'Latin America and Caribbean',
       'Central and Eastern Europe', 'Middle East and North Africa',
       'Southeast Asia', 'Commonwealth of Independent States', 'East Asia',
       'South Asia', 'North America and ANZ'],
      dtype='object', name='Regional indicator')

In [39]:
# Count the countries in each region once and reuse the result for both the
# slice sizes and the slice names, so the two are guaranteed to stay aligned.
region_counts = work_df["Regional indicator"].value_counts()

fig = px.pie(values=region_counts.values,  # number of countries per region
             names=region_counts.index,    # name of regions
             title="Percentage of contribution of regions",  # set title
             )

fig.update_layout(width=800) # set graph width
fig.show() # show graph

## Box plot graph

In [43]:
# One horizontal box per region, showing the spread of Happiness score inside that region
fig = px.box(work_df,
             x="Happiness score",                  # set Happiness score as x axis
             y="Regional indicator",               # set region as y axis
             color="Regional indicator",           # one color per region
             color_discrete_sequence=px.colors.qualitative.Pastel_r, # take the box colors from a predefined qualitative palette
             template="plotly_dark")               # use predefined dark template for the layout
fig.update_layout(height=600, # set graph height
                  width=900,  # set graph width
                  xaxis= dict(showticklabels = True), # keep the tick labels on the x axis visible
                 title="Distribution of happiness score by region") # set title
fig.show() # show the graph

## Line plot

In [45]:
# Line chart of the Happiness score over ALL countries, in table order.
# The index is turned back into a regular column so that the x axis and its
# label are addressed by column name rather than by a positional "x" key.
fig = px.line(data_frame = work_df.reset_index(),    # whole dataset, with 'Country name' as a column again
              x="Country name",                      # x axis = country names
              y="Happiness score",                   # y axis = Happiness score
              labels={
                      "Country name": "Country",     # Set x axis name to Country
                  },
              orientation='v',                       # layout vertical (categories on x, values on y)
              template="plotly_dark",                # Use predefined color scheme for layout
)
fig.update_layout(width=2000,                        # set width of graph
                  height=500,                        # set height of graph
                  title="Happiness score trend")     # set title
fig.show() # show the graph

## Scatter graph

In [30]:
# Scatter plot of the Happiness score for the first 30 rows of work_df
fig = px.scatter(data_frame = work_df[:30],          # take first 30 rows of dataset
             x=work_df.index[:30],                   # x axis = country names (they now live in the index of the table)
             y="Happiness score",                    # y axis = Happiness score of the country
             labels={
                     "x": "Country",                 # Set index (x) axis name to Country
                 },
             orientation='v',                        # layout vertical
             template="plotly_dark",                  # Use predefined color scheme for layout
             color="Happiness score"                  # color each point according to its Happiness score
)
fig.update_layout(width=1000,                        # set width of graph
                  height=500,                        # set height of graph
                  title="Top 30 happiest countries") # set title
fig.show() # show the graph

In [49]:
# Scatter plot of two numeric columns of the original table, colored by Ladder score
fig = px.scatter(data_frame = happy_df,          # use the full original table (all columns, all rows)
             x="Logged GDP per capita",                   # x axis = Logged GDP per capita
             y="Healthy life expectancy",                    # y axis = Healthy life expectancy
             orientation='v',                        # layout vertical
             template="plotly_white",                  # Use predefined color scheme for layout
             color="Ladder score",                     # color each point according to its Ladder score
             #text='Country name'                      (disabled; enabling it would print the country name next to every point)
)
fig.update_layout(width=1000,                        # set width of graph
                  height=500,                        # set height of graph
                  title="GDP per capita vs Healthy life Expectancy") # set title
fig.show() # show the graph