In [2]:
import pandas as pd

# Read the drinks data from disk
file_path = 'data/drinks.csv'
drinks = pd.read_csv(file_path)

# Python port of the R pipeline: keep only the listed countries, drop the
# pure-alcohol column, and shorten the three serving column names
selected_countries = ["USA", "China", "Italy", "Saudi Arabia"]
drinks_smaller = (
    drinks
    .loc[lambda frame: frame['country'].isin(selected_countries)]
    .drop(columns=['total_litres_of_pure_alcohol'])
    .rename(columns={
        'beer_servings': 'beer',
        'spirit_servings': 'spirit',
        'wine_servings': 'wine',
    })
)

drinks_smaller.head()

Unnamed: 0,country,beer,spirit,wine
36,China,79,192,8
83,Italy,85,42,237
149,Saudi Arabia,0,5,0
184,USA,249,158,84


In [4]:
import plotly.express as px

# Reshape to tidy (long) form: every non-country column becomes a
# (type, servings) pair, giving one row per country and drink type
drinks_smaller_tidy = pd.melt(
    drinks_smaller,
    id_vars='country',
    var_name='type',
    value_name='servings',
)

# Grouped bar chart: countries on the x axis, one coloured bar per drink type
fig = px.bar(
    data_frame=drinks_smaller_tidy,
    x='country',
    y='servings',
    color='type',
    barmode='group',
    title='Drink Servings by Country and Type',
    labels={'country': 'Country', 'servings': 'Servings'},
)
fig.show()

# Show the tidy frame first, then the wide frame it was built from
print(drinks_smaller_tidy.head(), drinks_smaller.head(), sep='\n')

        country    type  servings
0         China    beer        79
1         Italy    beer        85
2  Saudi Arabia    beer         0
3           USA    beer       249
4         China  spirit       192
          country  beer  spirit  wine
36          China    79     192     8
83          Italy    85      42   237
149  Saudi Arabia     0       5     0
184           USA   249     158    84


In [6]:
# pandas' melt plays the role of R's pivot_longer here: it reshapes the wide
# drinks_smaller frame into tidy (long) form, keeping 'country' as the id column
drinks_smaller_tidy_python = pd.melt(
    drinks_smaller,
    id_vars='country',
    var_name='type',
    value_name='servings',
)
drinks_smaller_tidy_python.head()

Unnamed: 0,country,type,servings
0,China,beer,79
1,Italy,beer,85
2,Saudi Arabia,beer,0
3,USA,beer,249
4,China,spirit,192


In [7]:
# Same reshape, but with the columns to unpivot named one by one
cols_to_melt = ['beer', 'spirit', 'wine']
drinks_smaller_tidy_specific = pd.melt(
    frame=drinks_smaller,
    id_vars='country',
    value_vars=cols_to_melt,
    var_name='type',
    value_name='servings',
)
drinks_smaller_tidy_specific.head()

Unnamed: 0,country,type,servings
0,China,beer,79
1,Italy,beer,85
2,Saudi Arabia,beer,0
3,USA,beer,249
4,China,spirit,192


In [8]:
# Select the columns to unpivot by label range: everything from 'beer'
# through 'wine' (label slices with .loc include both endpoints)
cols_to_melt_range = list(drinks_smaller.loc[:, 'beer':'wine'].columns)

drinks_smaller_tidy_range = pd.melt(
    drinks_smaller,
    id_vars='country',
    value_vars=cols_to_melt_range,
    var_name='type',
    value_name='servings',
)
drinks_smaller_tidy_range.head()

Unnamed: 0,country,type,servings
0,China,beer,79
1,Italy,beer,85
2,Saudi Arabia,beer,0
3,USA,beer,249
4,China,spirit,192


In [9]:
import plotly.express as px

# Grouped bar chart of the tidy drinks data: countries on the x axis,
# one coloured bar per drink type
fig = px.bar(
    data_frame=drinks_smaller_tidy,
    x='country',
    y='servings',
    color='type',
    barmode='group',
    title='Drink Servings by Country and Type',
    labels={'country': 'Country', 'servings': 'Servings'},
)
fig.show()

In [14]:

# Load the two datasets used in the remaining cells
airline_safety = pd.read_csv("data/airline_safety.csv")
dem_score = pd.read_csv("data/dem_score.csv")

# airline_safety_smaller: keep only the columns whose names start with
# "airline" or "fatalities"
airline_safety_smaller = airline_safety.filter(regex='^airline|^fatalities')

# guat_dem: the rows of dem_score whose country is Guatemala
guat_dem = dem_score.loc[dem_score['country'].eq("Guatemala")]

# Preview both frames, one after the other
print(airline_safety_smaller.head(), guat_dem.head(), sep='\n')

                 airline  fatalities_85_99  fatalities_00_14
0             Aer Lingus                 0                 0
1               Aeroflot               128                88
2  Aerolineas Argentinas                 0                 0
3             Aeromexico                64                 0
4             Air Canada                 0                 0
      country  1952  1957  1962  1967  1972  1977  1982  1987  1992
32  Guatemala     2    -6    -5     3     1    -3    -7     3     3


In [15]:
# Unpivot the per-year columns into (year, democracy_score) rows, then cast
# the year labels, which melt takes from the column headers, to integers
guat_dem_tidy = (
    guat_dem
    .melt(id_vars='country', var_name='year', value_name='democracy_score')
    .astype({'year': int})
)

guat_dem_tidy.head()

Unnamed: 0,country,year,democracy_score
0,Guatemala,1952,2
1,Guatemala,1957,-6
2,Guatemala,1962,-5
3,Guatemala,1967,3
4,Guatemala,1972,1


In [16]:
import plotly.express as px

# Line chart of Guatemala's democracy score against year
fig_guat_dem = px.line(
    data_frame=guat_dem_tidy,
    x='year',
    y='democracy_score',
    title='Democracy Score Over Time for Guatemala',
    labels={'year': 'Year', 'democracy_score': 'Democracy Score'},
)
fig_guat_dem.show()