In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Load the raw per-capita alcohol consumption dataset into a DataFrame.
# The path is relative (the same 'Resources/' folder the cleaned CSV is
# written to) rather than a user-specific absolute path, so the notebook
# is not tied to one machine.
# NOTE(review): assumes the notebook is run from the repository root — confirm.
AlcoholConsumptionPerCapita = pd.read_csv(
    'Resources/total-alcohol-consumption-per-capita-litres-of-pure-alcohol.csv'
)

# Preview the first 5 rows of the DataFrame
AlcoholConsumptionPerCapita.head()


Unnamed: 0,Entity,Code,Year,"Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)"
0,Afghanistan,AFG,2000,0.00277
1,Afghanistan,AFG,2005,0.02684
2,Afghanistan,AFG,2010,0.01241
3,Afghanistan,AFG,2015,0.00874
4,Afghanistan,AFG,2019,0.01618


In [3]:
# Clean the consumption data.
# The cell reassigns the DataFrame it reads from, so it must be safe to
# re-run: errors='ignore' stops drop() from raising KeyError when 'Code'
# was already removed by a previous run of this cell.
AlcoholConsumptionPerCapita = (
    AlcoholConsumptionPerCapita
    # 1. Remove the 'Code' column
    .drop(columns=['Code'], errors='ignore')
    # 2. Rename the column with the long title to 'AlcoholConsumptionPerCapita'
    .rename(columns={
        'Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)': 'AlcoholConsumptionPerCapita'
    })
    # 3. Sort by 'Entity' and 'Year' columns
    .sort_values(by=['Entity', 'Year'])
)

# 4. Save the cleaned data; index=False keeps the row index out of the CSV
AlcoholConsumptionPerCapita.to_csv('Resources/AlcoholConsumptionPerCapita.csv', index=False)

# Preview the cleaned DataFrame (head() still shows the row index)
AlcoholConsumptionPerCapita.head()


Unnamed: 0,Entity,Year,AlcoholConsumptionPerCapita
0,Afghanistan,2000,0.00277
1,Afghanistan,2005,0.02684
2,Afghanistan,2010,0.01241
3,Afghanistan,2015,0.00874
4,Afghanistan,2019,0.01618


In [5]:
# Column names: fail loudly if the cleaned frame does not have exactly the
# columns the cleaning step is meant to produce, then display them.
assert list(AlcoholConsumptionPerCapita.columns) == ['Entity', 'Year', 'AlcoholConsumptionPerCapita'], \
    "unexpected columns in AlcoholConsumptionPerCapita"
AlcoholConsumptionPerCapita.columns


Index(['Entity', 'Year', 'AlcoholConsumptionPerCapita'], dtype='object')

In [6]:
# Load the raw alcohol-consumption-vs-GDP-per-capita dataset into a DataFrame.
# The path is relative (the same 'Resources/' folder the cleaned CSV is
# written to) rather than a user-specific absolute path, so the notebook
# is not tied to one machine.
# NOTE(review): assumes the notebook is run from the repository root — confirm.
AlcoholConsumptionVsGDP = pd.read_csv(
    'Resources/alcohol-consumption-vs-gdp-per-capita.csv'
)

# Preview the first 5 rows of the DataFrame
AlcoholConsumptionVsGDP.head()

Unnamed: 0,Entity,Code,Year,"Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)","GDP per capita, PPP (constant 2017 international $)",Continent
0,Abkhazia,OWID_ABK,2015,,,Asia
1,Afghanistan,AFG,2000,0.00277,,
2,Afghanistan,AFG,2002,,1280.4631,
3,Afghanistan,AFG,2003,,1292.3335,
4,Afghanistan,AFG,2004,,1260.0605,


In [7]:
# Clean the consumption-vs-GDP data.
# The cell reassigns the DataFrame it reads from, so it must be safe to
# re-run: errors='ignore' stops drop() from raising KeyError when the
# columns were already removed by a previous run of this cell.
AlcoholConsumptionVsGDP = (
    AlcoholConsumptionVsGDP
    # 1. Remove the 'Code' and 'Continent' columns
    .drop(columns=['Code', 'Continent'], errors='ignore')
    # 2. Rename the columns with the long titles
    .rename(columns={
        'Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)': 'AlcoholConsumptionPerCapita',
        'GDP per capita, PPP (constant 2017 international $)': 'GDPPerCapita'
    })
    # 3. Drop rows where BOTH measures are NaN (how='all'); rows that have
    #    at least one of the two values are kept
    .dropna(subset=['AlcoholConsumptionPerCapita', 'GDPPerCapita'], how='all')
    # 4. Sort by 'Entity' and 'Year' columns
    .sort_values(by=['Entity', 'Year'])
)

# 5. Save the cleaned data; index=False keeps the row index out of the CSV
AlcoholConsumptionVsGDP.to_csv('Resources/AlcoholConsumptionVsGDP.csv', index=False)

# Preview the cleaned DataFrame (head() still shows the row index)
AlcoholConsumptionVsGDP.head()


Unnamed: 0,Entity,Year,AlcoholConsumptionPerCapita,GDPPerCapita
1,Afghanistan,2000,0.00277,
2,Afghanistan,2002,,1280.4631
3,Afghanistan,2003,,1292.3335
4,Afghanistan,2004,,1260.0605
5,Afghanistan,2005,0.02684,1352.3207


In [8]:
# Column names: fail loudly if the cleaned frame does not have exactly the
# columns the cleaning step is meant to produce, then display them.
assert list(AlcoholConsumptionVsGDP.columns) == ['Entity', 'Year', 'AlcoholConsumptionPerCapita', 'GDPPerCapita'], \
    "unexpected columns in AlcoholConsumptionVsGDP"
AlcoholConsumptionVsGDP.columns

Index(['Entity', 'Year', 'AlcoholConsumptionPerCapita', 'GDPPerCapita'], dtype='object')

In [12]:
# Load the raw alcohol-attributable mortality dataset into a DataFrame.
# The path is relative (the same 'Resources/' folder the cleaned CSV is
# written to) rather than a user-specific absolute path, so the notebook
# is not tied to one machine.
# NOTE(review): assumes the notebook is run from the repository root — confirm.
AlcoholRelatedMortality = pd.read_csv(
    'Resources/alcohol-attributable-fraction-of-mortality.csv'
)

# Preview the first 5 rows of the DataFrame
AlcoholRelatedMortality.head()

Unnamed: 0,Entity,Code,Year,"Alcohol-attributable fractions, all-cause deaths (%) - Sex: both sexes"
0,Afghanistan,AFG,2016,0.2
1,Albania,ALB,2016,6.4
2,Algeria,DZA,2016,1.0
3,Angola,AGO,2016,6.8
4,Antigua and Barbuda,ATG,2016,4.5


In [13]:
# Clean the mortality data.
# The cell reassigns the DataFrame it reads from, so it must be safe to
# re-run: errors='ignore' stops drop() from raising KeyError when 'Code'
# was already removed by a previous run of this cell.
AlcoholRelatedMortality = (
    AlcoholRelatedMortality
    # 1. Remove the 'Code' column
    .drop(columns=['Code'], errors='ignore')
    # 2. Rename the column with the long title
    .rename(columns={
        'Alcohol-attributable fractions, all-cause deaths (%) - Sex: both sexes': 'AlcoholRelatedMortality',
    })
    # 3. Sort by 'Entity' and 'Year' columns
    .sort_values(by=['Entity', 'Year'])
)

# 4. Save the cleaned data; index=False keeps the row index out of the CSV
AlcoholRelatedMortality.to_csv('Resources/AlcoholRelatedMortality.csv', index=False)

# Preview the cleaned DataFrame (head() still shows the row index)
AlcoholRelatedMortality.head()

Unnamed: 0,Entity,Year,AlcoholRelatedMortality
0,Afghanistan,2016,0.2
1,Albania,2016,6.4
2,Algeria,2016,1.0
3,Angola,2016,6.8
4,Antigua and Barbuda,2016,4.5


In [14]:
# Column names: fail loudly if the cleaned frame does not have exactly the
# columns the cleaning step is meant to produce, then display them.
assert list(AlcoholRelatedMortality.columns) == ['Entity', 'Year', 'AlcoholRelatedMortality'], \
    "unexpected columns in AlcoholRelatedMortality"
AlcoholRelatedMortality.columns

Index(['Entity', 'Year', 'AlcoholRelatedMortality'], dtype='object')