<a href="https://colab.research.google.com/github/amycelliott/DMP_LP_2022/blob/main/DMP_LP_2022_Target_Country_Current_Consumption.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Pandas - Only need to run once

In [5]:
# Install Pandas
#!pip install -U pandas~=1.2.4
# plotly is used for simple visualizations.
#!pip install -U plotly
# Import Pandas and numpy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Load Data from GitHub



Narrow down to specific columns

**Key Fields**
* iso_code
* country
* year

**Energy Consumption**
* biofuel_consumption
* coal_consumption
* fossil_fuel_consumption
* gas_consumption
* hydro_consumption
* low_carbon_consumption
* nuclear_consumption
* oil_consumption
* other_renewable_consumption
* primary_energy_consumption
* renewables_consumption
* solar_consumption
* wind_consumption

In [15]:

# Import the files that will be used
from tempfile import mkdtemp
from urllib.request import urlretrieve

tmp_dir = mkdtemp()

# Upload All Data
all_data_csv_path = f"{tmp_dir}/world_energy_consumption_all.csv"
urlretrieve("https://github.com/amycelliott/DMP_LP_2022/blob/main/files/World_Energy_Consumption.csv?raw=true", all_data_csv_path)

df_all_data = pd.read_csv(
    all_data_csv_path, 
    # Only pull in columns needed
    usecols=["iso_code","country","year","biofuel_consumption","coal_consumption","fossil_fuel_consumption","gas_consumption","hydro_consumption","low_carbon_consumption","nuclear_consumption","oil_consumption","other_renewable_consumption","primary_energy_consumption","renewables_consumption","solar_consumption","wind_consumption"],
)


# after loading, we can explore the data in all different ways pandas support
# No need to do this yet
df_all_data = df_all_data.sort_values('year', ascending=False)

# We have only been looking at data from 2000 forward, filter out the older data
df_all_data = df_all_data.query('`year` >= 2000')

# Only look at the top countries we are interested in
          # Obvious Choices
          # United States - Has dropped from #1 to #2 - however, it is still one of the top electricy usage
          # China - #2 to #1 - Shows growth and has high usage
          # Canada - Increased ranking slightly and a solid top choice

          # Areas of Growth
          # India - #6 to #3
          # South Korea
          # Brazil
          # Iran
          # Saudi Arabia
          # Indonesia
          # Turkey
          # Thalind Thailand
          # United Arib Emerits
          # Vietnom
df_all_data = df_all_data[df_all_data['country'].isin(['United States','China','Canada','India','South Korea','Brazil','Iran','Saudi Arabia','Indonesia','Turkey','Thailand','United Arib Emerits','Vietnom'])]

#Verify we filtered out older data
all_data_agg_year = df_all_data.agg({'year': ['min', 'max','count']})
print(all_data_agg_year)


       year
min    2000
max    2020
count   229


# Evaluate NULL Values


In [16]:
df_all_data.isnull().sum()

iso_code                       0
country                        0
year                           0
biofuel_consumption            9
coal_consumption               9
fossil_fuel_consumption        9
gas_consumption                9
hydro_consumption              9
low_carbon_consumption         9
nuclear_consumption            9
oil_consumption                9
other_renewable_consumption    9
primary_energy_consumption     9
renewables_consumption         9
solar_consumption              9
wind_consumption               9
dtype: int64