## Imports and data loading

In [1]:
import pandas as pd
import numpy as np

# Load the energy dataset and the table stored in life_expectancy.xlsx.
main_df = pd.read_csv('../data/energy_data.csv')
life_exp = pd.read_excel('../data/life_expectancy.xlsx')

# Countries that appear in main_df but not in life_exp; these rows are dropped
# in the cleaning section below.
# The lookup set is built once up front rather than once per country tested.
life_exp_countries = set(life_exp['country'])
not_in_countries = [country for country in set(main_df['country'])
                    if country not in life_exp_countries]

## Cleaning
---
- Reordering columns
- Replacing `'--'` with nulls
- Dropping countries not found in the World Bank data, and years 2019 and 2020

In [3]:
# re-ordering columns
# The new order is assembled from positional slices of the current column list:
# the first two columns, then column 27, then everything from column 32 on,
# then columns 24-26, columns 2-23 and finally columns 28-31.
current_order = list(main_df.columns)
column_groups = (
    slice(None, 2),
    slice(27, 28),
    slice(32, None),
    slice(24, 27),
    slice(2, 24),
    slice(28, 32),
)
cols = [name for group in column_groups for name in current_order[group]]
main_df = main_df[cols]

In [4]:
# replacing '--' with np.nan
# Every cell whose value is exactly '--' becomes NaN; main_df is rebound to the result.
main_df = main_df.replace('--', np.nan)

In [5]:
# dropping
# A row is removed when its country is listed in not_in_countries
# or when its year is 2019 or 2020.
unmatched_country = main_df['country'].isin(not_in_countries)
excluded_year = main_df['year'].isin([2019, 2020])
drop_index = main_df.loc[unmatched_country | excluded_year].index

main_df = main_df.drop(index=drop_index)

# saving progress, delete before submitting
# NOTE(review): this writes to the same path the notebook loads main_df from,
# so the source CSV is overwritten with the cleaned, re-ordered frame. Re-running
# the notebook afterwards would apply the positional column re-ordering to the
# already re-ordered file. Confirm this is intended, or save under another name.
main_df.to_csv('../data/energy_data.csv', index=False)

main_df

Unnamed: 0,country,year,population,life_expectancy,consumption_per_capita,consumption_per_GDP,ppp_2015USD,production_total,production_coal,production_natural_gas,...,imports_coal,imports_electricity,exports_crude_oil,exports_natural_gas,exports_coal,exports_electricity,emissions_co2_emissions,emissions_coal_and_coke,emissions_consumed_natural_gas,emissions_petroleum_and_other_liquids
0,World,1980,4298126.5224,62.841745,68.1556461505447,10.5581736251749,27745.479547,296.214352546245,79.9919425299794,54.761045594,...,284893.825,145.5369836,30580.206,,297478.5297,150.131648,18671.570671825,7455.93975375439,2843.422967239,8372.20795083112
1,Afghanistan,1980,13356.5,43.244000,1.99028313387203,0,,0.0725611561466706,0.00235528580748066,0.06282,...,0,0,0,,0,0,1.325964518,0.231313858444469,0.11110066,0.98355
2,Albania,1980,2682.7,70.208000,60.7529063336374,0,,0.155561620237156,0.0132290386336457,0.01047,...,176.3696,0,0,,0,0.5,9.618526399,2.37975309891915,0.5555033,6.68327
3,Algeria,1980,19221.7,58.198000,40.6153028667236,0,,2.80301735502645,7.59200610490114e-05,0.48498,...,99.06257471,0.07,713.534,,0,0.061,45.663590904,0.237672237362358,28.799158667,16.62676
4,American Samoa,1980,32.646,,180.51560374931,0,,0,0,0,...,0,0,0,,0,0,0.425070708,0,0,0.425070707990939
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9002,Venezuela,2018,28887.13,72.128000,91.8686767650665,9.2435633243543,287.0995,4.96130933715346,0.0126810828365154,1.0282010136781,...,4.086968648,0,1002.677,0,96.07712024,1.023,128.298044187,0.127185590890007,51.365907596,76.804951
9003,Vietnam,2018,95513.3,75.317000,37.603299261526,4.19840182186666,855.472,2.75873282693529,1.11538149221253,0.3016125726684,...,25541.55007,3.124,66.88,0,1922.007365,1.509,240.322622508,158.66857870264,15.837384805,65.816659
9006,Yemen,2018,28498.5,66.096000,4.33639613281696,2.37691924125027,51.992,0.103734799593185,0,0.0034385429778,...,136.68644,0,0,0,0,0,8.368931695,0.297799155301852,0.18277454,7.888358
9007,Zambia,2018,17363.72,63.510000,12.2076143194944,0.870218997847099,243.5819,0.153047862614526,0.0289169711585265,0,...,97.10584113,0.152,0,0,2.99400293,1.225,7.143270226,2.89410222572559,0,4.249168
