In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.cluster as cluster
import sklearn.metrics as skmet
import matplotlib.pyplot as plt
import cluster_tools as ct

In [7]:
def FilterData(df):
    """
    This function filters the dataframe for the required countries such as US,
    UK, France, India, China, Germany, Russia and keeps only the required
    fields.
    """
    # filter out data for US, UK, France, India, China, Germany, Russia
    countries = ['United States', 'United Kingdom', 'France', 'India', 'China',
             'Germany', 'Russian Federation', ]
    df = df.loc[df['Country Name'].isin(countries)]
    
    # required indicators
    indicator = ['SP.URB.TOTL', 'SP.POP.TOTL', 'SH.DYN.MORT', 'SH.DYN.MORT', 
                 'ER.H2O.FWTL.K3', 'EN.ATM.GHGT.KT.CE', 'EN.ATM.CO2E.KT',
                 'EN.ATM.CO2E.SF.KT', 'EN.ATM.CO2E.LF.KT', 'EN.ATM.CO2E.GF.KT',
                 'EG.USE.ELEC.KH.PC', 'EG.ELC.RNEW.ZS', 'AG.LND.FRST.K2',
                 'AG.LND.ARBL.ZS', 'AG.LND.AGRI.K2']
    df = df.loc[df['Indicator Code'].isin(indicator)]
    return df


def Preprocess(df):
    """
    This function preprocesses the data
    """
    df.drop('Country Code', axis=1, inplace=True)
    df.fillna(0, inplace=True)
    return df


def ElectricityConsumption(df):
    """
    """
    econs_df =  df.loc[df['Indicator Code'] == 'EG.USE.ELEC.KH.PC']
    econs_df.drop(['Indicator Name', 'Indicator Code'],
                  axis=1, inplace=True)
    econs_df.reset_index(drop=True, inplace=True)
    econs_df = econs_df.T
    econs_df = econs_df.rename(columns=econs_df.iloc[0])
    econs_df.drop(labels=['Country Name'],axis=0, inplace=True)
    econs_df.rename(columns={'United Kingdom': 'UK',
                             'Russian Federation': 'Russia',
                             'United States': 'US'}, inplace=True)
    econs_df.reset_index(inplace=True)
    econs_df.drop(econs_df.tail(1).index,inplace=True)
    return econs_df

In [23]:
def HeatmapPreprocess(df, country):
    """
    """
    hdf = df.loc[df['Country Name'] == country]
    indicator = ['SP.URB.TOTL', 'SP.POP.TOTL', 'SH.DYN.MORT', 'ER.H2O.FWTL.K3',
           'AG.LND.FRST.K2', 'AG.LND.ARBL.ZS', 'AG.LND.AGRI.K2',
           'EN.ATM.CO2E.KT']
    hdf =  hdf.loc[hdf['Indicator Code'].isin(indicator)]
    hdf.drop(['Country Name', 'Indicator Code'], axis=1, inplace=True)
    hdf.reset_index(drop=True, inplace=True)
    hdf = hdf.T
    hdf.reset_index(inplace=True)
    hdf = hdf.rename(columns=hdf.iloc[0])
    hdf.drop(0, inplace=True)
    hdf.drop(hdf.tail(3).index,inplace=True)
    hdf['Indicator Name'] = hdf['Indicator Name'].astype('int')
    hdf = hdf[hdf['Indicator Name'] >= 1990]
    hdf['Indicator Name'] = hdf['Indicator Name'].astype('object')
    return hdf

In [24]:
df = pd.read_csv('API_19_DS2_en_csv_v2_4902199.csv', skiprows=4)
# filters the data for the required countries
df = FilterData(df)
df = Preprocess(df)
edf = ElectricityConsumption(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  econs_df.drop(['Indicator Name', 'Indicator Code'],


In [25]:
hdf = HeatmapPreprocess(df, 'India')

In [31]:
hdf.dtypes

Indicator Name                                                 object
Urban population                                               object
Population, total                                              object
Mortality rate, under-5 (per 1,000 live births)                object
Annual freshwater withdrawals, total (billion cubic meters)    object
CO2 emissions (kt)                                             object
Forest area (sq. km)                                           object
Arable land (% of land area)                                   object
Agricultural land (sq. km)                                     object
dtype: object