# Import climate factors

## Load Modules

In [1]:
import pandas as pd
import numpy as np

## Rainfall

In [2]:
rainfall = pd.read_csv('raw data/WORLDBANK_rainfall.csv').rename(columns={'Rainfall - (MM)':'Total Rainfall (mm)'})
rainfall.head(12)

Unnamed: 0,Total Rainfall (mm),Year,Statistics,Country
0,64.7765,1991,Jan Average,Afghanistan
1,59.4025,1991,Feb Average,Afghanistan
2,119.625,1991,Mar Average,Afghanistan
3,51.8025,1991,Apr Average,Afghanistan
4,57.2438,1991,May Average,Afghanistan
5,5.58788,1991,Jun Average,Afghanistan
6,4.39142,1991,Jul Average,Afghanistan
7,4.66582,1991,Aug Average,Afghanistan
8,12.6348,1991,Sep Average,Afghanistan
9,4.09568,1991,Oct Average,Afghanistan


In [3]:
annual_rainfall = rainfall.groupby(['Country','Year']).sum()
annual_rainfall.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Rainfall (mm)
Country,Year,Unnamed: 2_level_1
Afghanistan,1991,435.4499
Afghanistan,1992,408.15623
Afghanistan,1993,317.0853
Afghanistan,1994,342.22238
Afghanistan,1995,300.89815


Average annual rainfall last 25 years

In [4]:
average_annual_rainfall = annual_rainfall.groupby('Country').mean()
average_annual_rainfall['Standard Deviation'] = annual_rainfall.groupby('Country').std()
average_annual_rainfall.head()

Unnamed: 0_level_0,Total Rainfall (mm),Standard Deviation
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,322.119242,56.667164
Albania,992.182656,150.829612
Algeria,82.027218,14.214621
Andorra,757.658275,127.729671
Angola,966.806905,70.556696


## Temperature

In [5]:
temperature=pd.read_csv('raw data/WORLDBANK_temperature.csv').rename(columns={'Temperature - (Celsius)':'Temperature (°C)'})
temperature.head()

Unnamed: 0,Temperature (°C),Year,Statistics,Country
0,-0.0311,1991,Jan Average,Afghanistan
1,1.43654,1991,Feb Average,Afghanistan
2,6.88685,1991,Mar Average,Afghanistan
3,12.9397,1991,Apr Average,Afghanistan
4,17.0755,1991,May Average,Afghanistan


The average of monthly temepratures should be weighted based on days per month

In [6]:
weights = {' Jan Average':31, ' Feb Average':28, ' Mar Average':31, ' Apr Average':30, ' May Average':31, ' Jun Average':30,' Jul Average':31, ' Aug Average':31, ' Sep Average':30, ' Oct Average':31, ' Nov Average':30, ' Dec Average':31}

In [7]:
temperature['Weight'] = temperature['Statistics'].map(weights)
temperature.head()

Unnamed: 0,Temperature (°C),Year,Statistics,Country,Weight
0,-0.0311,1991,Jan Average,Afghanistan,31
1,1.43654,1991,Feb Average,Afghanistan,28
2,6.88685,1991,Mar Average,Afghanistan,31
3,12.9397,1991,Apr Average,Afghanistan,30
4,17.0755,1991,May Average,Afghanistan,31


In [8]:
annual_temperature = temperature.groupby(['Country','Year']).apply(lambda x: (x['Temperature (°C)'] * x['Weight']).sum() / x['Weight'].sum())
annual_temperature = pd.DataFrame(annual_temperature, columns = ["Average Temperature (°C)"])
annual_temperature.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Average Temperature (°C)
Country,Year,Unnamed: 2_level_1
Afghanistan,1991,12.958152
Afghanistan,1992,12.598444
Afghanistan,1993,13.016964
Afghanistan,1994,13.150314
Afghanistan,1995,13.095001


Average annual temperature last 25 years

In [9]:
average_annual_temperature = annual_temperature.groupby("Country").mean()
average_annual_temperature["Standard Deviation"] = annual_temperature.groupby("Country").std()
Average rainfall last 25 years per country

SyntaxError: invalid syntax (<ipython-input-9-11608ad6c2c3>, line 3)

## Water inflow

- **Total Internal Renewable Water Resources (IRWR)**: Long-term average annual flow of rivers and recharge of aquifers generated from endogenous precipitation. Double counting of surface water and groundwater resources is avoided by deducting the overlap from the sum of the surface water and groundwater resources. --> should correspond more or less with precipitation data
- **Total External Renewable Water Resources (ERWR)**: That part of the country's long-term average annual renewable water resources that are not generated in the country. It includes inflows from upstream countries (groundwater and surface water), and part of the water of border lakes and/or rivers. ERWR take into account the quantity of flow reserved by upstream (incoming flow) and/or downstream (outflow) countries through formal or informal agreements or treaties. Therefore, it may vary with time. In extreme cases, it may be negative when the flow reserved to downstream countries is more than the incoming flow.
- **Total Renewable Water Resources**: The sum of internal renewable water resources (IRWR) and external renewable water resources (ERWR). It corresponds to the maximum theoretical yearly amount of water available for a country at a given moment.
- **Dependency ratio**: Indicator expressing the percent of total renewable water resources originating outside the country. This indicator may theoretically vary between 0% and 100%. A country with a dependency ratio equal to 0% does not receive any water from neighbouring countries. A country with a dependency ratio equal to 100% receives all its renewable water from upstream countries, without producing any of its own. This indicator does not consider the possible allocation of water to downstream countries. 
- **Total Exploitable Water Resources** (also called manageable water resources or water development potential) are considered to be available for development, taking into consideration factors such as: the economic and environmental feasibility of storing floodwater behind dams, extracting groundwater, the physical possibility of storing water that naturally flows out to the sea, and minimum flow requirements (navigation, environmental services, aquatic life, etc). Methods to assess exploitable water resources vary from country to country. --> maybe not as interesting as this allready takes into account socio-economic factors

In [None]:
water_resources = pd.read_csv('raw data/AQUASTAT_water_resources.csv',nrows=835,index_col=False)
water_resources.head()

pivot table and rename + sort columns

In [None]:
water_resources = water_resources.pivot(index='Area', columns='Variable Name', values='Value').fillna(0).rename(columns={'Water resources: total external renewable':'Total external renewable water resources (ERWR)'})
water_resources = water_resources[['Total internal renewable water resources (IRWR)','Total external renewable water resources (ERWR)','Total renewable water resources','Dependency ratio','Total exploitable water resources']]

water_resources.head()

## Merge Climate factors

In [None]:
climate_factors = pd.merge(average_annual_temperature,average_annual_rainfall,left_index=True,right_index=True)
climate_factors.head()

In [None]:
climate_factors.index

In [None]:
water_resources.index

In [None]:
climate_factors = pd.merge(climate_factors,water_resources,left_index=True,right_index=True;how='outer')
climate_factors.head()