In [18]:
import pandas as pd
import numpy as np
import math

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import plotly.figure_factory as ff

from IPython.display import display, HTML

init_notebook_mode(connected=True)

# Load both exports of the baseline survey and stack them into a single frame.
# `encoding` is not a documented `pd.read_excel` parameter (recent pandas versions
# reject unknown keyword arguments), so it is no longer passed.
survey_v1 = pd.read_excel("Kassala_baseline_survey_for_CC_project_in_Sudan_final_results.xlsx")
survey_v2 = pd.read_excel("Kassala_baseline_survey_for_CC_project_in_Sudan_final_results_v2.xlsx")
# ignore_index gives every survey a unique row label; without it both files
# contribute labels 0..n and the combined index contains duplicates.
reg_survey = pd.concat([survey_v1, survey_v2], ignore_index=True)

# <center>Baseline survey results for the Locality of Telkouk </center>
### <center>*Climate Change and Livelihoods situation of 15 villages located in Telkouk Locality, Kassala State*</center>

In [19]:
# Emit an HTML snippet whose script hides every `div.input` element once the
# document is ready and offers a link that toggles them back on/off.
# The script uses the page's `$` function (jQuery-style selectors).
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

#bc_wages.head()
#reg_csv.info()
#bc_wages.describe()

In [20]:
# Functions

# Get ocurrences of each value for a multiple_select field
def get_ocurrences(df, column_name):
    """Count how many rows mention each option of a multiple-select column.

    Every non-null cell of ``df[column_name]`` is expected to be a string of
    whitespace-separated option names (e.g. ``"livestock forestry"``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the answers.
    column_name : str
        Name of the multiple-select column.

    Returns
    -------
    pandas.Series
        Number of rows mentioning each option, indexed by option name in order
        of first appearance among the sorted distinct answers. Empty when the
        column has no non-null answers.
    """
    # Rows per distinct answer string (groupby leaves out missing answers).
    values_sums = df.groupby(column_name).size()

    order = []
    counts = {}
    for answer, households in zip(values_sums.index, values_sums.values):
        seen = set()
        # Compare whole tokens: a substring test would make "wood" also match
        # answers such as "fuelwood".
        for option in answer.split():
            if option in seen:
                # An option repeated inside one answer still counts once per row.
                continue
            seen.add(option)
            if option not in counts:
                counts[option] = 0
                order.append(option)
            counts[option] += int(households)

    return pd.Series([counts[option] for option in order], index=order, dtype='int64')

In [21]:
## File preprocessing
# Remove the group prefixes ('demography:', 'house_assets:', 'incomes:',
# 'climate_change:', ...) that add no useful information to the column names.
# The steps run in sequence on the result of the previous one, so their order matters.

col_names = reg_survey.columns
# Plain prefixes: each slice start equals the length of the prefix being removed.
col_names = col_names.map(lambda x: x[11:] if x.startswith("demography:") else x)
col_names = col_names.map(lambda x: x[19:] if x.startswith("responding_section:") else x)
col_names = col_names.map(lambda x: x[20:] if x.startswith("responding_section2:") else x)
col_names = col_names.map(lambda x: x[13:] if x.startswith("house_assets:") else x)
col_names = col_names.map(lambda x: x[8:] if x.startswith("incomes:") else x)
col_names = col_names.map(lambda x: x[15:] if x.startswith("climate_change:") else x)
col_names = col_names.map(lambda x: x[11:] if x.startswith("intro_data:") else x)
# Keeps "respondant_" and drops "page:".
col_names = col_names.map(lambda x: x[:11] + x[16:] if x.startswith("respondant_page:") else x)
col_names = col_names.map(lambda x: x[16:] if x.startswith("general_hh_data:") else x)
col_names = col_names.map(lambda x: x[6:] if x.startswith("males:") else x)
col_names = col_names.map(lambda x: x[8:] if x.startswith("females:") else x)
col_names = col_names.map(lambda x: x[11:] if x.startswith("importance:") else x)
col_names = col_names.map(lambda x: x[19:] if x.startswith("percentage_incomes:") else x)
# Livelihood groups: keep the outer "<group>:" and cut a fixed-width segment after it.
# NOTE(review): the offsets are hard-coded and presumably match the width of a
# nested sub-group name in the export -- confirm against the raw column names.
col_names = col_names.map(lambda x: x[:10] + x[16:] if x.startswith("agri_gard:agri") else x)
col_names = col_names.map(lambda x: x[:10] + x[20:] if x.startswith("livestock:livestock_") else x)
col_names = col_names.map(lambda x: x[:10] + x[21:] if x.startswith("livestock:livestock") else x)
col_names = col_names.map(lambda x: x[:8] + x[17:] if x.startswith("fishing:fishing") else x)
# The same "labour:labour" test is applied twice with different offsets; the second
# pass only matches names that still start with "labour:labour" after the first.
col_names = col_names.map(lambda x: x[:7] + x[15:] if x.startswith("labour:labour") else x)
col_names = col_names.map(lambda x: x[:7] + x[14:] if x.startswith("labour:labour") else x)
col_names = col_names.map(lambda x: x[:12] + x[25:] if x.startswith("handicrafts:handicrafts") else x)
col_names = col_names.map(lambda x: x[:9] + x[19:] if x.startswith("forestry:forestry") else x)
col_names = col_names.map(lambda x: x[:6] + x[13:] if x.startswith("other:other") else x)
col_names = col_names.map(lambda x: x[7:] if x.startswith("coping:") else x)
col_names = col_names.map(lambda x: x[4:] if x.startswith(("cc1:", "cc2:", "cc3:", "ews:")) else x)
col_names = col_names.map(lambda x: x[10:] if x.startswith("renewable:") else x)
# Only the leading "agriculture_" / "livestock_" is removed here, so these names
# still start with "cc_effects:" afterwards.
col_names = col_names.map(lambda x: x[12:] if x.startswith("agriculture_cc_effects:") else x)
col_names = col_names.map(lambda x: x[10:] if x.startswith("livestock_cc_effects:") else x)

In [22]:
# Replace ':' with '_' for compatibility reasons.
# Each remaining group prefix keeps its name; only the closing ':' becomes '_'.
for group_prefix in ("geocoordinates:", "agri_gard:", "livestock:", "fishing:", "labour:",
                     "handicrafts:", "forestry:", "other:", "cc_effects:", "meta:"):
    col_names = col_names.map(
        lambda name, p=group_prefix: p[:-1] + '_' + name[len(p):] if name.startswith(p) else name
    )

# Align the one plural column name with the singular names used for the other animals.
col_names = col_names.map(lambda name: 'livestock_lost_goat' if name == 'livestock_lost_goats' else name)

reg_survey.columns = col_names

In [23]:
# Processing NaN and void values

# Columns whose missing answers are labelled 'no_answer'.
values = dict.fromkeys(
    [
        'house_material', 'kitchen_type', 'use_ews', 'renewable_knowledge', 'solar_energy',
        'livestock_selling', 'livestock_social', 'livestock_milk', 'livestock_meat',
        'livestock_savings', 'labour_participants', 'cc_effects_sand_affected_farmlands',
    ],
    'no_answer'
)
# Missing income sources are labelled 'none'; missing land quantities count as 0.
values['src_incomes'] = 'none'
values['agri_gard_water_harv_farmlands'] = 0
values['agri_gard_land_quantity'] = 0

# fillna(inplace=True) returns None, so the result must not be assigned back.
reg_survey.fillna(value=values, inplace=True)

In [24]:
# Creating villages dict
# Village codes present in the survey, captured before they are translated below.
villages = reg_survey['village'].unique()
villages_table = pd.read_excel("villages_kassala.xlsx")

# Villages dictionary: survey code -> village name, per language.
villages_by_code = villages_table.set_index('survey_code')
villages_dict = {
    'ar': villages_by_code['Arabic name'].to_dict(),
    'en': villages_by_code['English transcription'].to_dict(),
}

# Translate codes to English names. Values missing from the dictionary are kept
# unchanged instead of becoming NaN, so unknown codes stay visible in the
# per-village tables and re-running this cell does not blank the column.
reg_survey['village'] = reg_survey['village'].map(lambda code: villages_dict['en'].get(code, code))

## General data

Number of Surveys

In [25]:
# Survey totals: overall (non-null `deviceid` values) and per village.
surveys_village = reg_survey.groupby('village').size()
total_surveys = reg_survey['deviceid'].count()

print("Total:" + str(total_surveys))
print("")
print(surveys_village)

Total:399

village
Bahajeil                 22
Eirhagayeet              26
Gabrayeet                12
Gadamayeeb Adkihinab     22
Gadamayeeb Algadima      35
Hamtiyay Abrhemab         1
Khashum Tahday           29
Maman Kameragadap        37
Maman belimiklab         13
Pelait                   29
Takrouf                  30
Talkouk Tamay            63
Telkouk Ilatoit          55
lwayeep                  24
dtype: int64


In [26]:
# Number of surveys per `education` answer, drawn as a pie in group order.
education = reg_survey.groupby('education').size()

fig_education = dict(
    layout=dict(title="Education level of the head of household"),
    data=[
        dict(
            type="pie",
            labels=education.index.values,
            values=education.values,
            sort=False,
        )
    ],
)

iplot(fig_education)

In [27]:
# House material counts: one row per village present in the data, one column per material.
house_materials_df = pd.crosstab(reg_survey.village, reg_survey.house_material)

# Overall pie: materials summed over all villages.
fig_house_materials = {
    "layout": {
        "title": "House materials"
    },
    "data": [
        {

            "values": house_materials_df.sum().values,
            "labels": house_materials_df.columns.values,
            "type": "pie",
            "sort": False
        }
    ]
}

# Small multiples: one pie per village on a grid five columns wide.
fig_house_materials2 = {
    "layout": {
        "title": {
            "font": {
                    "size": 20
            },
            "text": "House materials"
        }
    },
    "data": []
}
# Iterate over the crosstab rows themselves. The village dictionary may list
# villages with no surveys, and indexing with i-1 started at the last row, which
# rotated the pies and could draw one village twice.
for i in range(len(house_materials_df)):
    x = .2*(i%5)
    # Floor division selects the grid row (explicit, independent of Python version).
    y = 0.35*(i//5)
    fig_house_materials2['data'].append(
        {

            "values": house_materials_df.iloc[i].values,
            "labels": house_materials_df.columns.values,
            # Village name, so each pie can be identified on hover.
            "name": house_materials_df.index[i],
            "domain": {
                "x": [x, x+.18],
                "y": [y, y+.3]
            },
            "type": "pie",
            "sort": False
        })

iplot(fig_house_materials)
iplot(fig_house_materials2)

###  Mean Household size

In [28]:
# Average of `family_total` over all surveys (missing values are skipped by mean()).
mean_hh_size = reg_survey['family_total'].mean()
print(str(mean_hh_size) + " persons per household")

5.7890625 persons per household


## Climate change awareness

In [29]:
# Two side-by-side pies: declared climate-change knowledge (left) and
# households taking action (right), each with a caption above it.
cc_knowledge = reg_survey.groupby('cc_knowledge').size()
taking_action_cc = reg_survey.groupby('taking_action_cc').size()

fig_cc_knowledge = dict(
    layout=dict(
        title=dict(text="Households declaring CC knowledge"),
        annotations=[
            dict(font=dict(size=18), showarrow=False, text=caption, x=caption_x, y=1.1)
            for caption, caption_x in [
                ("Households declaring CC knowledge", 0.03),
                ("Households taking actions against CC", 0.98),
            ]
        ],
    ),
    data=[
        dict(
            values=answer_counts.values,
            labels=answer_counts.index.values,
            domain=dict(x=x_range),
            type="pie",
            sort=False,
        )
        for answer_counts, x_range in [
            (cc_knowledge, [0, .48]),
            (taking_action_cc, [.52, 1]),
        ]
    ],
)

iplot(fig_cc_knowledge)

#### EWS awareness

In [30]:
# Two side-by-side pies: knowledge of early-warning systems (left) and use of
# their information (right), each with a caption above it.
ews_knowledge = reg_survey.groupby('ews_knowledge').size()
ews_use = reg_survey.groupby('use_ews').size()


def _ews_caption(text, x_pos):
    # Caption placed above one of the two pies.
    return {"font": {"size": 18}, "showarrow": False, "text": text, "x": x_pos, "y": 1.15}


def _ews_pie(answer_counts, x_range):
    # Pie trace restricted to a horizontal slice of the figure.
    return {
        "values": answer_counts.values,
        "labels": answer_counts.index.values,
        "domain": {"x": x_range},
        "type": "pie",
        "sort": False,
    }


fig_ews = {
    "layout": {
        "title": {"text": "Households declaring EWS use"},
        "annotations": [
            _ews_caption("Households declaring EWS knowledge", 0.03),
            _ews_caption("Households using EWS information", 0.98),
        ],
    },
    "data": [
        _ews_pie(ews_knowledge, [0, .48]),
        _ews_pie(ews_use, [.52, 1]),
    ],
}

iplot(fig_ews)

#### Renewable energy awareness

In [31]:
# Two side-by-side pies: declared knowledge of renewable energies (left) and
# the `solar_energy` answers (right), each with a caption above it.
renewable_knowledge = reg_survey.groupby('renewable_knowledge').size()
solar_energy = reg_survey.groupby('solar_energy').size()
fig_renewable_knowledge = {
    "layout": {
        "title": {
            # The title previously read "Households declaring CC knowledge",
            # copied over from the climate-change figure.
            "text": "Renewable energy awareness"
        },
        "annotations": [
            {
                "font": {
                    "size": 15
                },
                "showarrow": False,
                "text": "Households declaring renewable energies knowledge",
                "x": -0.1,
                "y": 1.15
            },
            {
                "font": {
                    "size": 15
                },
                "showarrow": False,
                "text": "Households using renewable energies",
                "x": 1,
                "y": 1.15
            }
        ]
    },
    "data": [
        {

            "values": renewable_knowledge.values,
            "labels": renewable_knowledge.index.values,
            "domain": {
                "x": [0, .48]
            },
            "type": "pie",
            "sort": False
        },
        {
            "values": solar_energy.values,
            "labels": solar_energy.index.values,
            "domain": {
                "x": [.52, 1]
            },
            "type": "pie",
            "sort": False
        },
    ]
}

iplot(fig_renewable_knowledge)

#### Cooking facilities

In [32]:
# Cooking techniques. `kitchen_type` is handled as a multiple-select answer, so
# the shares below may add up to more than 100%.
kitchen_type = get_ocurrences(reg_survey, 'kitchen_type').sort_values(ascending = False)
kitchen = pd.crosstab(reg_survey.village, reg_survey.kitchen_type)

# Percentage of all surveys mentioning each technique.
kitchen_perc = 100*kitchen_type/total_surveys
kitchen_perc = kitchen_perc.round(2)
print(kitchen_perc)

# Bar labels, computed once and shared by the trace and its annotations.
kitchen_labels = [name.capitalize() for name in kitchen_perc.index.values]

fig_kitchen_type = {
    "layout": {
        "title": "Cooking techniques",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": kitchen_labels,
            "y": kitchen_perc.values,
            "type": "bar",
            # Was "Livelihoods Strategies", copied from the livelihoods chart.
            "name": "Cooking techniques",
            "marker": {
                # Channel values must lie in 0-255 (the blue channel was 258).
                "color": 'rgb(225,225,255)',
                "line": {
                    "color": 'rgb(0,0,192)',
                    "width": 1.5
                }
            },
        }
    ]
}

# Write each percentage just above its bar.
for label, perc in zip(kitchen_labels, kitchen_perc.values):
    fig_kitchen_type["layout"]["annotations"].append(
        {
            "x": label,
            "y": perc + 1.5,
            "text": str(perc) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(0, 0, 100, 1)'
            ),
            "showarrow": False,
        }
    )


iplot(fig_kitchen_type)

wood          63.16
iron_stove    30.58
other         15.04
no_answer      6.52
charcoal       0.50
improved       0.25
dtype: float64


## Livelihoods

Number of households practicing each livelihood

In [33]:
#reg_survey.loc[reg_survey['src_incomes'].str.contains("agriculture"), 'agriculture'] = 'yes'
#reg_survey.loc[reg_survey['src_incomes'].str.contains("livestock"), 'livestock'] = 'yes'
#reg_survey.loc[reg_survey['src_incomes'].str.contains("labour"), 'labour'] = 'yes'
#importance = reg_survey.groupby(['agriculture', 'livestock', 'labour'])['perc_incomes_agri'].agg('mean')

#agric = reg_survey[reg_survey['src_incomes'].str.contains("agriculture")]['deviceid'].count()

# Households per exact combination of income sources ...
src_incomes_sums = reg_survey.groupby('src_incomes').size()
# ... and per individual source (one household can appear under several sources).
lh_values = get_ocurrences(reg_survey, 'src_incomes')
lh_types = pd.DataFrame(dict(Households=lh_values))
#lh_types["Percentage"] = lh_types.Households.map(lambda x: str(round(100*x/total_surveys))) + " %"

print("")
print(lh_values.sort_values(ascending=False))


livestock      234
forestry       130
other           43
handicrafts     30
labour          19
agriculture     19
none            15
dtype: int64


In [34]:
# Percentage of all surveys mentioning each income source (a household can
# mention several, so the bars may add up to more than 100%).
lh_perc = 100*lh_values/total_surveys
lh_perc = lh_perc.round(2)

# Bar labels, computed once and shared by the trace and its annotations.
lh_labels = [name.capitalize() for name in lh_perc.index.values]

livelihoods_strategies = {
    "layout": {
        "title": "Livelihoods Strategies",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": lh_labels,
            "y": lh_perc.values,
            "type": "bar",
            "name": "Livelihoods Strategies",
            "marker": {
                # Channel values must lie in 0-255 (the red channel was 258).
                "color": 'rgb(255,225,225)',
                "line": {
                    "color": 'rgb(192,0,0)',
                    "width": 1.5
                }
            },
        }
    ]
}

# Write each percentage just above its bar.
for label, perc in zip(lh_labels, lh_perc.values):
    livelihoods_strategies["layout"]["annotations"].append(
        {
            "x": label,
            "y": perc + 1.5,
            "text": str(perc) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(100, 0, 0, 1)'
            ),
            "showarrow": False,
        }
    )

iplot(livelihoods_strategies)

Main livelihoods per household

In [35]:
# Exact combinations of income sources, most frequent first.
print(src_incomes_sums.sort_values(ascending=False))

src_incomes
livestock                         162
forestry                           90
livestock forestry                 25
other                              24
livestock handicrafts              18
labour                             16
none                               15
livestock other                    11
agriculture                         9
agriculture livestock               6
livestock forestry handicrafts      5
forestry handicrafts                3
handicrafts                         2
livestock forestry other            2
livestock labour                    2
forestry other                      2
handicrafts other                   1
labour forestry                     1
agriculture other                   1
agriculture livestock other         1
agriculture livestock forestry      1
livestock handicrafts other         1
agriculture forestry                1
dtype: int64


- 277 out of 399 households (69.42%) live only on livestock, forestry, or both.

### Livestock

Number of interviewed households owning each type of livestock

In [36]:
# Households whose income sources include livestock. .copy() makes this an
# independent frame, so the derived columns added below are not written onto a
# slice of reg_survey (which triggers SettingWithCopyWarning).
livestock_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("livestock")].copy()

participants = livestock_set.groupby('livestock_participants').size()
# na=False: rows without a livestock type simply do not match.
participants_goats = livestock_set.loc[livestock_set['livestock_type'].str.contains("goat", na=False)].groupby('livestock_participants').size()

lvstk_types = ['camel', 'cattle', 'goat', 'sheep', 'chicken']

types_lvstck = get_ocurrences(livestock_set, 'livestock_type')
livestock_women = livestock_set.loc[livestock_set['livestock_participants'] == "women"]
type_women =  livestock_women.groupby('livestock_type').size()

# Share of each herd that was lost: lost / (lost + remaining).
# The denominator previously added `livestock_number_*` to itself.
# NOTE(review): assumes `livestock_number_*` is the herd size after the losses -- confirm.
for i in lvstk_types:
    lost = livestock_set['livestock_lost_' + i]
    remaining = livestock_set['livestock_number_' + i]
    livestock_set[i + '_reduced'] = lost / (remaining + lost)

# Herd sizes per household (goats keep their missing values; they are dropped later).
goats = livestock_set['livestock_number_goat']

sheeps = livestock_set['livestock_number_sheep'].dropna()
camels = livestock_set['livestock_number_camel'].dropna()
chickens = livestock_set['livestock_number_chicken'].dropna()

livestock_set.groupby('livestock_training').size()



livestock_training
no     103
yes    131
dtype: int64

In [37]:
# Percentage of livestock households (non-null `deviceid`) owning each animal type.
perc_lvstck = 100*types_lvstck/livestock_set['deviceid'].count()
perc_lvstck = perc_lvstck.round(2)

# Bar labels, computed once and shared by the trace and its annotations.
lvstck_labels = [name.capitalize() for name in perc_lvstck.index.values]

fig_types_lvstck = {
    "layout": {
        "title": "Livestock types",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": lvstck_labels,
            "y": perc_lvstck.values,
            "type": "bar",
            # Was "Livelihoods Strategies", copied from the livelihoods chart.
            "name": "Livestock types",
            "marker": {
                # Channel values must lie in 0-255 (the red channel was 258).
                "color": 'rgb(255,225,225)',
                "line": {
                    "color": 'rgb(192,0,0)',
                    "width": 1.5
                }
            }
        }
    ]
}

# Write each percentage just above its bar.
for label, perc in zip(lvstck_labels, perc_lvstck.values):
    fig_types_lvstck["layout"]["annotations"].append(
        {
            "x": label,
            "y": perc + 1.5,
            "text": str(perc) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(100, 0, 0, 1)'
            ),
            "showarrow": False,
        }
    )

iplot(fig_types_lvstck)

In [38]:
# Number of households for every observed (goats, sheep) pair, in long format.
prueba = pd.crosstab(livestock_set.livestock_number_goat, livestock_set.livestock_number_sheep).stack().reset_index(name='number')
# Keep observed pairs only and leave out herds above 100 animals.
prueba = prueba[prueba.number != 0]
prueba = prueba[prueba.livestock_number_goat <= 100]
prueba = prueba[prueba.livestock_number_sheep <= 100]

goats_vs_sheeps = {
    "layout": {
        "title": "Relation between the number of goats and sheeps",
        # Axis titles added: without them the two axes cannot be told apart.
        "xaxis": {
            "title": "Goats per household"
        },
        "yaxis": {
            "title": "Sheep per household"
        }
    },
    "data": [
        {

            "x": prueba.livestock_number_goat,
            "y": prueba.livestock_number_sheep,
            "mode": "markers",
            "marker": {
                # Marker size grows with the number of households sharing the pair.
                "size": prueba.number * 5
            },
            "type": "scatter"
        }
    ]
}
iplot(goats_vs_sheeps)

With a few exceptions, the number of goats per household is proportional to the number of sheep. The number of goats is therefore used as the main variable to define wealth groups among livestock-owning households.

In [39]:
# Wealth groups by number of goats: below the 35th percentile (poor), from
# there up to the 97th percentile (medium), above it (rich).
quantiles = [.35, .97, 1]
q = goats.quantile(quantiles)
print("Quantiles:")
print(q)

low_cut = q[quantiles[0]]
high_cut = q[quantiles[1]]

# The groups must not overlap. `between` is inclusive at both ends, so using it
# for both upper groups put households sitting exactly on the upper cut-off
# into "medium" and "rich" at the same time.
goats1 = goats[goats < low_cut].dropna()
goats2 = goats[(goats >= low_cut) & (goats <= high_cut)].dropna()
goats3 = goats[goats > high_cut].dropna()

fig = ff.create_distplot([goats1.values, goats2.values, goats3.values], ['poor', 'medium', 'rich'])
iplot (fig)

Quantiles:
0.35      9.0
0.97     35.0
1.00    110.0
Name: livestock_number_goat, dtype: float64


#### Quantiles for goats
- Poor: 35% of households have fewer than 10 goats and fewer than 10 sheep
- Medium: 62% of households have between 10 and 35 goats and between 20 and 30 sheep
- Rich: 3% of households have more than 35 goats and/or more than 30 sheep

In [40]:
# Answer counts (yes / no / no_answer) for each livestock use: all livestock
# households first, then those keeping goats, then those keeping sheep.
livestock_uses = ["selling", "social", "milk", "meat", "savings"]
# A real list (not a lazy `map` object) so it can be used as a column indexer.
use_columns = ["livestock_" + use for use in livestock_uses]

# fillna returns a new frame; its result was previously discarded.
livestock_uses_df = livestock_set[use_columns].fillna('no')
print("USES TOTAL LIVESTOCK")
print(livestock_uses_df.apply(pd.Series.value_counts))

# na=False: rows without a livestock type simply do not match.
goats_uses = livestock_set.loc[livestock_set['livestock_type'].str.contains("goat", na=False), use_columns]
print("")
print("USES TOTAL GOATS")
print(goats_uses.apply(pd.Series.value_counts))

sheeps_uses = livestock_set.loc[livestock_set['livestock_type'].str.contains("sheep", na=False), use_columns]
print("")
print("USES TOTAL SHEEPS")
print(sheeps_uses.apply(pd.Series.value_counts))

USES TOTAL LIVESTOCK
           livestock_selling  livestock_social  livestock_milk  \
no                      79.0                76               4   
no_answer                NaN                 2               3   
yes                    155.0               156             227   

           livestock_meat  livestock_savings  
no                     61                 32  
no_answer               3                  7  
yes                   170                195  

USES TOTAL GOATS
           livestock_selling  livestock_social  livestock_milk  \
no                      74.0                72               3   
no_answer                NaN                 2               3   
yes                    144.0               144             212   

           livestock_meat  livestock_savings  
no                     54                 30  
no_answer               3                  7  
yes                   161                181  

USES TOTAL SHEEPS
           livestock_selling  livest

### Laborers

In [41]:
# Households whose income sources include labour.
labour_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("labour")]
# Households per labour sector (a household can mention several sectors).
print get_ocurrences(labour_set, 'labour_sector')
labour_participants =  labour_set.groupby('labour_participants').size()
# NOTE(review): the selection below is neither assigned nor the last expression
# of the cell, so its result is discarded -- display it or remove the line.
labour_set.loc[labour_set['labour_participants'].str.contains('women')][['labour_participants', 'respondant_gender', 'respondant_age', 'respondant_head_hh', 'male_teenager', 'male_adult', 'male_elderly', 'labour_sector']]
# The aggregates below are computed but not printed in this cell.
labour_incomes = labour_set.groupby('labour_daily_salary').size()
mean_salary = labour_set['labour_daily_salary'].mean()
labour_incomes_others = labour_set.loc[labour_set['labour_sector'] == 'other'].groupby('labour_daily_salary').size()
sectors_100 = labour_set.loc[labour_set['labour_daily_salary'] == 100].groupby('labour_sector').size()
# Households per work term.
labor_term = labour_set.groupby('labour_work_term').size()
print labor_term

other    17
dtype: int64
labour_work_term
permanent    9
temporary    7
dtype: int64


### Agriculture

In [42]:
# Households whose income sources include agriculture. .copy() makes this an
# independent frame, so the derived column added below is not written onto a
# slice of reg_survey (which triggers SettingWithCopyWarning).
agri_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("agriculture")].copy()

# Inconsistencies
# print agri_set.loc[agri_set.agri_gard_water_harv_farmlands > agri_set.agri_gard_land_quantity]['agri_gard_water_harv_farmlands']
# print agri_set.loc[agri_set.agri_gard_water_harv_farmlands > agri_set.agri_gard_land_quantity]['agri_gard_land_quantity']

# Ratio of water-harvesting farmland to total farmland per household.
# Missing land quantities were filled with 0 upstream, so the ratio is inf
# (or NaN) for those rows, and above 1 where the answers are inconsistent.
agri_set['perc_rainfed_farmlands'] = agri_set['agri_gard_water_harv_farmlands'] / agri_set['agri_gard_land_quantity']

# Estimation to correct the inconsistencies: All feddan have water harvesting access
# NOTE(review): 257 is a hard-coded correction whose derivation is not recorded here.
wh_feddans = agri_set['agri_gard_water_harv_farmlands'].sum() - 257

# Corrected water-harvesting feddans per agricultural household and per surveyed household.
print(wh_feddans / agri_set['deviceid'].count())
print(wh_feddans / total_surveys)

print(agri_set.groupby('perc_rainfed_farmlands').size())
print(agri_set.groupby('agri_gard_farm_tech').size())
print(agri_set.groupby('agri_gard_water_harv_farmlands').size())
all_farm_rainfed = agri_set.loc[agri_set['perc_rainfed_farmlands'] == 1].groupby('agri_gard_land_quantity').size()
print(agri_set.groupby('agri_gard_land_quantity').size())
# Two bare `.sum()` expressions stood here; their results were discarded, so they were removed.
print(agri_set.groupby('agri_gard_land_property').size())
print(agri_set.groupby('cc_effects_sand_affected_farmlands').size())

agri_set.groupby('agri_gard_training').size()

2.42105263158
0.115288220551
perc_rainfed_farmlands
0.000000     1
0.600000     2
0.666667     1
0.714286     1
0.800000     2
1.000000     6
2.500000     1
25.000000    1
inf          3
dtype: int64
agri_gard_farm_tech
traditional    19
dtype: int64
agri_gard_water_harv_farmlands
0.0      2
1.0      1
2.0      2
3.0      4
4.0      4
5.0      4
50.0     1
200.0    1
dtype: int64
agri_gard_land_quantity
0.0    4
2.0    4
3.0    2
4.0    2
5.0    5
6.0    1
7.0    1
dtype: int64
agri_gard_land_property
community    6
none         3
own          8
rent         1
dtype: int64
cc_effects_sand_affected_farmlands
0.0          4
1.0          5
2.0          7
3.0          2
no_answer    1
dtype: int64


agri_gard_training
no      7
yes    12
dtype: int64

### Forestry (charcoal production)

In [43]:
# Households whose income sources include forestry.
forestry_set = reg_survey.loc[reg_survey['src_incomes'].str.contains("forestry")]

# Forestry products: exact answer combinations first, then per individual product.
print(forestry_set.groupby('forestry_forestry_type').size())
print(get_ocurrences(forestry_set, 'forestry_forestry_type'))

# Who takes part, and who owns the land.
forestry_participants = forestry_set.groupby('forestry_participants').size()
forestry_land_property = forestry_set.groupby('forestry_land_property').size()

print(forestry_participants)
print(forestry_land_property)

forestry_forestry_type
charcoal              3
charcoal fuelwood    95
fuelwood             28
fuelwood other        4
dtype: int64
charcoal     98
fuelwood    127
other         4
dtype: int64
forestry_participants
men             110
men children      9
men women        10
dtype: int64
forestry_land_property
community             60
none                  58
none own community     1
own                    4
own community          2
shared                 5
dtype: int64


## Climate change effects

In [44]:
# Households mentioning each reported change of the rainy season.
cc_effects_on_rain = get_ocurrences(reg_survey, 'changed_rainy_season')

print(cc_effects_on_rain)

decreased         327
season_changed    176
unsteady          106
increased          53
no_changes         14
unpredictable       3
dtype: int64


In [45]:
# `sand_movements` answers: per individual option first, then per exact combination.
wind_changes = get_ocurrences(reg_survey, 'sand_movements')
sand_mov_sums = reg_survey.groupby('sand_movements').size()

print(wind_changes)
print(sand_mov_sums)

less_windy        59
more_windy       257
no_changes        45
unpredictable     27
dtype: int64
sand_movements
less_windy                   55
more_windy                  254
more_windy less_windy         2
no_changes                   45
unpredictable                24
unpredictable less_windy      2
unpredictable more_windy      1
dtype: int64


In [46]:
# Households mentioning each reported temperature change.
temps_changes = get_ocurrences(reg_survey, 'temps_change')

print(temps_changes)

decreased         53
unsteady          71
unpredictable      6
increased        309
no_changes         1
dtype: int64


In [47]:
# Reported trend of grassland availability, as a percentage of the number of
# livestock households.
# NOTE(review): the counts come from all surveys while the denominator is the
# livestock subset -- presumably only livestock households answered; confirm.
effects_on_lvstck = get_ocurrences(reg_survey, 'cc_effects_grassland_availability').sort_values(ascending = False)
lvstck_perc = 100*effects_on_lvstck/livestock_set['deviceid'].size
lvstck_perc = lvstck_perc.round(2)

# Bar labels, computed once and shared by the trace and its annotations.
grassland_labels = [name.capitalize() for name in lvstck_perc.index.values]

fig_effects_on_lvstck = {
    "layout": {
        "title": "Trend of the availability of grassland in the last 5 years",
        "xaxis": {
            "tickangle": -45
        },
        "width": 700,
        "annotations": []
    },
    "data": [
        {
            "x": grassland_labels,
            "y": lvstck_perc.values,
            "type": "bar",
            "marker": {
                # An alpha component requires rgba(); 'rgb(...)' with four
                # values is not a valid colour string.
                "color": 'rgba(25,158,25,0.6)',
                "line": {
                    "color": 'rgb(0,70,0)',
                    "width": 1.5
                }
            },
        }
    ]
}

# Write each percentage just above its bar.
for label, perc in zip(grassland_labels, lvstck_perc.values):
    fig_effects_on_lvstck["layout"]["annotations"].append(
        {
            "x": label,
            "y": perc + 1.7,
            "text": str(perc) + "%",
            "font": dict(
                family='Arial',
                size=14,
                color='rgba(0, 100, 0, 1)'
            ),
            "showarrow": False,
        }
    )

iplot(fig_effects_on_lvstck)
